import gradio as gr
import pandas as pd

from css_html_js import custom_css

# HTML heading for the page; passed to gr.HTML when the UI is built.
# The flag is the regional-indicator pair U+1F1F2 U+1F1FE (Malaysia); it had
# been mis-decoded into the Greek/superscript sequence "π²πΎ".
TITLE = """<h1 align="center" id="space-title">🇲🇾 Malaysian Embedding Leaderboard</h1>"""

# Markdown shown below the heading; passed to gr.Markdown when the UI is built.
# It describes the leaderboard's purpose, where to submit scores, and the four
# evaluation datasets.
#
# Fixes applied to the literal:
#   * the Malaysian flag (U+1F1F2 U+1F1FE) had been mis-decoded as "π²πΎ";
#   * every Hugging Face URL used the host "huggingface.co." (stray trailing
#     dot) instead of "huggingface.co";
#   * stray " |" residue at the end of each line has been removed.
#
# NOTE(review): the lone "π" at the start of the two paragraphs below looks
# like the first byte of another mis-decoded emoji whose remaining bytes were
# lost. The original code point cannot be recovered from this file, so the
# character is left untouched -- replace it with the intended emoji.
INTRODUCTION_TEXT = """
π The 🇲🇾 Malaysian Embedding Leaderboard aims to track, rank and evaluate Top-k retrieval using embedding models. All notebooks at https://github.com/mesolitica/embedding-benchmarks, feel free to submit your own score at https://huggingface.co/spaces/mesolitica/Malaysian-Embedding-Leaderboard/discussions with link to the notebook.

## Dataset

π We evaluate models based on 4 datasets,

1. CrossRef Melayu related DOI, https://huggingface.co/datasets/mesolitica/malaysian-ultrachat/resolve/main/ultrachat-crossref-melayu-malay.jsonl

2. Epenerbitan, https://huggingface.co/datasets/mesolitica/malaysian-ultrachat/resolve/main/ultrachat-epenerbitan-malay.jsonl

3. gov.my PDF files, https://huggingface.co/datasets/mesolitica/malaysian-ultrachat/resolve/main/ultrachat-gov.my.jsonl

4. lom.agc.gov.my PDF files, https://huggingface.co/datasets/mesolitica/malaysian-ultrachat/resolve/main/ultrachat-lom-agc.jsonl
"""

# Top-level Gradio container; `custom_css` is supplied as the page stylesheet.
demo = gr.Blocks(css=custom_css)

# Components created inside the `with` block are attached to `demo` in order:
# first the HTML heading, then the Markdown introduction.
with demo:
    gr.HTML(TITLE)
    # "markdown-text" is the CSS class put on this component -- presumably
    # targeted by a rule in `custom_css`; confirm against css_html_js.
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

# Start the web server only when this file is executed as a script, so that
# importing the module (e.g. to inspect `demo`) does not launch anything.
if __name__ == "__main__":
    demo.launch()