ocrbench-leaderboard / leadboard.py
echo840's picture
Upload 2 files
4936caa verified
raw
history blame
5.81 kB
import ast
import argparse
import glob
import pickle
import gradio as gr
import numpy as np
import pandas as pd
# Custom CSS for the page; build_demo passes this string as the css= argument
# of gr.Blocks. It sets font size and cell padding for Markdown elements,
# hides footer elements, and lays out images inside .image-container.
# NOTE(review): the code visible in this file only assigns the element ids
# "leaderboard_markdown" and "arena_leaderboard_dataframe", so the
# "#notice_markdown" and "#leaderboard_dataframe" rules may not match any
# element -- confirm before relying on them.
block_css = """
#notice_markdown {
font-size: 104%
}
#notice_markdown th {
display: none;
}
#notice_markdown td {
padding-top: 6px;
padding-bottom: 6px;
}
#leaderboard_markdown {
font-size: 104%
}
#leaderboard_markdown td {
padding-top: 6px;
padding-bottom: 6px;
}
#leaderboard_dataframe td {
line-height: 0.1em;
}
footer {
display:none !important
}
.image-container {
display: flex;
align-items: center;
padding: 1px;
}
.image-container img {
margin: 0 30px;
height: 20px;
max-height: 100%;
width: auto;
max-width: 20%;
}
"""
def model_hyperlink(model_name, link):
    """Return an HTML anchor that shows *model_name* and points at *link*.

    The anchor opens in a new tab and is styled with a dotted underline.
    Neither argument is escaped; they are inserted into the markup as-is.
    """
    anchor_style = (
        "color: var(--link-text-color); "
        "text-decoration: underline;"
        "text-decoration-style: dotted;"
    )
    return '<a target="_blank" href="{}" style="{}">{}</a>'.format(
        link, anchor_style, model_name
    )
def load_leaderboard_table_csv(filename, add_hyperlink=True):
    """Parse a leaderboard CSV file into a list of row dictionaries.

    The first line is the comma-separated header. Every following non-blank
    line becomes one dict keyed by header name. Values in the "Model" and
    "Link" columns are kept as stripped strings; every other column is
    converted with ``int``. Fields are split on plain commas, so quoted
    fields containing commas are not supported.

    Args:
        filename: Path of the CSV file to read.
        add_hyperlink: When true, replace each row's "Model" value with the
            HTML anchor built by ``model_hyperlink`` from the row's "Model"
            and "Link" values.

    Returns:
        A list of dicts, one per data line, in file order. The list is empty
        when the file has no lines at all.

    Raises:
        ValueError: If a cell outside "Model"/"Link" is not an integer.
        KeyError: If ``add_hyperlink`` is true and a row has no "Model" or
            "Link" value.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(filename) as csv_file:
        lines = csv_file.readlines()
    if not lines:
        return []

    heads = [v.strip() for v in lines[0].split(",")]
    rows = []
    for line in lines[1:]:
        # A trailing newline or spacer line carries no data; skip it instead
        # of emitting a bogus row.
        if not line.strip():
            continue
        row = [v.strip() for v in line.split(",")]
        item = {}
        for h, v in zip(heads, row):
            if h != "Model" and h != "Link":
                item[h] = int(v)
            else:
                item[h] = v
        if add_hyperlink:
            item["Model"] = model_hyperlink(item["Model"], item["Link"])
        rows.append(item)
    return rows
def get_arena_table(model_table_df):
    """Build the ranked rows displayed in the leaderboard table.

    Rows are ordered by "Final Score" from highest to lowest and numbered
    starting at 1.

    Args:
        model_table_df: DataFrame containing the columns "Model",
            "Text Recognition", "Scene Text-Centric VQA", "Doc-Oriented VQA",
            "KIE", "HMER" and "Final Score". Any index is accepted.

    Returns:
        A list with one entry per model, each of the form
        ``[rank, Model, Text Recognition, Scene Text-Centric VQA,
        Doc-Oriented VQA, KIE, HMER, Final Score]``.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    value_columns = (
        "Model",
        "Text Recognition",
        "Scene Text-Centric VQA",
        "Doc-Oriented VQA",
        "KIE",
        "HMER",
        "Final Score",
    )
    # sort by rating
    ranked_df = model_table_df.sort_values(by=["Final Score"], ascending=False)
    # The arrays below are in sorted order, so they must be read by position.
    # Indexing them with the pre-sort index label pairs each rank with the
    # wrong row whenever the input is not already sorted.
    column_values = [ranked_df[name].values for name in value_columns]
    return [
        [position + 1] + [values[position] for values in column_values]
        for position in range(len(ranked_df))
    ]
def build_leaderboard_tab(leaderboard_table_file, show_plot=False):
    """Render the leaderboard heading, score table and submission notice.

    Creates Gradio components as a side effect, so it is meant to be called
    while a ``gr.Blocks`` context is open (``build_demo`` does this).

    Args:
        leaderboard_table_file: Path of the leaderboard CSV. When falsy, the
            heading and table are skipped and only the notice is rendered.
        show_plot: Accepted for interface compatibility; not used by this
            function.

    Returns:
        None.
    """
    if leaderboard_table_file:
        data = load_leaderboard_table_csv(leaderboard_table_file)
        model_table_df = pd.DataFrame(data)
        # arena table
        arena_table_vals = get_arena_table(model_table_df)

        # The heading starts with the trophy emoji (U+1F3C6); it had been
        # stored as mis-decoded bytes and rendered as garbage characters.
        md_head = """
# 🏆 OCRBench Leaderboard
| [GitHub](https://github.com/Yuliang-Liu/MultimodalOCR) | [Paper](https://arxiv.org/abs/2305.07895) |
"""
        gr.Markdown(md_head, elem_id="leaderboard_markdown")
        with gr.Tabs():
            with gr.Tab("OCRBench", id=0):
                md = "OCRBench is a comprehensive evaluation benchmark designed to assess the OCR capabilities of Large Multimodal Models. It comprises five components: Text Recognition, SceneText-Centric VQA, Document-Oriented VQA, Key Information Extraction, and Handwritten Mathematical Expression Recognition. The benchmark includes 1000 question-answer pairs, and all the answers undergo manual verification and correction to ensure a more precise evaluation."
                gr.Markdown(md, elem_id="leaderboard_markdown")
                # One header, datatype and column width per entry of the rows
                # returned by get_arena_table.
                gr.Dataframe(
                    headers=[
                        "Rank",
                        "Name",
                        "Text Recognition",
                        "Scene Text-Centric VQA",
                        "Doc-Oriented VQA",
                        "KIE",
                        "HMER",
                        "Final Score",
                    ],
                    datatype=[
                        "str",
                        "markdown",
                        "number",
                        "number",
                        "number",
                        "number",
                        "number",
                        "number",
                    ],
                    value=arena_table_vals,
                    elem_id="arena_leaderboard_dataframe",
                    height=700,
                    column_widths=[60, 120, 150, 200, 180, 80, 80, 160],
                    wrap=True,
                )

    # NOTE(review): the contact address in the notice below reads like an
    # e-mail obfuscation placeholder -- confirm the real address.
    md_tail = """
# Notice
If you would like to include your model in the OCRBench leaderboard, please follow the evaluation instructions provided on [GitHub](https://github.com/Yuliang-Liu/MultimodalOCR) and feel free to contact us via email at [email protected]. We will update the leaderboard in time."""
    gr.Markdown(md_tail, elem_id="leaderboard_markdown")
def build_demo(leaderboard_table_file):
    """Create the Gradio Blocks app for the OCRBench leaderboard.

    Args:
        leaderboard_table_file: Path of the leaderboard CSV, forwarded to
            ``build_leaderboard_tab``.

    Returns:
        The ``gr.Blocks`` instance, ready to be launched by the caller.
    """
    large_text_theme = gr.themes.Base(text_size=gr.themes.sizes.text_lg)
    demo = gr.Blocks(
        title="OCRBench Leaderboard",
        theme=large_text_theme,
        css=block_css,
    )
    # Components created inside the context are attached to this Blocks app.
    with demo:
        build_leaderboard_tab(leaderboard_table_file, show_plot=True)
    return demo
if __name__ == "__main__":
    # Script entry point: parse the command line, build the app and serve it
    # on all interfaces at port 7682.
    parser = argparse.ArgumentParser()
    parser.add_argument("--share", action="store_true")
    # NOTE(review): the default is an absolute path on one developer machine;
    # pass --OCRBench_file explicitly elsewhere.
    parser.add_argument("--OCRBench_file", type=str, default="/home/zhangli/lz/OCRBench/OCRBench.csv")
    args = parser.parse_args()
    demo = build_demo(args.OCRBench_file)
    # --share used to be parsed and then ignored. Forward it when given;
    # otherwise pass None so Gradio keeps its own default behaviour.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7682,
        share=True if args.share else None,
    )