echo840 committed
Commit e5930c4 · verified · 1 Parent(s): 301f36c

Update app.py

Files changed (1)
  1. app.py +3 -3
app.py CHANGED
```diff
@@ -172,7 +172,7 @@ def build_leaderboard_tab(leaderboard_table_file, text_recog_file, Inaccessible_
     # arena table
     with gr.Tab("OCRBench", id=0):
         arena_table_vals = get_arena_table(model_table_df)
-        md = "OCRBench is a comprehensive evaluation benchmark designed to assess the OCR capabilities of Large Multimodal Models. It comprises five components: Text Recognition, SceneText-Centric VQA, Document-Oriented VQA, Key Information Extraction, and Handwritten Mathematical Expression Recognition. The benchmark includes 1000 question-answer pairs, and all the answers undergo manual verification and correction to ensure a more precise evaluation."
+        md = "OCRBench is a comprehensive evaluation benchmark designed to assess the OCR capabilities of Large Multimodal Models. It consists of five components: Text Recognition, SceneText-Centric VQA, Document-Oriented VQA, Key Information Extraction, and Handwritten Mathematical Expression Recognition. The benchmark includes 1000 question-answer pairs, with all answers containing no fewer than 4 symbols to reduce false positives. All answers undergo manual verification and correction to ensure a more precise evaluation."
         gr.Markdown(md, elem_id="leaderboard_markdown")
         gr.Dataframe(
             headers=[
@@ -207,7 +207,7 @@ def build_leaderboard_tab(leaderboard_table_file, text_recog_file, Inaccessible_
         )
     with gr.Tab("Text Recognition", id=1):
         arena_table_vals = get_recog_table(recog_table_df)
-        md = "OCRBench is a comprehensive evaluation benchmark designed to assess the OCR capabilities of Large Multimodal Models. It comprises five components: Text Recognition, SceneText-Centric VQA, Document-Oriented VQA, Key Information Extraction, and Handwritten Mathematical Expression Recognition. The benchmark includes 1000 question-answer pairs, and all the answers undergo manual verification and correction to ensure a more precise evaluation."
+        md = "OCRBench is a comprehensive evaluation benchmark designed to assess the OCR capabilities of Large Multimodal Models. It consists of five components: Text Recognition, SceneText-Centric VQA, Document-Oriented VQA, Key Information Extraction, and Handwritten Mathematical Expression Recognition. The benchmark includes 1000 question-answer pairs, with all answers containing no fewer than 4 symbols to reduce false positives. All answers undergo manual verification and correction to ensure a more precise evaluation."
         gr.Markdown(md, elem_id="leaderboard_markdown")
         gr.Dataframe(
             headers=[
@@ -281,7 +281,7 @@ def build_leaderboard_tab(leaderboard_table_file, text_recog_file, Inaccessible_
             pass
     md_tail = f"""
 # Notice
-To reduce false positives, we filter out questions that have answers containing fewer than 4 symbols from all datasets. Sometimes, API calls to closed-source models may not succeed. In such cases, we will repeat the calls for unsuccessful samples until it becomes impossible to obtain a successful response. It is important to note that due to rigorous security reviews by OpenAI, GPT4V refuses to provide results for the 84 samples in OCRBench.
+Sometimes, API calls to closed-source models may not succeed. In such cases, we will repeat the calls for unsuccessful samples until it becomes impossible to obtain a successful response. It is important to note that due to rigorous security reviews by OpenAI, GPT4V refuses to provide results for the 84 samples in OCRBench.
 If you would like to include your model in the OCRBench leaderboard, please follow the evaluation instructions provided on [GitHub](https://github.com/Yuliang-Liu/MultimodalOCR), [VLMEvalKit](https://github.com/open-compass/VLMEvalKit) or [lmms-eval](https://github.com/EvolvingLMMs-Lab/lmms-eval) and feel free to contact us via email at [email protected]. We will update the leaderboard in time."""
     gr.Markdown(md_tail, elem_id="leaderboard_markdown")
```
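All three hunks touch the same Gradio layout: each leaderboard tab is a `gr.Tab` holding a `gr.Markdown` blurb above a `gr.Dataframe`. For orientation, here is a minimal, self-contained sketch of that pattern; the headers and the example row are hypothetical stand-ins for the real columns produced by `get_arena_table(model_table_df)` in app.py:

```python
# Minimal sketch of the tab pattern the hunks above modify.
# The headers and the example row are illustrative only; the real
# table values come from get_arena_table(model_table_df) in app.py.
import gradio as gr

md = "OCRBench is a comprehensive evaluation benchmark ..."  # blurb shown above the table

with gr.Blocks() as demo:
    with gr.Tab("OCRBench", id=0):
        gr.Markdown(md, elem_id="leaderboard_markdown")
        gr.Dataframe(
            headers=["Rank", "Model", "Final Score"],  # hypothetical columns
            value=[[1, "ExampleModel", 595]],          # hypothetical row
        )

if __name__ == "__main__":
    demo.launch()
```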
 
 
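The notice also describes a retry policy for closed-source APIs: failed samples are re-queried until no attempt succeeds anymore. A rough sketch of that policy under stated assumptions; `call_model`, `max_rounds`, and the sample dicts are hypothetical names, not the leaderboard's actual evaluation code:

```python
# Hedged sketch of the retry policy described in the notice: re-query
# failed samples in rounds until a round makes no progress. call_model
# and max_rounds are hypothetical, not the real evaluation harness.
def retry_failed_samples(samples, call_model, max_rounds=5):
    pending = list(samples)
    results = {}
    for _ in range(max_rounds):
        if not pending:
            break
        still_failing = []
        for sample in pending:
            try:
                results[sample["id"]] = call_model(sample)
            except Exception:
                still_failing.append(sample)  # retry in the next round
        if len(still_failing) == len(pending):
            break  # no progress: "impossible to obtain a successful response"
        pending = still_failing
    return results, pending  # pending holds permanently unanswered samples
```

Samples still pending at the end correspond to cases like the 84 prompts GPT4V refuses: no number of retries yields an answer.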