Update README.md
Browse files
README.md
CHANGED
@@ -60,25 +60,26 @@ The large language model underwent rigorous training to excel in performing a va
|
|
60 |

|
61 |
|
62 |
|
63 |
-
Dataset | exact_match | rouge1 | bleu | bertscore
|
64 |
-
|
65 |
-
ragbench-
|
66 |
-
ragbench-tatqa-
|
67 |
-
|
68 |
-
|
69 |
-
ragbench-
|
70 |
-
|
71 |
-
|
72 |
-
ragbench-
|
73 |
-
ragbench-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
ragbench-
|
80 |
-
ragbench-
|
81 |
-
|
|
|
82 |
|
83 |
|
84 |
SILMA RAG QA Benchmark Score: 0.3478
|
|
|
60 |

|
61 |
|
62 |
|
63 |
+
|Dataset | exact_match | rouge1 | bleu | bertscore|
|
64 |
+
|---|---|---|---|---|
|
65 |
+
|ragbench-finqa-en-test | 0.000 | 0.587 | 0.321 | 0.760|
|
66 |
+
|ragbench-tatqa-ar-test | 0.000 | 0.484 | 0.130 | 0.774|
|
67 |
+
|ragbench-tatqa-en-test | 0.059 | 0.646 | 0.423 | 0.808|
|
68 |
+
|rag-instruct-benchmark-tester-en | 0.370 | 0.683 | 0.196 | 0.791|
|
69 |
+
|ragbench-expertqa-en-test |0.000 | 0.465 | 0.151 | 0.677|
|
70 |
+
|ragbench-msmarco-ar-test |0.000 | 0.144 | 0.096 | 0.781|
|
71 |
+
|sciq-ar-test |0.170 | 0.000 | 0.000 | 0.753|
|
72 |
+
|ragbench-covidqa-en-test |0.020 | 0.521 | 0.242 | 0.734|
|
73 |
+
|ragbench-emanual-ar-test |0.000 | 0.237 | 0.159 | 0.806|
|
74 |
+
|ragbench-finqa-ar-test |0.000 | 0.377 | 0.109 | 0.780|
|
75 |
+
|xquad-r-validation-en |0.120 | 0.326 | 0.041 | 0.603|
|
76 |
+
|ragbench-emanual-en-test |0.000 | 0.565 | 0.288 | 0.722|
|
77 |
+
|xquad-r-ar-validation |0.070 | 0.130 | 0.042 | 0.698|
|
78 |
+
|boolq-ar-test |0.450 | 0.000 | 0.000 | 0.700|
|
79 |
+
|ragbench-hotpotqa-en-test |0.060 | 0.732 | 0.503 | 0.837|
|
80 |
+
|ragbench-covidqa-ar-test |0.000 | 0.179 | 0.104 | 0.783|
|
81 |
+
|ragbench-msmarco-en-test |0.020 | 0.491 | 0.207 | 0.729|
|
82 |
+
|**Benchmark Average Scores** |0.079 | 0.386 | 0.177 | 0.749|
|
83 |
|
84 |
|
85 |
SILMA RAG QA Benchmark Score: 0.3478
|