Update compute_score.py
compute_score.py  CHANGED  +11 -34
@@ -1,5 +1,4 @@
 """ Official evaluation script for v1.1 of the SQuAD dataset. """
-
 import argparse
 import json
 import re
@@ -26,37 +25,18 @@ def normalize_answer(s):
 
     return white_space_fix(remove_articles(remove_punc(lower(s))))
 
-def precision_score(prediction, ground_truth):
-    prediction_tokens = normalize_answer(prediction).split()
-    ground_truth_tokens = normalize_answer(ground_truth).split()
-    common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
-    num_same = sum(common.values())
-    if num_same == 0:
-        return 0
-    precision = 1.0 * num_same / len(prediction_tokens)
-    return precision
 
-def recall_score(prediction, ground_truth):
+def f1_score(prediction, ground_truth):
     prediction_tokens = normalize_answer(prediction).split()
     ground_truth_tokens = normalize_answer(ground_truth).split()
     common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
     num_same = sum(common.values())
     if num_same == 0:
         return 0
+    precision = 1.0 * num_same / len(prediction_tokens)
     recall = 1.0 * num_same / len(ground_truth_tokens)
-    return recall
-
-# def f1_score(prediction, ground_truth):
-#     prediction_tokens = normalize_answer(prediction).split()
-#     ground_truth_tokens = normalize_answer(ground_truth).split()
-#     common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
-#     num_same = sum(common.values())
-#     if num_same == 0:
-#         return 0
-#     precision = 1.0 * num_same / len(prediction_tokens)
-#     recall = 1.0 * num_same / len(ground_truth_tokens)
-#     f1 = (2 * precision * recall) / (precision + recall)
-#     return f1
+    f1 = (2 * precision * recall) / (precision + recall)
+    return f1
 
 
 def exact_match_score(prediction, ground_truth):
@@ -72,7 +52,7 @@ def metric_max_over_ground_truths(metric_fn, prediction, ground_truths):
 
 
 def compute_score(dataset, predictions):
-
+    f1 = exact_match = total = 0
     for article in dataset:
         for paragraph in article["paragraphs"]:
             for qa in paragraph["qas"]:
@@ -84,18 +64,15 @@ def compute_score(dataset, predictions):
                 ground_truths = list(map(lambda x: x["text"], qa["answers"]))
                 prediction = predictions[qa["id"]]
                 exact_match += metric_max_over_ground_truths(exact_match_score, prediction, ground_truths)
-
-
-
-
-
-                precision += precision_qa
+                f1_temp = metric_max_over_ground_truths(f1_score, prediction, ground_truths)
+                print(f1_temp)
+                f1 += f1_temp
+
+
     exact_match = 100.0 * exact_match / total
     f1 = 100.0 * f1 / total
-    recall = 100.0 * recall / total
-    precision = 100.0 * precision / total
 
-    return {"exact_match": exact_match, "f1": f1
+    return {"exact_match": exact_match, "f1": f1}
 
 
 if __name__ == "__main__":
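A quick way to sanity-check the f1_score this commit adds (not part of the diff, just an illustration): the values below assume normalize_answer behaves like the official SQuAD v1.1 evaluator (lowercasing, stripping punctuation and the articles a/an/the) and that metric_max_over_ground_truths takes the maximum score over the references, as its call sites above suggest.

# Illustrative only; assumes compute_score.py is importable from the working directory.
from compute_score import f1_score, metric_max_over_ground_truths

prediction = "the quick brown fox"
ground_truths = ["quick fox jumps", "a quick brown fox"]

# Against the first reference: tokens {quick, brown, fox} vs {quick, fox, jumps},
# 2 shared tokens -> precision = recall = 2/3 -> F1 = 2/3.
print(f1_score(prediction, ground_truths[0]))  # ~0.667

# The evaluator scores a prediction against its best-matching reference;
# the second reference matches exactly after normalization.
print(metric_max_over_ground_truths(f1_score, prediction, ground_truths))  # 1.0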
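For context, here is a minimal sketch of how the updated compute_score might be exercised end to end. The toy article and question id are made up for illustration, and the expected 100.0 scores assume the context lines hidden from this diff count each question into total and look up predictions by qa["id"] the way the official SQuAD evaluator does.

# Hypothetical toy inputs shaped like the structures compute_score iterates over
# (article["paragraphs"] -> paragraph["qas"] -> qa["id"], qa["answers"][i]["text"]);
# real SQuAD v1.1 files carry more fields (titles, contexts, questions).
from compute_score import compute_score

dataset = [
    {
        "paragraphs": [
            {
                "qas": [
                    {
                        "id": "toy-q1",
                        "answers": [{"text": "the quick brown fox"}],
                    }
                ]
            }
        ]
    }
]
predictions = {"toy-q1": "quick brown fox"}

# Expect {"exact_match": 100.0, "f1": 100.0}: after normalization the prediction
# matches the single reference, and total should be 1 for the one question.
print(compute_score(dataset, predictions))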