Weyaxi
/

metadata-test

Eval Results

Model card Files Files and versions Community

Weyaxi committed on Dec 2, 2023

Commit

0954803

•

1 Parent(s): a69ad7a

Update README.md

Browse files

Files changed (1) hide show

README.md +72 -51

README.md CHANGED Viewed

@@ -1,98 +1,119 @@
 ---
 license: apache-2.0
 model-index:
-- name: metadatatest
   results:
   - task:
       type: text-generation
     dataset:
-      name: "ai2_arc"
-      type: "ai2_arc"
     metrics:
-       - name: AI2 Reasoning Challenge (25-Shot)
-         type: AI2 Reasoning Challenge (25-Shot)
-         value: 54.3921
     source:
       name: Open LLM Leaderboard
-      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard
-  - task:
-      type: text-generation
-    dataset:
-      name: "hellaswag"
-      type: "hellaswag"
-    metrics:
-       - name: HellaSwag (10-shot)
-         type: HellaSwag (10-shot)
-         value: 54.3921
-    source:
-      name: Open LLM Leaderboard
-      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard
   - task:
       type: text-generation
     dataset:
-      name: "mmlu"
-      type: "mmlu"
     metrics:
-       - name: MMLU (5-Shot)
-         type: MMLU (5-Shot)
-         value: 54.3921
     source:
       name: Open LLM Leaderboard
-      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard
   - task:
       type: text-generation
     dataset:
-      name: "truthful_qa"
-      type: "truthful_qa"
     metrics:
-       - name: TruthfulQA (0-shot)
-         type: TruthfulQA (0-shot)
-         value: 54.3921
     source:
       name: Open LLM Leaderboard
-      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard
   - task:
       type: text-generation
     dataset:
-      name: "winogrande"
-      type: "winogrande"
     metrics:
-       - name: Winogrande (5-shot)
-         type: Winogrande (5-shot)
-         value: 49
     source:
       name: Open LLM Leaderboard
-      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard
   - task:
       type: text-generation
     dataset:
-      name: "gsm8k"
-      type: "gsm8k"
     metrics:
-       - name: GSM8k (5-shot)
-         type: GSM8k (5-shot)
-         value: 5
     source:
       name: Open LLM Leaderboard
-      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard
   - task:
       type: text-generation
     dataset:
-      name: "drop"
-      type: "drop"
     metrics:
-       - name: DROP (3-shot)
-         type: DROP (3-shot)
-         value: 9
     source:
       name: Open LLM Leaderboard
-      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard
 ---

 ---
 license: apache-2.0
 model-index:
+- name: metadata-test
   results:
+  # AI2 Reasoning Challenge (25-Shot)
   - task:
       type: text-generation
+      name: Text Generation
     dataset:
+      name: AI2 Reasoning Challenge (25-Shot)
+      type: ai2_arc
+      config: ARC-Challenge
+      split: test
+      args:
+        num_few_shot: 25
     metrics:
+       - type: acc_norm
+         name: normalized accuracy
+         value: 0.6203071672354948
     source:
       name: Open LLM Leaderboard
+      url: https://huggingface.co/datasets/open-llm-leaderboard/details_
+  # HellaSwag (10-shot)
   - task:
       type: text-generation
+      name: Text Generation
     dataset:
+      name: HellaSwag (10-Shot)
+      type: hellaswag
+      split: validation
+      args:
+        num_few_shot: 10
     metrics:
+       - type: acc_norm
+         name: normalized accuracy
+         value: 0.8435570603465445
     source:
       name: Open LLM Leaderboard
+      url: https://huggingface.co/datasets/open-llm-leaderboard/details_
+  # TruthfulQA (0-shot)
   - task:
       type: text-generation
+      name: Text Generation
     dataset:
+      name: TruthfulQA (0-shot)
+      type: truthful_qa
+      config: multiple_choice
+      split: validation
+      args:
+        num_few_shot: 0
     metrics:
+       - type: mc2
+         value: 0.5744916942762855
     source:
       name: Open LLM Leaderboard
+      url: https://huggingface.co/datasets/open-llm-leaderboard/details_
+  # GSM8k (5-shot)
   - task:
       type: text-generation
+      name: Text Generation
     dataset:
+      name: GSM8k (5-shot)
+      type: gsm8k
+      config: main
+      split: test
+      args:
+        num_few_shot: 5
     metrics:
+       - type: acc
+         name: accuracy
+         value: 0.12736921910538287
     source:
       name: Open LLM Leaderboard
+      url: https://huggingface.co/datasets/open-llm-leaderboard/details_
+  # MMLU (5-Shot)
   - task:
       type: text-generation
+      name: Text Generation
     dataset:
+      name: MMLU (5-Shot)
+      type: cais/mmlu
+      config: all
+      split: test
+      args:
+        num_few_shot: 5
     metrics:
+       - type: acc
+         name: accuracy
+         value: 0.6107
     source:
       name: Open LLM Leaderboard
+      url: https://huggingface.co/datasets/open-llm-leaderboard/details_
+  # Winogrande (5-shot)
   - task:
       type: text-generation
+      name: Text Generation
     dataset:
+      name: Winogrande (5-shot)
+      type: winogrande
+      config: winogrande_xl
+      split: validation
+      args:
+        num_few_shot: 5
     metrics:
+       - type: acc
+         name: accuracy
+         value: 0.7774269928966061
     source:
       name: Open LLM Leaderboard
+      url: https://huggingface.co/datasets/open-llm-leaderboard/details_
 ---