Spaces:
Build error
Build error
fix bug in perf calc
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- data/Llama3.1-70B-Chinese-Chat_metrics.csv +11 -11
- data/Llama3.1-70B-Chinese-Chat_results.csv +0 -0
- data/Llama3.1-70B-Chinese-Chat_shots_metrics.csv +5 -5
- data/Llama3.1-8B-Chinese-Chat_metrics.csv +11 -11
- data/Llama3.1-8B-Chinese-Chat_results.csv +0 -0
- data/Llama3.1-8B-Chinese-Chat_shots_metrics.csv +7 -7
- data/Mistral-7B-v0.3-Chinese-Chat_metrics.csv +11 -11
- data/Mistral-7B-v0.3-Chinese-Chat_results.csv +0 -0
- data/Mistral-7B-v0.3-Chinese-Chat_shots_metrics.csv +6 -6
- data/Qwen2-72B-Instruct_metrics.csv +11 -11
- data/Qwen2-72B-Instruct_shots_metrics.csv +1 -1
- data/Qwen2-7B-Instruct_metrics.csv +11 -11
- data/Qwen2-7B-Instruct_shots_metrics.csv +2 -2
- data/Qwen2.5-0.5B-Instruct_metrics.csv +11 -11
- data/Qwen2.5-0.5B-Instruct_results.csv +0 -0
- data/Qwen2.5-0.5B-Instruct_shots_metrics.csv +7 -7
- data/Qwen2.5-1.5B-Instruct_metrics.csv +1 -1
- data/Qwen2.5-1.5B-Instruct_results.csv +0 -0
- data/Qwen2.5-1.5B-Instruct_shots_metrics.csv +1 -1
- data/Qwen2.5-3B-Instruct_metrics.csv +11 -11
- data/Qwen2.5-3B-Instruct_results.csv +0 -0
- data/Qwen2.5-3B-Instruct_shots_metrics.csv +7 -7
- data/Qwen2.5-72B-Instruct_metrics.csv +11 -11
- data/Qwen2.5-72B-Instruct_results.csv +0 -0
- data/Qwen2.5-72B-Instruct_shots_metrics.csv +3 -2
- data/Qwen2.5-7B-Instruct_results.csv +0 -0
- data/best_metrics.csv +17 -15
- data/best_results.csv +0 -0
- data/few-shots_metrics.csv +74 -84
- data/fine-tuning_metrics.csv +110 -110
- data/internlm2_5-20b-chat_metrics.csv +11 -11
- data/internlm2_5-20b-chat_results.csv +0 -0
- data/internlm2_5-20b-chat_shots_metrics.csv +1 -1
- data/internlm2_5-7b-chat-1m_metrics.csv +11 -11
- data/internlm2_5-7b-chat-1m_results.csv +0 -0
- data/internlm2_5-7b-chat-1m_shots_metrics.csv +7 -7
- data/internlm2_5-7b-chat_metrics.csv +11 -11
- data/internlm2_5-7b-chat_results.csv +0 -0
- data/internlm2_5-7b-chat_shots_metrics.csv +7 -7
- data/openai_metrics.csv +28 -28
- data/openai_results.csv +0 -0
- datasets/mgtv/dev.csv +0 -3
- datasets/mgtv/val.csv +0 -0
- notebooks/00_Data Analysis.ipynb +2 -2
- notebooks/01a_internlm2_5-20b-chat_analysis.ipynb +2 -2
- notebooks/01a_internlm2_5-7b-chat-1m_analysis.ipynb +2 -2
- notebooks/01a_internlm2_5-7b-chat_analysis.ipynb +2 -2
- notebooks/01b_Mistral-7B-v0.3-Chinese-Chat_analysis.ipynb +2 -2
- notebooks/02a_Qwen2-7B-Instruct_analysis.ipynb +2 -2
- notebooks/02b_Qwen2-72B-Instruct_analysis.ipynb +2 -2
data/Llama3.1-70B-Chinese-Chat_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat_torch.bfloat16_4bit_lf,0.
|
3 |
-
0.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-35_torch.bfloat16_4bit_lf,0.
|
4 |
-
0.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-70_torch.bfloat16_4bit_lf,0.
|
5 |
-
0.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-105_torch.bfloat16_4bit_lf,0.
|
6 |
-
0.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-140_torch.bfloat16_4bit_lf,0.
|
7 |
-
1.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-175_torch.bfloat16_4bit_lf,0.
|
8 |
-
1.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-210_torch.bfloat16_4bit_lf,0.
|
9 |
-
1.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-245_torch.bfloat16_4bit_lf,0.
|
10 |
-
1.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-280_torch.bfloat16_4bit_lf,0.
|
11 |
-
1.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-315_torch.bfloat16_4bit_lf,0.
|
12 |
-
2.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-350_torch.bfloat16_4bit_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat_torch.bfloat16_4bit_lf,0.7636666666666667,0.7806653325131986,0.7636666666666667,0.7525813484548423,0.009666666666666667
|
3 |
+
0.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-35_torch.bfloat16_4bit_lf,0.778,0.8148707737020212,0.778,0.7910805488003003,0.9996666666666667
|
4 |
+
0.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7306666666666667,0.8145782271710159,0.7306666666666667,0.7624724104697406,1.0
|
5 |
+
0.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7193333333333334,0.8213567226911125,0.7193333333333334,0.7560702640626931,1.0
|
6 |
+
0.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-140_torch.bfloat16_4bit_lf,0.7563333333333333,0.826789897753756,0.7563333333333333,0.7815164366677209,1.0
|
7 |
+
1.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-175_torch.bfloat16_4bit_lf,0.7963333333333333,0.8248972880055918,0.7963333333333333,0.8076868978089201,1.0
|
8 |
+
1.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-210_torch.bfloat16_4bit_lf,0.7326666666666667,0.8265345821998035,0.7326666666666667,0.7644418492070342,1.0
|
9 |
+
1.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7556666666666667,0.8258994609525315,0.7556666666666667,0.7820405339757727,1.0
|
10 |
+
1.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-280_torch.bfloat16_4bit_lf,0.757,0.8264461657684251,0.757,0.7834496144681513,1.0
|
11 |
+
1.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7546666666666667,0.8277723752096544,0.7546666666666667,0.7823584779069335,1.0
|
12 |
+
2.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7496666666666667,0.8282310230333227,0.7496666666666667,0.7791947625361637,1.0
|
data/Llama3.1-70B-Chinese-Chat_results.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/Llama3.1-70B-Chinese-Chat_shots_metrics.csv
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-00,0.
|
3 |
-
5,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-05,0.
|
4 |
-
10,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-10,0.
|
5 |
-
20,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-20,0.
|
6 |
-
30,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-30,0.
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-00,0.7636666666666667,0.7806653325131986,0.7636666666666667,0.7525813484548423,0.009666666666666667
|
3 |
+
5,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-05,0.7536666666666667,0.772126097633354,0.7536666666666667,0.7545029613768596,0.79
|
4 |
+
10,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-10,0.754,0.7729477984842943,0.754,0.756682017266956,0.8326666666666667
|
5 |
+
20,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-20,0.738,0.7566938786102072,0.738,0.7348961489952073,0.819
|
6 |
+
30,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-30,0.758,0.7731535340331644,0.758,0.7565012256889623,0.548
|
data/Llama3.1-8B-Chinese-Chat_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat_torch.float16_lf,0.
|
3 |
-
0.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-35_torch.float16_lf,0.
|
4 |
-
0.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-70_torch.float16_lf,0.
|
5 |
-
0.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-105_torch.float16_lf,0.
|
6 |
-
0.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-140_torch.float16_lf,0.
|
7 |
-
1.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-175_torch.float16_lf,0.
|
8 |
-
1.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-210_torch.float16_lf,0.
|
9 |
-
1.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-245_torch.float16_lf,0.
|
10 |
-
1.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-280_torch.float16_lf,0.
|
11 |
-
1.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-315_torch.float16_lf,0.
|
12 |
-
2.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-350_torch.float16_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat_torch.float16_lf,0.742,0.7477056799746837,0.742,0.7371050181385632,0.8033333333333333
|
3 |
+
0.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-35_torch.float16_lf,0.709,0.7987219597893886,0.709,0.7427961200958145,1.0
|
4 |
+
0.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-70_torch.float16_lf,0.7163333333333334,0.8058657875960304,0.7163333333333334,0.7487811196109319,0.9993333333333333
|
5 |
+
0.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-105_torch.float16_lf,0.6996666666666667,0.802722482275839,0.6996666666666667,0.7370938556711591,1.0
|
6 |
+
0.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7716666666666666,0.8092193821623755,0.7716666666666666,0.7864287269398251,1.0
|
7 |
+
1.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-175_torch.float16_lf,0.78,0.810582723471486,0.78,0.7924651054056209,1.0
|
8 |
+
1.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7313333333333333,0.8157783263996798,0.7313333333333333,0.7628807622782868,1.0
|
9 |
+
1.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-245_torch.float16_lf,0.751,0.8125856808988221,0.751,0.7745416635653988,1.0
|
10 |
+
1.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-280_torch.float16_lf,0.739,0.8097375095673094,0.739,0.7662329023371559,1.0
|
11 |
+
1.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-315_torch.float16_lf,0.7236666666666667,0.8145530585912838,0.7236666666666667,0.7580428816095297,1.0
|
12 |
+
2.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-350_torch.float16_lf,0.7293333333333333,0.8151184301713545,0.7293333333333333,0.7616699266814145,1.0
|
data/Llama3.1-8B-Chinese-Chat_results.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/Llama3.1-8B-Chinese-Chat_shots_metrics.csv
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-00,0.
|
3 |
-
5,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-05,0.7056666666666667,0.
|
4 |
-
10,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-10,0.
|
5 |
-
20,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-20,0.767,0.
|
6 |
-
30,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-30,0.
|
7 |
-
40,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-40,0.
|
8 |
-
50,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-50,0.
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-00,0.742,0.7477056799746837,0.742,0.7371050181385632,0.8033333333333333
|
3 |
+
5,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-05,0.7056666666666667,0.7605745196939752,0.7056666666666667,0.7269189565098723,0.9886666666666667
|
4 |
+
10,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-10,0.6676666666666666,0.7834080522821993,0.6676666666666666,0.7082605860921491,0.9623333333333334
|
5 |
+
20,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-20,0.767,0.7690587905035869,0.767,0.7661695279121855,0.979
|
6 |
+
30,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-30,0.7693333333333333,0.7765844200886581,0.7693333333333333,0.7697325957683855,0.7326666666666667
|
7 |
+
40,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-40,0.6813333333333333,0.7801328325609714,0.6813333333333333,0.7066801804415871,0.759
|
8 |
+
50,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-50,0.7173333333333334,0.770076853795054,0.7173333333333334,0.723119179918213,0.6623333333333333
|
data/Mistral-7B-v0.3-Chinese-Chat_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat_torch.float16_lf,0.
|
3 |
-
0.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-35_torch.float16_lf,0.
|
4 |
-
0.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-70_torch.float16_lf,0.
|
5 |
-
0.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-105_torch.float16_lf,0.
|
6 |
-
0.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-140_torch.float16_lf,0.
|
7 |
-
1.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-175_torch.float16_lf,0.
|
8 |
-
1.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-210_torch.float16_lf,0.
|
9 |
-
1.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-245_torch.float16_lf,0.
|
10 |
-
1.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-280_torch.float16_lf,0.
|
11 |
-
1.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-315_torch.float16_lf,0.
|
12 |
-
2.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-350_torch.float16_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat_torch.float16_lf,0.6946666666666667,0.701136267898111,0.6946666666666667,0.6634078645357937,0.011666666666666667
|
3 |
+
0.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-35_torch.float16_lf,0.702,0.7932731014186957,0.702,0.7342714734731689,1.0
|
4 |
+
0.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-70_torch.float16_lf,0.742,0.78982949223512,0.742,0.7536681109811127,1.0
|
5 |
+
0.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-105_torch.float16_lf,0.6596666666666666,0.7923396753604393,0.6596666666666666,0.7067542301676931,1.0
|
6 |
+
0.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7146666666666667,0.7861341885687435,0.7146666666666667,0.7404677278137267,1.0
|
7 |
+
1.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-175_torch.float16_lf,0.7326666666666667,0.7876867721932461,0.7326666666666667,0.7471869515031995,1.0
|
8 |
+
1.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7016666666666667,0.7903119228393193,0.7016666666666667,0.7348708822385348,1.0
|
9 |
+
1.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-245_torch.float16_lf,0.75,0.7885868317699068,0.75,0.7648234347578796,1.0
|
10 |
+
1.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-280_torch.float16_lf,0.7156666666666667,0.7846106674095725,0.7156666666666667,0.7410042005708856,1.0
|
11 |
+
1.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-315_torch.float16_lf,0.6916666666666667,0.7864256994491394,0.6916666666666667,0.7257499426487266,1.0
|
12 |
+
2.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-350_torch.float16_lf,0.6976666666666667,0.7889443494370009,0.6976666666666667,0.7307996137659796,1.0
|
data/Mistral-7B-v0.3-Chinese-Chat_results.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/Mistral-7B-v0.3-Chinese-Chat_shots_metrics.csv
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-00,0.
|
3 |
-
5,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-05,0.
|
4 |
-
10,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-10,0.
|
5 |
-
20,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-20,0.
|
6 |
-
30,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-30,0.
|
7 |
-
40,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-40,0.
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-00,0.6946666666666667,0.701136267898111,0.6946666666666667,0.6634078645357937,0.011666666666666667
|
3 |
+
5,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-05,0.6446666666666667,0.7451807329096397,0.6446666666666667,0.681030628954011,0.142
|
4 |
+
10,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-10,0.6036666666666667,0.7334913867282189,0.6036666666666667,0.6493185547247415,0.10633333333333334
|
5 |
+
20,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-20,0.6276666666666667,0.7398894455389585,0.6276666666666667,0.6690543758928521,0.08266666666666667
|
6 |
+
30,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-30,0.661,0.7422079284443324,0.661,0.6862974695781847,0.07
|
7 |
+
40,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-40,0.6233333333333333,0.7465186818567994,0.6233333333333333,0.6643697809628606,0.06333333333333334
|
data/Qwen2-72B-Instruct_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch.bfloat16_4bit_lf,0.
|
3 |
-
0.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.
|
4 |
-
0.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.
|
5 |
-
0.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.
|
6 |
-
0.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.
|
7 |
-
1.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.
|
8 |
-
1.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.
|
9 |
-
1.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.
|
10 |
-
1.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.
|
11 |
-
1.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.
|
12 |
-
2.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch.bfloat16_4bit_lf,0.7516666666666667,0.7949378981748352,0.7516666666666667,0.7572499605227642,0.9773333333333334
|
3 |
+
0.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.7583333333333333,0.8199928526815756,0.7583333333333333,0.782751089787442,1.0
|
4 |
+
0.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.7366666666666667,0.8224865755517643,0.7366666666666667,0.7700627366337021,1.0
|
5 |
+
0.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.757,0.8253824826209251,0.757,0.784000409833628,1.0
|
6 |
+
0.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.7893333333333333,0.8229104753645825,0.7893333333333333,0.8033124955993173,1.0
|
7 |
+
1.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.7376666666666667,0.8243654864769323,0.7376666666666667,0.7699617360961548,1.0
|
8 |
+
1.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.763,0.8318882808702871,0.763,0.7901075708186186,1.0
|
9 |
+
1.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.7656666666666667,0.8288272203240518,0.7656666666666667,0.790627109330698,1.0
|
10 |
+
1.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.7693333333333333,0.8292798021666021,0.7693333333333333,0.7930169589012503,1.0
|
11 |
+
1.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.784,0.8354349234761956,0.784,0.804194683154365,1.0
|
12 |
+
2.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.7736666666666666,0.8330147983140184,0.7736666666666666,0.7973657072550873,1.0
|
data/Qwen2-72B-Instruct_shots_metrics.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch/shots-00,0.
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch/shots-00,0.7516666666666667,0.7949378981748352,0.7516666666666667,0.7572499605227642,0.9773333333333334
|
data/Qwen2-7B-Instruct_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct_torch.float16_lf,0.
|
3 |
-
0.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-35_torch.float16_lf,0.
|
4 |
-
0.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-70_torch.float16_lf,0.
|
5 |
-
0.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-105_torch.float16_lf,0.
|
6 |
-
0.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-140_torch.float16_lf,0.
|
7 |
-
1.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-175_torch.float16_lf,0.
|
8 |
-
1.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-210_torch.float16_lf,0.
|
9 |
-
1.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-245_torch.float16_lf,0.
|
10 |
-
1.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-280_torch.float16_lf,0.
|
11 |
-
1.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-315_torch.float16_lf,0.
|
12 |
-
2.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-350_torch.float16_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct_torch.float16_lf,0.683,0.7493103872717293,0.683,0.710140098232232,0.9996666666666667
|
3 |
+
0.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-35_torch.float16_lf,0.725,0.7840171468707405,0.725,0.748994536667058,0.9996666666666667
|
4 |
+
0.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-70_torch.float16_lf,0.759,0.8005303465799652,0.759,0.7748745026535183,1.0
|
5 |
+
0.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-105_torch.float16_lf,0.6926666666666667,0.8039176975550218,0.6926666666666667,0.7332481528585848,1.0
|
6 |
+
0.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-140_torch.float16_lf,0.725,0.7952719247171957,0.725,0.7476238017654298,1.0
|
7 |
+
1.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-175_torch.float16_lf,0.6756666666666666,0.7810148934939715,0.6756666666666666,0.708653993277772,1.0
|
8 |
+
1.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-210_torch.float16_lf,0.7013333333333334,0.7969562600853992,0.7013333333333334,0.7362679665494508,1.0
|
9 |
+
1.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-245_torch.float16_lf,0.7326666666666667,0.7922538479314682,0.7326666666666667,0.755402136631717,0.9996666666666667
|
10 |
+
1.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-280_torch.float16_lf,0.6983333333333334,0.785127298428753,0.6983333333333334,0.7292251109166867,1.0
|
11 |
+
1.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-315_torch.float16_lf,0.6783333333333333,0.785390767631834,0.6783333333333333,0.7164131321837346,1.0
|
12 |
+
2.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-350_torch.float16_lf,0.689,0.7929715746898984,0.689,0.7259993126510194,1.0
|
data/Qwen2-7B-Instruct_shots_metrics.csv
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-00,0.
|
3 |
-
10,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-10,0.
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-00,0.683,0.7493103872717293,0.683,0.710140098232232,0.9996666666666667
|
3 |
+
10,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-10,0.5646666666666667,0.7391197908117386,0.5646666666666667,0.6064049121095652,0.9896666666666667
|
data/Qwen2.5-0.5B-Instruct_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct_torch.float16_lf,0.
|
3 |
-
0.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-35_torch.float16_lf,0.
|
4 |
-
0.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-70_torch.float16_lf,0.
|
5 |
-
0.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-105_torch.float16_lf,0.
|
6 |
-
0.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-140_torch.float16_lf,0.
|
7 |
-
1.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-175_torch.float16_lf,0.
|
8 |
-
1.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-210_torch.float16_lf,0.
|
9 |
-
1.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-245_torch.float16_lf,0.
|
10 |
-
1.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-280_torch.float16_lf,0.
|
11 |
-
1.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-315_torch.float16_lf,0.
|
12 |
-
2.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-350_torch.float16_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct_torch.float16_lf,0.443,0.5490534863315207,0.443,0.43178235266224163,0.594
|
3 |
+
0.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-35_torch.float16_lf,0.525,0.5819221558338251,0.525,0.4586682135998428,1.0
|
4 |
+
0.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-70_torch.float16_lf,0.54,0.6445255881472232,0.54,0.5293020271128788,1.0
|
5 |
+
0.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-105_torch.float16_lf,0.43766666666666665,0.6565760150511494,0.43766666666666665,0.49167707971005714,1.0
|
6 |
+
0.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-140_torch.float16_lf,0.49933333333333335,0.6513093602943617,0.49933333333333335,0.49913143191054443,1.0
|
7 |
+
1.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-175_torch.float16_lf,0.5523333333333333,0.6622075519433389,0.5523333333333333,0.5627283867177305,1.0
|
8 |
+
1.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-210_torch.float16_lf,0.5403333333333333,0.64319564963495,0.5403333333333333,0.5598419070210608,1.0
|
9 |
+
1.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-245_torch.float16_lf,0.5843333333333334,0.6559808590166016,0.5843333333333334,0.6086767064128167,1.0
|
10 |
+
1.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-280_torch.float16_lf,0.5216666666666666,0.6604678981061621,0.5216666666666666,0.5615446578399996,1.0
|
11 |
+
1.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-315_torch.float16_lf,0.524,0.6673441240188523,0.524,0.5607458201939703,1.0
|
12 |
+
2.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-350_torch.float16_lf,0.507,0.6597337077954278,0.5070000000000001,0.5492280882625964,1.0
|
data/Qwen2.5-0.5B-Instruct_results.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/Qwen2.5-0.5B-Instruct_shots_metrics.csv
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-00,0.
|
3 |
-
5,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-05,0.
|
4 |
-
10,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-10,0.
|
5 |
-
20,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-20,0.
|
6 |
-
30,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-30,0.
|
7 |
-
40,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-40,0.
|
8 |
-
50,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-50,0.
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-00,0.443,0.5490534863315207,0.443,0.43178235266224163,0.594
|
3 |
+
5,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-05,0.177,0.49074939459487404,0.177,0.2155165894788838,0.004
|
4 |
+
10,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-10,0.35433333333333333,0.5213384036972462,0.35433333333333333,0.39783362635065245,0.068
|
5 |
+
20,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-20,0.43666666666666665,0.5234006681691764,0.43666666666666665,0.4691719255495575,0.37266666666666665
|
6 |
+
30,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-30,0.39066666666666666,0.5462493905687185,0.39066666666666666,0.4339604066000981,0.07566666666666666
|
7 |
+
40,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-40,0.4653333333333333,0.5468189581246721,0.4653333333333333,0.49752341605759137,0.324
|
8 |
+
50,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-50,0.5026666666666667,0.5610230233594029,0.5026666666666667,0.5163435163649445,0.24333333333333335
|
data/Qwen2.5-1.5B-Instruct_metrics.csv
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct_torch.bfloat16_lf,0.
|
3 |
0.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-35_torch.bfloat16_lf,0.521,0.6393141994049955,0.521,0.5543058103456981,1.0
|
4 |
0.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-70_torch.bfloat16_lf,0.5786666666666667,0.6827334710464682,0.5786666666666667,0.6055896299128966,1.0
|
5 |
0.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-105_torch.bfloat16_lf,0.544,0.7064593462910856,0.544,0.5946365105633672,1.0
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct_torch.bfloat16_lf,0.18366666666666667,0.5244570465301668,0.18366666666666667,0.23286492799102732,0.931
|
3 |
0.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-35_torch.bfloat16_lf,0.521,0.6393141994049955,0.521,0.5543058103456981,1.0
|
4 |
0.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-70_torch.bfloat16_lf,0.5786666666666667,0.6827334710464682,0.5786666666666667,0.6055896299128966,1.0
|
5 |
0.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-105_torch.bfloat16_lf,0.544,0.7064593462910856,0.544,0.5946365105633672,1.0
|
data/Qwen2.5-1.5B-Instruct_results.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/Qwen2.5-1.5B-Instruct_shots_metrics.csv
CHANGED
@@ -2,7 +2,7 @@ shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
|
2 |
0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-00,0.18366666666666667,0.5244570465301668,0.18366666666666667,0.23286492799102732,0.931
|
3 |
5,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-05,0.349,0.5695965528635436,0.349,0.3771117506970461,0.9756666666666667
|
4 |
10,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-10,0.457,0.5932373185073849,0.457,0.4641792696031706,0.9933333333333333
|
5 |
-
20,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-20,0.
|
6 |
30,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-30,0.23,0.5479545947886839,0.23,0.3064381040560128,0.661
|
7 |
40,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-40,0.29233333333333333,0.5608411738006117,0.29233333333333333,0.3751714671158081,0.5206666666666667
|
8 |
50,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-50,0.29,0.5646814860840066,0.29,0.36883826526592467,0.4603333333333333
|
|
|
2 |
0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-00,0.18366666666666667,0.5244570465301668,0.18366666666666667,0.23286492799102732,0.931
|
3 |
5,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-05,0.349,0.5695965528635436,0.349,0.3771117506970461,0.9756666666666667
|
4 |
10,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-10,0.457,0.5932373185073849,0.457,0.4641792696031706,0.9933333333333333
|
5 |
+
20,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-20,0.24166666666666667,0.5333408149946145,0.24166666666666667,0.30859243868426434,0.8263333333333334
|
6 |
30,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-30,0.23,0.5479545947886839,0.23,0.3064381040560128,0.661
|
7 |
40,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-40,0.29233333333333333,0.5608411738006117,0.29233333333333333,0.3751714671158081,0.5206666666666667
|
8 |
50,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-50,0.29,0.5646814860840066,0.29,0.36883826526592467,0.4603333333333333
|
data/Qwen2.5-3B-Instruct_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct_torch.bfloat16_lf,0.
|
3 |
-
0.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-35_torch.bfloat16_lf,0.
|
4 |
-
0.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-70_torch.bfloat16_lf,0.
|
5 |
-
0.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-105_torch.bfloat16_lf,0.
|
6 |
-
0.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-140_torch.bfloat16_lf,0.
|
7 |
-
1.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-175_torch.bfloat16_lf,0.
|
8 |
-
1.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-210_torch.bfloat16_lf,0.
|
9 |
-
1.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-245_torch.bfloat16_lf,0.
|
10 |
-
1.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-280_torch.bfloat16_lf,0.
|
11 |
-
1.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-315_torch.bfloat16_lf,0.
|
12 |
-
2.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-350_torch.bfloat16_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct_torch.bfloat16_lf,0.5686666666666667,0.6890626192990656,0.5686666666666667,0.5343419392280258,1.0
|
3 |
+
0.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-35_torch.bfloat16_lf,0.6776666666666666,0.7316274074759973,0.6776666666666666,0.6973667827682657,0.999
|
4 |
+
0.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-70_torch.bfloat16_lf,0.6653333333333333,0.758958550711478,0.6653333333333333,0.6997203293940804,1.0
|
5 |
+
0.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-105_torch.bfloat16_lf,0.6963333333333334,0.7542353738754336,0.6963333333333334,0.7176790005966858,1.0
|
6 |
+
0.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-140_torch.bfloat16_lf,0.7006666666666667,0.7661325413638986,0.7006666666666667,0.726396504358645,1.0
|
7 |
+
1.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-175_torch.bfloat16_lf,0.6956666666666667,0.766520728596782,0.6956666666666667,0.7232763444431306,1.0
|
8 |
+
1.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-210_torch.bfloat16_lf,0.683,0.7728473029454707,0.6830000000000002,0.7172831663070369,1.0
|
9 |
+
1.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-245_torch.bfloat16_lf,0.7233333333333334,0.7720989063414209,0.7233333333333334,0.7410476466041488,1.0
|
10 |
+
1.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-280_torch.bfloat16_lf,0.7156666666666667,0.7724266286892245,0.7156666666666667,0.7356331945937126,1.0
|
11 |
+
1.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-315_torch.bfloat16_lf,0.6986666666666667,0.7734046031514225,0.6986666666666667,0.7262724373234384,1.0
|
12 |
+
2.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-350_torch.bfloat16_lf,0.704,0.7725944595890188,0.704,0.7290337960305111,1.0
|
data/Qwen2.5-3B-Instruct_results.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/Qwen2.5-3B-Instruct_shots_metrics.csv
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-00,0.
|
3 |
-
5,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-05,0.
|
4 |
-
10,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-10,0.
|
5 |
-
20,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-20,0.
|
6 |
-
30,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-30,0.
|
7 |
-
40,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-40,0.
|
8 |
-
50,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-50,0.
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-00,0.5796666666666667,0.6966500240864278,0.5796666666666667,0.5506370828782681,1.0
|
3 |
+
5,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-05,0.639,0.7226431221398603,0.639,0.641568790114368,0.9973333333333333
|
4 |
+
10,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-10,0.625,0.7164154004131771,0.625,0.6402584852791593,0.995
|
5 |
+
20,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-20,0.4666666666666667,0.6987641430848737,0.46666666666666673,0.5265074036660548,0.9316666666666666
|
6 |
+
30,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-30,0.475,0.6880994914236809,0.475,0.5310948082593374,0.904
|
7 |
+
40,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-40,0.584,0.7065303262365236,0.584,0.6214992664375876,0.7173333333333334
|
8 |
+
50,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-50,0.6093333333333333,0.7120506480394511,0.6093333333333333,0.6451959368825358,0.574
|
data/Qwen2.5-72B-Instruct_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct_torch.bfloat16_4bit_lf,0.
|
3 |
-
0.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.
|
4 |
-
0.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.
|
5 |
-
0.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.
|
6 |
-
0.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.
|
7 |
-
1.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.
|
8 |
-
1.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.
|
9 |
-
1.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.
|
10 |
-
1.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.
|
11 |
-
1.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.
|
12 |
-
2.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct_torch.bfloat16_4bit_lf,0.7856666666666666,0.7942511546806512,0.7856666666666666,0.7699212943617263,0.994
|
3 |
+
0.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.7736666666666666,0.8102875293385203,0.7736666666666666,0.7874095844134584,1.0
|
4 |
+
0.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.748,0.8094861650366822,0.748,0.7718522396481117,1.0
|
5 |
+
0.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.7576666666666667,0.8111059140562599,0.7576666666666667,0.778271965273475,1.0
|
6 |
+
0.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.7846666666666666,0.8199033961265727,0.7846666666666666,0.7983932694517433,1.0
|
7 |
+
1.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.7396666666666667,0.8132229388907013,0.7396666666666667,0.768164418914878,1.0
|
8 |
+
1.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.7756666666666666,0.8208038975271454,0.7756666666666666,0.7934902567321389,1.0
|
9 |
+
1.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.767,0.8089726144740825,0.767,0.7826437373554418,1.0
|
10 |
+
1.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.7643333333333333,0.8106183296950366,0.7643333333333333,0.7823942859806713,1.0
|
11 |
+
1.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.774,0.814496213163251,0.774,0.7893524517536102,1.0
|
12 |
+
2.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.7673333333333333,0.8134721321251935,0.7673333333333333,0.7849776453559993,1.0
|
data/Qwen2.5-72B-Instruct_results.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/Qwen2.5-72B-Instruct_shots_metrics.csv
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-00,0.
|
3 |
-
5,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-05,0.
|
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-00,0.7856666666666666,0.7942511546806512,0.7856666666666666,0.7699212943617263,0.994
|
3 |
+
5,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-05,0.8113333333333334,0.8112264644451684,0.8113333333333334,0.8039596846574816,0.9416666666666667
|
4 |
+
10,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-10,0.8103333333333333,0.8136844357537636,0.8103333333333333,0.8088046626262355,0.9123333333333333
|
data/Qwen2.5-7B-Instruct_results.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/best_metrics.csv
CHANGED
@@ -1,16 +1,18 @@
|
|
1 |
index,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
1,
|
3 |
-
2,Llama3.1-
|
4 |
-
3,
|
5 |
-
4,
|
6 |
-
5,InternLM2.5-7B
|
7 |
-
6,InternLM2.5-
|
8 |
-
7,
|
9 |
-
8,Qwen2
|
10 |
-
9,Qwen2
|
11 |
-
10,Qwen2.5-
|
12 |
-
11,Qwen2.5-
|
13 |
-
12,
|
14 |
-
13,gpt-4o (
|
15 |
-
14,
|
16 |
-
15,o1-
|
|
|
|
|
|
1 |
index,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
1,truth,truth,0.0,0.0,0.0,0.0,0.0
|
3 |
+
2,Llama3.1-8B (1.0-epoch),Llama3.1-8B (1.0-epoch),0.78,0.810582723471486,0.78,0.7924651054056209,1.0
|
4 |
+
3,Llama3.1-70B (1.0-epoch),Llama3.1-70B (1.0-epoch),0.7963333333333333,0.8248972880055918,0.7963333333333333,0.8076868978089201,1.0
|
5 |
+
4,Mistral-7B (1.4-epoch),Mistral-7B (1.4-epoch),0.75,0.7885868317699068,0.75,0.7648234347578796,1.0
|
6 |
+
5,InternLM2.5-7B (0.8-epoch),InternLM2.5-7B (0.8-epoch),0.7496666666666667,0.8041871978859686,0.7496666666666667,0.7660159670998776,1.0
|
7 |
+
6,InternLM2.5-7B-1M (0.8-epoch),InternLM2.5-7B-1M (0.8-epoch),0.803,0.8031411888150441,0.803,0.8028064320197301,1.0
|
8 |
+
7,InternLM2.5-20B (0.8-epoch),InternLM2.5-20B (0.8-epoch),0.795,0.817457691710893,0.795,0.8027552955647029,1.0
|
9 |
+
8,Qwen2-7B (0.4-epoch),Qwen2-7B (0.4-epoch),0.759,0.8005303465799652,0.759,0.7748745026535183,1.0
|
10 |
+
9,Qwen2-72B (1.8-epoch),Qwen2-72B (1.8-epoch),0.784,0.8354349234761956,0.784,0.804194683154365,1.0
|
11 |
+
10,Qwen2.5-3B (1.4-epoch),Qwen2.5-3B (1.4-epoch),0.7233333333333334,0.7720989063414209,0.7233333333333334,0.7410476466041488,1.0
|
12 |
+
11,Qwen2.5-7B (1.0-epoch),Qwen2.5-7B (1.0-epoch),0.771,0.8005814962709542,0.771,0.7814602739241332,0.9993333333333333
|
13 |
+
12,Qwen2.5-72B (0.8-epoch),Qwen2.5-72B (0.8-epoch),0.7846666666666666,0.8199033961265727,0.7846666666666666,0.7983932694517433,1.0
|
14 |
+
13,gpt-4o-mini (0-shot),gpt-4o-mini (0-shot),0.7176666666666667,0.785706730193659,0.7176666666666667,0.7296061848734905,1.0
|
15 |
+
14,gpt-4o (10-shot),gpt-4o (10-shot),0.7916666666666666,0.8227707658360168,0.7916666666666666,0.803614688453356,0.9996666666666667
|
16 |
+
15,o1-mini (50-shot),o1-mini (50-shot),0.75,0.7767849265833893,0.75,0.7590020698968893,1.0
|
17 |
+
16,o1-preview (50-shot),o1-preview (50-shot),0.7546666666666667,0.7979981023789272,0.7546666666666667,0.7708181822112403,0.9996666666666667
|
18 |
+
17,Qwen2.5-72B (10-shot),Qwen2.5-72B (10-shot),0.8103333333333333,0.8136844357537636,0.8103333333333333,0.8088046626262355,0.998
|
data/best_results.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/few-shots_metrics.csv
CHANGED
@@ -1,86 +1,75 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,gpt-4o-mini,gpt-4o-mini/shots-00,0.
|
3 |
-
5,gpt-4o-mini,gpt-4o-mini/shots-05,0.
|
4 |
-
10,gpt-4o-mini,gpt-4o-mini/shots-10,0.
|
5 |
-
20,gpt-4o-mini,gpt-4o-mini/shots-20,0.
|
6 |
-
30,gpt-4o-mini,gpt-4o-mini/shots-30,0.
|
7 |
-
40,gpt-4o-mini,gpt-4o-mini/shots-40,0.
|
8 |
-
50,gpt-4o-mini,gpt-4o-mini/shots-50,0.
|
9 |
-
0,gpt-4o,gpt-4o/shots-00,0.
|
10 |
-
5,gpt-4o,gpt-4o/shots-05,0.
|
11 |
-
10,gpt-4o,gpt-4o/shots-10,0.
|
12 |
-
20,gpt-4o,gpt-4o/shots-20,0.
|
13 |
-
30,gpt-4o,gpt-4o/shots-30,0.
|
14 |
-
40,gpt-4o,gpt-4o/shots-40,0.
|
15 |
-
50,gpt-4o,gpt-4o/shots-50,0.
|
16 |
-
0,o1-mini,o1-mini/shots-00,0.
|
17 |
-
5,o1-mini,o1-mini/shots-05,0.
|
18 |
-
10,o1-mini,o1-mini/shots-10,0.
|
19 |
-
20,o1-mini,o1-mini/shots-20,0.
|
20 |
-
30,o1-mini,o1-mini/shots-30,0.
|
21 |
-
40,o1-mini,o1-mini/shots-40,0.
|
22 |
-
50,o1-mini,o1-mini/shots-50,0.
|
23 |
-
0,o1-preview,o1-preview/shots-00,0.
|
24 |
-
5,o1-preview,o1-preview/shots-05,0.
|
25 |
-
10,o1-preview,o1-preview/shots-10,0.
|
26 |
-
20,o1-preview,o1-preview/shots-20,0.
|
27 |
-
30,o1-preview,o1-preview/shots-30,0.
|
28 |
-
40,o1-preview,o1-preview/shots-40,0.
|
29 |
-
50,o1-preview,o1-preview/shots-50,0.
|
30 |
-
0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-00,0.
|
31 |
-
5,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-05,0.7056666666666667,0.
|
32 |
-
10,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-10,0.
|
33 |
-
20,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-20,0.767,0.
|
34 |
-
30,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-30,0.
|
35 |
-
40,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-40,0.
|
36 |
-
50,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-50,0.
|
37 |
-
0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-00,0.
|
38 |
-
5,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-05,0.
|
39 |
-
10,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-10,0.
|
40 |
-
20,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-20,0.
|
41 |
-
30,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-30,0.
|
42 |
-
0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-00,0.
|
43 |
-
5,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-05,0.
|
44 |
-
10,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-10,0.
|
45 |
-
20,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-20,0.
|
46 |
-
30,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-30,0.
|
47 |
-
40,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-40,0.
|
48 |
-
0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-00,0.
|
49 |
-
5,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-05,0.
|
50 |
-
10,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-10,0.
|
51 |
-
20,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-20,0.
|
52 |
-
30,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-30,0.
|
53 |
-
40,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-40,0.
|
54 |
-
50,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-50,0.
|
55 |
-
0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-00,0.
|
56 |
-
5,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-05,0.
|
57 |
-
10,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-10,0.
|
58 |
-
20,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-20,0.
|
59 |
-
30,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-30,0.
|
60 |
-
40,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-40,0.
|
61 |
-
50,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-50,0.
|
62 |
-
0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/shots-00,0.
|
63 |
-
0,Qwen2
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
20,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-20,0.232,0.5282610881631451,0.232,0.3093707499897376,0.676
|
74 |
-
30,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-30,0.23,0.5479545947886839,0.23,0.3064381040560128,0.661
|
75 |
-
40,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-40,0.2923333333333333,0.5608411738006117,0.2923333333333333,0.3751714671158081,0.5206666666666667
|
76 |
-
50,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-50,0.29,0.5646814860840066,0.29,0.3688382652659246,0.4603333333333333
|
77 |
-
0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-00,0.5783333333333334,0.6938704799615603,0.5783333333333334,0.5482371104670698,1.0
|
78 |
-
5,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-05,0.6446666666666667,0.7230280501918229,0.6446666666666667,0.6455439085887453,0.9973333333333332
|
79 |
-
10,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-10,0.6356666666666667,0.717399441576705,0.6356666666666667,0.647050125518008,0.995
|
80 |
-
20,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-20,0.4806666666666667,0.6978154586535756,0.4806666666666667,0.5325218737400426,0.9316666666666666
|
81 |
-
30,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-30,0.4883333333333333,0.6902162639713183,0.4883333333333333,0.5393146850625054,0.904
|
82 |
-
40,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-40,0.595,0.7060453498136213,0.595,0.6271468055875201,0.7173333333333334
|
83 |
-
50,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-50,0.6186666666666667,0.7088242726720394,0.6186666666666667,0.6483835468519816,0.574
|
84 |
0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-00,0.6436666666666667,0.717651042027604,0.6436666666666667,0.6066932578767255,1.0
|
85 |
5,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-05,0.63,0.7622571683877091,0.63,0.6151126410759672,0.998
|
86 |
10,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-10,0.677,0.7663956674673086,0.677,0.6770580664953397,0.9796666666666668
|
@@ -88,5 +77,6 @@ shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
|
88 |
30,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-30,0.765,0.7840432806350224,0.765,0.7512220322751986,0.805
|
89 |
40,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-40,0.757,0.7733827213068922,0.757,0.7427592763321033,0.8546666666666667
|
90 |
50,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-50,0.758,0.763149679724481,0.758,0.7376580515312735,0.7563333333333333
|
91 |
-
0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-00,0.
|
92 |
-
5,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-05,0.
|
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,gpt-4o-mini,gpt-4o-mini/shots-00,0.7176666666666667,0.785706730193659,0.7176666666666667,0.7296061848734905,0.9916666666666668
|
3 |
+
5,gpt-4o-mini,gpt-4o-mini/shots-05,0.7176666666666667,0.7767294185987051,0.7176666666666667,0.7181068311028772,0.9996666666666668
|
4 |
+
10,gpt-4o-mini,gpt-4o-mini/shots-10,0.6793333333333333,0.7728086050218999,0.6793333333333333,0.6916749681933937,0.9983333333333332
|
5 |
+
20,gpt-4o-mini,gpt-4o-mini/shots-20,0.6623333333333333,0.7686706009175459,0.6623333333333333,0.6798015109939115,0.998
|
6 |
+
30,gpt-4o-mini,gpt-4o-mini/shots-30,0.6873333333333334,0.7684209723431035,0.6873333333333334,0.6913018667081989,0.999
|
7 |
+
40,gpt-4o-mini,gpt-4o-mini/shots-40,0.6923333333333334,0.7639874967862498,0.6923333333333334,0.6924934068935911,0.9986666666666668
|
8 |
+
50,gpt-4o-mini,gpt-4o-mini/shots-50,0.717,0.7692638634416518,0.717,0.7105227254860433,0.9993333333333332
|
9 |
+
0,gpt-4o,gpt-4o/shots-00,0.782,0.8204048322982596,0.782,0.7953019682198627,0.066
|
10 |
+
5,gpt-4o,gpt-4o/shots-05,0.7873333333333333,0.8230974205170392,0.7873333333333333,0.8000290527498529,0.998
|
11 |
+
10,gpt-4o,gpt-4o/shots-10,0.7916666666666666,0.8227707658360168,0.7916666666666666,0.803614688453356,0.9996666666666668
|
12 |
+
20,gpt-4o,gpt-4o/shots-20,0.7816666666666666,0.8204541793856629,0.7816666666666666,0.7967017169880498,0.9993333333333332
|
13 |
+
30,gpt-4o,gpt-4o/shots-30,0.7886666666666666,0.8260847852316618,0.7886666666666666,0.8030949295928699,0.999
|
14 |
+
40,gpt-4o,gpt-4o/shots-40,0.784,0.8233509309291644,0.784,0.7993336791122846,0.9973333333333332
|
15 |
+
50,gpt-4o,gpt-4o/shots-50,0.787,0.8234800466218334,0.787,0.8013530974301947,0.9993333333333332
|
16 |
+
0,o1-mini,o1-mini/shots-00,0.7083333333333334,0.7848098266888749,0.7083333333333334,0.7377068425566796,0.999
|
17 |
+
5,o1-mini,o1-mini/shots-05,0.724,0.7905045610386181,0.724,0.7482963122126776,0.9966666666666668
|
18 |
+
10,o1-mini,o1-mini/shots-10,0.725,0.7892485648334764,0.725,0.7485623974683336,0.9943333333333332
|
19 |
+
20,o1-mini,o1-mini/shots-20,0.7343333333333333,0.786101455887261,0.7343333333333333,0.7535300565051624,0.9946666666666668
|
20 |
+
30,o1-mini,o1-mini/shots-30,0.7416666666666667,0.7791875084643942,0.7416666666666667,0.7548378729964869,0.9976666666666668
|
21 |
+
40,o1-mini,o1-mini/shots-40,0.7466666666666667,0.7783660257118015,0.7466666666666667,0.7572644424023218,0.9976666666666668
|
22 |
+
50,o1-mini,o1-mini/shots-50,0.75,0.7767849265833893,0.75,0.7590020698968893,0.9976666666666668
|
23 |
+
0,o1-preview,o1-preview/shots-00,0.721,0.7849371317342158,0.721,0.7451207069815194,0.998
|
24 |
+
5,o1-preview,o1-preview/shots-05,0.7313333333333333,0.7878283093765627,0.7313333333333333,0.7535489719321234,0.979
|
25 |
+
10,o1-preview,o1-preview/shots-10,0.749,0.7964482186234537,0.749,0.7677316493549238,0.9873333333333332
|
26 |
+
20,o1-preview,o1-preview/shots-20,0.7443333333333333,0.7911442834260676,0.7443333333333333,0.7625144090816939,0.9853333333333332
|
27 |
+
30,o1-preview,o1-preview/shots-30,0.7473333333333333,0.7920604378746952,0.7473333333333333,0.7643977099599287,0.984
|
28 |
+
40,o1-preview,o1-preview/shots-40,0.7506666666666667,0.7964679024468982,0.7506666666666667,0.7674109766459014,0.984
|
29 |
+
50,o1-preview,o1-preview/shots-50,0.7546666666666667,0.7979981023789272,0.7546666666666667,0.7708181822112403,0.9816666666666668
|
30 |
+
0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-00,0.742,0.7477056799746837,0.742,0.7371050181385632,0.8033333333333333
|
31 |
+
5,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-05,0.7056666666666667,0.7605745196939752,0.7056666666666667,0.7269189565098723,0.9886666666666668
|
32 |
+
10,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-10,0.6676666666666666,0.7834080522821993,0.6676666666666666,0.7082605860921491,0.9623333333333334
|
33 |
+
20,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-20,0.767,0.7690587905035869,0.767,0.7661695279121855,0.979
|
34 |
+
30,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-30,0.7693333333333333,0.7765844200886581,0.7693333333333333,0.7697325957683855,0.7326666666666667
|
35 |
+
40,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-40,0.6813333333333333,0.7801328325609714,0.6813333333333333,0.7066801804415871,0.759
|
36 |
+
50,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-50,0.7173333333333334,0.770076853795054,0.7173333333333334,0.723119179918213,0.6623333333333333
|
37 |
+
0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-00,0.7636666666666667,0.7806653325131986,0.7636666666666667,0.7525813484548423,0.0096666666666666
|
38 |
+
5,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-05,0.7536666666666667,0.772126097633354,0.7536666666666667,0.7545029613768596,0.79
|
39 |
+
10,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-10,0.754,0.7729477984842943,0.754,0.756682017266956,0.8326666666666667
|
40 |
+
20,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-20,0.738,0.7566938786102072,0.738,0.7348961489952073,0.819
|
41 |
+
30,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-30,0.758,0.7731535340331644,0.758,0.7565012256889623,0.548
|
42 |
+
0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-00,0.6946666666666667,0.701136267898111,0.6946666666666667,0.6634078645357937,0.0116666666666666
|
43 |
+
5,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-05,0.6446666666666667,0.7451807329096397,0.6446666666666667,0.681030628954011,0.142
|
44 |
+
10,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-10,0.6036666666666667,0.7334913867282189,0.6036666666666667,0.6493185547247415,0.1063333333333333
|
45 |
+
20,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-20,0.6276666666666667,0.7398894455389585,0.6276666666666667,0.6690543758928521,0.0826666666666666
|
46 |
+
30,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-30,0.661,0.7422079284443324,0.661,0.6862974695781847,0.07
|
47 |
+
40,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-40,0.6233333333333333,0.7465186818567994,0.6233333333333333,0.6643697809628606,0.0633333333333333
|
48 |
+
0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-00,0.705,0.7398041613378253,0.705,0.6906357423169466,1.0
|
49 |
+
5,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-05,0.7476666666666667,0.746806876028684,0.7476666666666667,0.7270588443494302,0.999
|
50 |
+
10,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-10,0.5533333333333333,0.7301739373336078,0.5533333333333333,0.625097481985829,0.9883333333333332
|
51 |
+
20,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-20,0.647,0.721136036365055,0.647,0.6769738108371004,0.9473333333333334
|
52 |
+
30,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-30,0.6263333333333333,0.7256804685839701,0.6263333333333333,0.6534519727626863,0.9403333333333334
|
53 |
+
40,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-40,0.642,0.7045435138304105,0.642,0.6161646934220135,0.9813333333333332
|
54 |
+
50,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-50,0.6166666666666667,0.6959837361921766,0.6166666666666667,0.5567537556050285,0.9803333333333332
|
55 |
+
0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-00,0.4813333333333333,0.7605248207587668,0.4813333333333333,0.5244515621126862,0.9986666666666668
|
56 |
+
5,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-05,0.7763333333333333,0.7640598325070357,0.7763333333333333,0.7700878172419743,0.9453333333333334
|
57 |
+
10,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-10,0.6473333333333333,0.7282065610714444,0.6473333333333333,0.665824871588245,0.8866666666666667
|
58 |
+
20,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-20,0.6733333333333333,0.7314610506764355,0.6733333333333333,0.6764198712634657,0.8213333333333334
|
59 |
+
30,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-30,0.6736666666666666,0.7482542000402412,0.6736666666666666,0.6810446770610585,0.8236666666666667
|
60 |
+
40,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-40,0.724,0.7567654663125225,0.724,0.712500180941536,0.8336666666666667
|
61 |
+
50,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-50,0.7213333333333334,0.7546008508718184,0.7213333333333334,0.70308601382351,0.8846666666666667
|
62 |
+
0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/shots-00,0.564,0.7745256693833624,0.564,0.6352190975436365,0.6726666666666666
|
63 |
+
0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-00,0.683,0.7493103872717293,0.683,0.710140098232232,0.9996666666666668
|
64 |
+
10,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-10,0.5646666666666667,0.7391197908117386,0.5646666666666667,0.6064049121095652,0.9896666666666668
|
65 |
+
0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch/shots-00,0.7516666666666667,0.7949378981748352,0.7516666666666667,0.7572499605227642,0.9773333333333334
|
66 |
+
0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-00,0.5796666666666667,0.6966500240864278,0.5796666666666667,0.5506370828782681,1.0
|
67 |
+
5,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-05,0.639,0.7226431221398603,0.639,0.641568790114368,0.9973333333333332
|
68 |
+
10,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-10,0.625,0.7164154004131771,0.625,0.6402584852791593,0.995
|
69 |
+
20,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-20,0.4666666666666667,0.6987641430848737,0.4666666666666667,0.5265074036660548,0.9316666666666666
|
70 |
+
30,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-30,0.475,0.6880994914236809,0.475,0.5310948082593374,0.904
|
71 |
+
40,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-40,0.584,0.7065303262365236,0.584,0.6214992664375876,0.7173333333333334
|
72 |
+
50,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-50,0.6093333333333333,0.7120506480394511,0.6093333333333333,0.6451959368825358,0.574
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-00,0.6436666666666667,0.717651042027604,0.6436666666666667,0.6066932578767255,1.0
|
74 |
5,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-05,0.63,0.7622571683877091,0.63,0.6151126410759672,0.998
|
75 |
10,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-10,0.677,0.7663956674673086,0.677,0.6770580664953397,0.9796666666666668
|
|
|
77 |
30,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-30,0.765,0.7840432806350224,0.765,0.7512220322751986,0.805
|
78 |
40,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-40,0.757,0.7733827213068922,0.757,0.7427592763321033,0.8546666666666667
|
79 |
50,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-50,0.758,0.763149679724481,0.758,0.7376580515312735,0.7563333333333333
|
80 |
+
0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-00,0.7856666666666666,0.7942511546806512,0.7856666666666666,0.7699212943617263,0.994
|
81 |
+
5,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-05,0.8113333333333334,0.8112264644451684,0.8113333333333334,0.8039596846574816,0.9416666666666668
|
82 |
+
10,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-10,0.8103333333333333,0.8136844357537636,0.8103333333333333,0.8088046626262355,0.9123333333333332
|
data/fine-tuning_metrics.csv
CHANGED
@@ -1,103 +1,103 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat_torch.float16_lf,0.
|
3 |
-
0.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-35_torch.float16_lf,0.
|
4 |
-
0.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-70_torch.float16_lf,0.
|
5 |
-
0.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-105_torch.float16_lf,0.
|
6 |
-
0.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-140_torch.float16_lf,0.
|
7 |
-
1.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-175_torch.float16_lf,0.
|
8 |
-
1.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-210_torch.float16_lf,0.
|
9 |
-
1.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-245_torch.float16_lf,0.
|
10 |
-
1.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-280_torch.float16_lf,0.
|
11 |
-
1.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-315_torch.float16_lf,0.
|
12 |
-
2.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-350_torch.float16_lf,0.
|
13 |
-
0.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat_torch.bfloat16_4bit_lf,0.
|
14 |
-
0.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-35_torch.bfloat16_4bit_lf,0.
|
15 |
-
0.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-70_torch.bfloat16_4bit_lf,0.
|
16 |
-
0.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-105_torch.bfloat16_4bit_lf,0.
|
17 |
-
0.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-140_torch.bfloat16_4bit_lf,0.
|
18 |
-
1.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-175_torch.bfloat16_4bit_lf,0.
|
19 |
-
1.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-210_torch.bfloat16_4bit_lf,0.
|
20 |
-
1.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-245_torch.bfloat16_4bit_lf,0.
|
21 |
-
1.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-280_torch.bfloat16_4bit_lf,0.
|
22 |
-
1.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-315_torch.bfloat16_4bit_lf,0.
|
23 |
-
2.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-350_torch.bfloat16_4bit_lf,0.
|
24 |
-
0.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat_torch.float16_lf,0.
|
25 |
-
0.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-35_torch.float16_lf,0.
|
26 |
-
0.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-70_torch.float16_lf,0.
|
27 |
-
0.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-105_torch.float16_lf,0.
|
28 |
-
0.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-140_torch.float16_lf,0.
|
29 |
-
1.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-175_torch.float16_lf,0.
|
30 |
-
1.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-210_torch.float16_lf,0.
|
31 |
-
1.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-245_torch.float16_lf,0.
|
32 |
-
1.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-280_torch.float16_lf,0.
|
33 |
-
1.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-315_torch.float16_lf,0.
|
34 |
-
2.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-350_torch.float16_lf,0.
|
35 |
-
0.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat_torch.bfloat16_lf,0.
|
36 |
-
0.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-35_torch.bfloat16_lf,0.
|
37 |
-
0.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-70_torch.bfloat16_lf,0.
|
38 |
-
0.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-105_torch.bfloat16_lf,0.
|
39 |
-
0.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-140_torch.bfloat16_lf,0.
|
40 |
-
1.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-175_torch.bfloat16_lf,0.
|
41 |
-
1.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-210_torch.bfloat16_lf,0.
|
42 |
-
1.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-245_torch.bfloat16_lf,0.
|
43 |
-
1.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-280_torch.bfloat16_lf,0.
|
44 |
-
1.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-315_torch.bfloat16_lf,0.
|
45 |
-
2.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-350_torch.bfloat16_lf,0.
|
46 |
-
0.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m_torch.bfloat16_lf,0.
|
47 |
-
0.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-35_torch.bfloat16_lf,0.7843333333333333,0.
|
48 |
-
0.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-70_torch.bfloat16_lf,0.
|
49 |
-
0.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-105_torch.bfloat16_lf,0.
|
50 |
-
0.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-140_torch.bfloat16_lf,0.
|
51 |
-
1.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-175_torch.bfloat16_lf,0.
|
52 |
-
1.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-210_torch.bfloat16_lf,0.
|
53 |
-
1.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-245_torch.bfloat16_lf,0.
|
54 |
-
1.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-280_torch.bfloat16_lf,0.
|
55 |
-
1.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-315_torch.bfloat16_lf,0.
|
56 |
-
2.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-350_torch.bfloat16_lf,0.
|
57 |
-
0.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat_torch.bfloat16_4bit_lf,0.
|
58 |
-
0.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-35_torch.bfloat16_4bit_lf,0.
|
59 |
-
0.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-70_torch.bfloat16_4bit_lf,0.
|
60 |
-
0.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-105_torch.bfloat16_4bit_lf,0.
|
61 |
-
0.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-140_torch.bfloat16_4bit_lf,0.
|
62 |
-
1.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-175_torch.bfloat16_4bit_lf,0.
|
63 |
-
1.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-210_torch.bfloat16_4bit_lf,0.
|
64 |
-
1.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-245_torch.bfloat16_4bit_lf,0.
|
65 |
-
1.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-280_torch.bfloat16_4bit_lf,0.
|
66 |
-
1.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-315_torch.bfloat16_4bit_lf,0.
|
67 |
-
2.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-350_torch.bfloat16_4bit_lf,0.
|
68 |
-
0.0,Qwen2
|
69 |
-
0.2,Qwen2
|
70 |
-
0.4,Qwen2
|
71 |
-
0.6,Qwen2
|
72 |
-
0.8,Qwen2
|
73 |
-
1.0,Qwen2
|
74 |
-
1.2,Qwen2
|
75 |
-
1.4,Qwen2
|
76 |
-
1.6,Qwen2
|
77 |
-
1.8,Qwen2
|
78 |
-
2.0,Qwen2
|
79 |
-
0.0,Qwen2
|
80 |
-
0.2,Qwen2
|
81 |
-
0.4,Qwen2
|
82 |
-
0.6,Qwen2
|
83 |
-
0.8,Qwen2
|
84 |
-
1.0,Qwen2
|
85 |
-
1.2,Qwen2
|
86 |
-
1.4,Qwen2
|
87 |
-
1.6,Qwen2
|
88 |
-
1.8,Qwen2
|
89 |
-
2.0,Qwen2
|
90 |
-
0.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct_torch.bfloat16_lf,0.
|
91 |
-
0.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-35_torch.bfloat16_lf,0.
|
92 |
-
0.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-70_torch.bfloat16_lf,0.
|
93 |
-
0.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-105_torch.bfloat16_lf,0.
|
94 |
-
0.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-140_torch.bfloat16_lf,0.
|
95 |
-
1.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-175_torch.bfloat16_lf,0.
|
96 |
-
1.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-210_torch.bfloat16_lf,0.
|
97 |
-
1.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-245_torch.bfloat16_lf,0.
|
98 |
-
1.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-280_torch.bfloat16_lf,0.
|
99 |
-
1.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-315_torch.bfloat16_lf,0.
|
100 |
-
2.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-350_torch.bfloat16_lf,0.
|
101 |
0.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct_torch.bfloat16_lf,0.6436666666666667,0.717651042027604,0.6436666666666667,0.6066932578767255,1.0
|
102 |
0.2,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-35_torch.bfloat16_lf,0.7473333333333333,0.759526705532232,0.7473333333333333,0.7480522291877509,0.998
|
103 |
0.4,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-70_torch.bfloat16_lf,0.752,0.7774114736945115,0.752,0.7611191332452362,0.9996666666666668
|
@@ -109,14 +109,14 @@ epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
|
109 |
1.6,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-280_torch.bfloat16_lf,0.7726666666666666,0.8006851113573145,0.7726666666666666,0.7813968284378919,0.9996666666666668
|
110 |
1.8,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-315_torch.bfloat16_lf,0.7696666666666667,0.799287702962426,0.7696666666666667,0.7792120245789584,0.9993333333333332
|
111 |
2.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-350_torch.bfloat16_lf,0.769,0.8010881984531473,0.769,0.7793801070552965,0.9996666666666668
|
112 |
-
0.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct_torch.bfloat16_4bit_lf,0.
|
113 |
-
0.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.
|
114 |
-
0.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.
|
115 |
-
0.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.
|
116 |
-
0.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.
|
117 |
-
1.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.
|
118 |
-
1.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.
|
119 |
-
1.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.
|
120 |
-
1.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.
|
121 |
-
1.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.
|
122 |
-
2.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat_torch.float16_lf,0.742,0.7477056799746837,0.742,0.7371050181385632,0.8033333333333333
|
3 |
+
0.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-35_torch.float16_lf,0.709,0.7987219597893886,0.709,0.7427961200958145,1.0
|
4 |
+
0.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-70_torch.float16_lf,0.7163333333333334,0.8058657875960304,0.7163333333333334,0.7487811196109319,0.9993333333333332
|
5 |
+
0.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-105_torch.float16_lf,0.6996666666666667,0.802722482275839,0.6996666666666667,0.7370938556711591,1.0
|
6 |
+
0.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7716666666666666,0.8092193821623755,0.7716666666666666,0.7864287269398251,1.0
|
7 |
+
1.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-175_torch.float16_lf,0.78,0.810582723471486,0.78,0.7924651054056209,1.0
|
8 |
+
1.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7313333333333333,0.8157783263996798,0.7313333333333333,0.7628807622782868,1.0
|
9 |
+
1.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-245_torch.float16_lf,0.751,0.8125856808988221,0.751,0.7745416635653988,1.0
|
10 |
+
1.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-280_torch.float16_lf,0.739,0.8097375095673094,0.739,0.7662329023371559,1.0
|
11 |
+
1.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-315_torch.float16_lf,0.7236666666666667,0.8145530585912838,0.7236666666666667,0.7580428816095297,1.0
|
12 |
+
2.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-350_torch.float16_lf,0.7293333333333333,0.8151184301713545,0.7293333333333333,0.7616699266814145,1.0
|
13 |
+
0.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat_torch.bfloat16_4bit_lf,0.7636666666666667,0.7806653325131986,0.7636666666666667,0.7525813484548423,0.0096666666666666
|
14 |
+
0.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-35_torch.bfloat16_4bit_lf,0.778,0.8148707737020212,0.778,0.7910805488003003,0.9996666666666668
|
15 |
+
0.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7306666666666667,0.8145782271710159,0.7306666666666667,0.7624724104697406,1.0
|
16 |
+
0.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7193333333333334,0.8213567226911125,0.7193333333333334,0.7560702640626931,1.0
|
17 |
+
0.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-140_torch.bfloat16_4bit_lf,0.7563333333333333,0.826789897753756,0.7563333333333333,0.7815164366677209,1.0
|
18 |
+
1.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-175_torch.bfloat16_4bit_lf,0.7963333333333333,0.8248972880055918,0.7963333333333333,0.8076868978089201,1.0
|
19 |
+
1.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-210_torch.bfloat16_4bit_lf,0.7326666666666667,0.8265345821998035,0.7326666666666667,0.7644418492070342,1.0
|
20 |
+
1.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7556666666666667,0.8258994609525315,0.7556666666666667,0.7820405339757727,1.0
|
21 |
+
1.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-280_torch.bfloat16_4bit_lf,0.757,0.8264461657684251,0.757,0.7834496144681513,1.0
|
22 |
+
1.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7546666666666667,0.8277723752096544,0.7546666666666667,0.7823584779069335,1.0
|
23 |
+
2.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7496666666666667,0.8282310230333227,0.7496666666666667,0.7791947625361637,1.0
|
24 |
+
0.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat_torch.float16_lf,0.6946666666666667,0.701136267898111,0.6946666666666667,0.6634078645357937,0.0116666666666666
|
25 |
+
0.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-35_torch.float16_lf,0.702,0.7932731014186957,0.702,0.7342714734731689,1.0
|
26 |
+
0.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-70_torch.float16_lf,0.742,0.78982949223512,0.742,0.7536681109811127,1.0
|
27 |
+
0.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-105_torch.float16_lf,0.6596666666666666,0.7923396753604393,0.6596666666666666,0.7067542301676931,1.0
|
28 |
+
0.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7146666666666667,0.7861341885687435,0.7146666666666667,0.7404677278137267,1.0
|
29 |
+
1.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-175_torch.float16_lf,0.7326666666666667,0.7876867721932461,0.7326666666666667,0.7471869515031995,1.0
|
30 |
+
1.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7016666666666667,0.7903119228393193,0.7016666666666667,0.7348708822385348,1.0
|
31 |
+
1.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-245_torch.float16_lf,0.75,0.7885868317699068,0.75,0.7648234347578796,1.0
|
32 |
+
1.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-280_torch.float16_lf,0.7156666666666667,0.7846106674095725,0.7156666666666667,0.7410042005708856,1.0
|
33 |
+
1.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-315_torch.float16_lf,0.6916666666666667,0.7864256994491394,0.6916666666666667,0.7257499426487266,1.0
|
34 |
+
2.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-350_torch.float16_lf,0.6976666666666667,0.7889443494370009,0.6976666666666667,0.7307996137659796,1.0
|
35 |
+
0.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat_torch.bfloat16_lf,0.705,0.7398041613378253,0.705,0.6906357423169466,1.0
|
36 |
+
0.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-35_torch.bfloat16_lf,0.7193333333333334,0.7863486093365692,0.7193333333333334,0.7330498811142795,1.0
|
37 |
+
0.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-70_torch.bfloat16_lf,0.726,0.7900250828103491,0.726,0.7396583495246526,1.0
|
38 |
+
0.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-105_torch.bfloat16_lf,0.6736666666666666,0.8044565554629858,0.6736666666666666,0.7104123104529902,1.0
|
39 |
+
0.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-140_torch.bfloat16_lf,0.7496666666666667,0.8041871978859686,0.7496666666666667,0.7660159670998776,1.0
|
40 |
+
1.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-175_torch.bfloat16_lf,0.726,0.8094634420846424,0.726,0.751394838822856,1.0
|
41 |
+
1.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-210_torch.bfloat16_lf,0.7276666666666667,0.8039673699820601,0.7276666666666667,0.7488653386949028,1.0
|
42 |
+
1.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-245_torch.bfloat16_lf,0.747,0.8055537753403307,0.747,0.76527383722639,1.0
|
43 |
+
1.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-280_torch.bfloat16_lf,0.7166666666666667,0.8059535682746547,0.7166666666666667,0.7432427946178835,1.0
|
44 |
+
1.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-315_torch.bfloat16_lf,0.6983333333333334,0.8119110469658597,0.6983333333333334,0.7347246872892312,1.0
|
45 |
+
2.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-350_torch.bfloat16_lf,0.7076666666666667,0.8120132783051135,0.7076666666666667,0.7408145046817652,1.0
|
46 |
+
0.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m_torch.bfloat16_lf,0.4813333333333333,0.7605248207587668,0.4813333333333333,0.5244515621126862,0.9986666666666668
|
47 |
+
0.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-35_torch.bfloat16_lf,0.7843333333333333,0.7977648302848388,0.7843333333333333,0.7864944570659659,1.0
|
48 |
+
0.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-70_torch.bfloat16_lf,0.7836666666666666,0.7996977262947886,0.7836666666666666,0.7886881726841081,1.0
|
49 |
+
0.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-105_torch.bfloat16_lf,0.7243333333333334,0.8171172705912051,0.7243333333333334,0.7565804830382912,1.0
|
50 |
+
0.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-140_torch.bfloat16_lf,0.803,0.8031411888150441,0.803,0.8028064320197301,1.0
|
51 |
+
1.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-175_torch.bfloat16_lf,0.7676666666666667,0.8108441731715863,0.7676666666666667,0.7843187816704813,1.0
|
52 |
+
1.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-210_torch.bfloat16_lf,0.7736666666666666,0.8091671780923799,0.7736666666666666,0.7876874850235454,1.0
|
53 |
+
1.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-245_torch.bfloat16_lf,0.7623333333333333,0.8062291602218205,0.7623333333333333,0.777669094563925,1.0
|
54 |
+
1.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-280_torch.bfloat16_lf,0.7553333333333333,0.8086197936829652,0.7553333333333333,0.7755588811428297,1.0
|
55 |
+
1.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-315_torch.bfloat16_lf,0.748,0.8171996792797457,0.748,0.773990849396903,1.0
|
56 |
+
2.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-350_torch.bfloat16_lf,0.756,0.8126875394266148,0.756,0.7777812522863184,1.0
|
57 |
+
0.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat_torch.bfloat16_4bit_lf,0.564,0.7745256693833624,0.564,0.6352190975436365,0.6726666666666666
|
58 |
+
0.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-35_torch.bfloat16_4bit_lf,0.7576666666666667,0.7960640143421251,0.7576666666666667,0.769346697622254,1.0
|
59 |
+
0.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7743333333333333,0.8042791719587958,0.7743333333333333,0.7849233169481004,1.0
|
60 |
+
0.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7053333333333334,0.8070587351344375,0.7053333333333334,0.7421985241641746,1.0
|
61 |
+
0.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-140_torch.bfloat16_4bit_lf,0.795,0.817457691710893,0.795,0.8027552955647029,1.0
|
62 |
+
1.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-175_torch.bfloat16_4bit_lf,0.7786666666666666,0.8220512342362645,0.7786666666666666,0.7938353741035283,1.0
|
63 |
+
1.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-210_torch.bfloat16_4bit_lf,0.7516666666666667,0.8264680853251051,0.7516666666666667,0.7787088167337303,1.0
|
64 |
+
1.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7876666666666666,0.8154190698395475,0.7876666666666666,0.7965399224841393,1.0
|
65 |
+
1.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-280_torch.bfloat16_4bit_lf,0.7753333333333333,0.8181125383376948,0.7753333333333333,0.7899794199099057,1.0
|
66 |
+
1.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7583333333333333,0.8179523170315577,0.7583333333333333,0.7795358413482081,1.0
|
67 |
+
2.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7616666666666667,0.8208475549648238,0.7616666666666667,0.7826736174247095,1.0
|
68 |
+
0.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct_torch.float16_lf,0.683,0.7493103872717293,0.683,0.710140098232232,0.9996666666666668
|
69 |
+
0.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-35_torch.float16_lf,0.725,0.7840171468707405,0.725,0.748994536667058,0.9996666666666668
|
70 |
+
0.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-70_torch.float16_lf,0.759,0.8005303465799652,0.759,0.7748745026535183,1.0
|
71 |
+
0.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-105_torch.float16_lf,0.6926666666666667,0.8039176975550218,0.6926666666666667,0.7332481528585848,1.0
|
72 |
+
0.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-140_torch.float16_lf,0.725,0.7952719247171957,0.725,0.7476238017654298,1.0
|
73 |
+
1.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-175_torch.float16_lf,0.6756666666666666,0.7810148934939715,0.6756666666666666,0.708653993277772,1.0
|
74 |
+
1.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-210_torch.float16_lf,0.7013333333333334,0.7969562600853992,0.7013333333333334,0.7362679665494508,1.0
|
75 |
+
1.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-245_torch.float16_lf,0.7326666666666667,0.7922538479314682,0.7326666666666667,0.755402136631717,0.9996666666666668
|
76 |
+
1.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-280_torch.float16_lf,0.6983333333333334,0.785127298428753,0.6983333333333334,0.7292251109166867,1.0
|
77 |
+
1.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-315_torch.float16_lf,0.6783333333333333,0.785390767631834,0.6783333333333333,0.7164131321837346,1.0
|
78 |
+
2.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-350_torch.float16_lf,0.689,0.7929715746898984,0.689,0.7259993126510194,1.0
|
79 |
+
0.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch.bfloat16_4bit_lf,0.7516666666666667,0.7949378981748352,0.7516666666666667,0.7572499605227642,0.9773333333333334
|
80 |
+
0.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.7583333333333333,0.8199928526815756,0.7583333333333333,0.782751089787442,1.0
|
81 |
+
0.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.7366666666666667,0.8224865755517643,0.7366666666666667,0.7700627366337021,1.0
|
82 |
+
0.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.757,0.8253824826209251,0.757,0.784000409833628,1.0
|
83 |
+
0.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.7893333333333333,0.8229104753645825,0.7893333333333333,0.8033124955993173,1.0
|
84 |
+
1.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.7376666666666667,0.8243654864769323,0.7376666666666667,0.7699617360961548,1.0
|
85 |
+
1.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.763,0.8318882808702871,0.763,0.7901075708186186,1.0
|
86 |
+
1.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.7656666666666667,0.8288272203240518,0.7656666666666667,0.790627109330698,1.0
|
87 |
+
1.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.7693333333333333,0.8292798021666021,0.7693333333333333,0.7930169589012503,1.0
|
88 |
+
1.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.784,0.8354349234761956,0.784,0.804194683154365,1.0
|
89 |
+
2.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.7736666666666666,0.8330147983140184,0.7736666666666666,0.7973657072550873,1.0
|
90 |
+
0.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct_torch.bfloat16_lf,0.5686666666666667,0.6890626192990656,0.5686666666666667,0.5343419392280258,1.0
|
91 |
+
0.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-35_torch.bfloat16_lf,0.6776666666666666,0.7316274074759973,0.6776666666666666,0.6973667827682657,0.999
|
92 |
+
0.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-70_torch.bfloat16_lf,0.6653333333333333,0.758958550711478,0.6653333333333333,0.6997203293940804,1.0
|
93 |
+
0.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-105_torch.bfloat16_lf,0.6963333333333334,0.7542353738754336,0.6963333333333334,0.7176790005966858,1.0
|
94 |
+
0.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-140_torch.bfloat16_lf,0.7006666666666667,0.7661325413638986,0.7006666666666667,0.726396504358645,1.0
|
95 |
+
1.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-175_torch.bfloat16_lf,0.6956666666666667,0.766520728596782,0.6956666666666667,0.7232763444431306,1.0
|
96 |
+
1.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-210_torch.bfloat16_lf,0.683,0.7728473029454707,0.6830000000000002,0.7172831663070369,1.0
|
97 |
+
1.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-245_torch.bfloat16_lf,0.7233333333333334,0.7720989063414209,0.7233333333333334,0.7410476466041488,1.0
|
98 |
+
1.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-280_torch.bfloat16_lf,0.7156666666666667,0.7724266286892245,0.7156666666666667,0.7356331945937126,1.0
|
99 |
+
1.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-315_torch.bfloat16_lf,0.6986666666666667,0.7734046031514225,0.6986666666666667,0.7262724373234384,1.0
|
100 |
+
2.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-350_torch.bfloat16_lf,0.704,0.7725944595890188,0.704,0.7290337960305111,1.0
|
101 |
0.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct_torch.bfloat16_lf,0.6436666666666667,0.717651042027604,0.6436666666666667,0.6066932578767255,1.0
|
102 |
0.2,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-35_torch.bfloat16_lf,0.7473333333333333,0.759526705532232,0.7473333333333333,0.7480522291877509,0.998
|
103 |
0.4,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-70_torch.bfloat16_lf,0.752,0.7774114736945115,0.752,0.7611191332452362,0.9996666666666668
|
|
|
109 |
1.6,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-280_torch.bfloat16_lf,0.7726666666666666,0.8006851113573145,0.7726666666666666,0.7813968284378919,0.9996666666666668
|
110 |
1.8,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-315_torch.bfloat16_lf,0.7696666666666667,0.799287702962426,0.7696666666666667,0.7792120245789584,0.9993333333333332
|
111 |
2.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-350_torch.bfloat16_lf,0.769,0.8010881984531473,0.769,0.7793801070552965,0.9996666666666668
|
112 |
+
0.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct_torch.bfloat16_4bit_lf,0.7856666666666666,0.7942511546806512,0.7856666666666666,0.7699212943617263,0.994
|
113 |
+
0.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.7736666666666666,0.8102875293385203,0.7736666666666666,0.7874095844134584,1.0
|
114 |
+
0.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.748,0.8094861650366822,0.748,0.7718522396481117,1.0
|
115 |
+
0.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.7576666666666667,0.8111059140562599,0.7576666666666667,0.778271965273475,1.0
|
116 |
+
0.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.7846666666666666,0.8199033961265727,0.7846666666666666,0.7983932694517433,1.0
|
117 |
+
1.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.7396666666666667,0.8132229388907013,0.7396666666666667,0.768164418914878,1.0
|
118 |
+
1.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.7756666666666666,0.8208038975271454,0.7756666666666666,0.7934902567321389,1.0
|
119 |
+
1.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.767,0.8089726144740825,0.767,0.7826437373554418,1.0
|
120 |
+
1.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.7643333333333333,0.8106183296950366,0.7643333333333333,0.7823942859806713,1.0
|
121 |
+
1.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.774,0.814496213163251,0.774,0.7893524517536102,1.0
|
122 |
+
2.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.7673333333333333,0.8134721321251935,0.7673333333333333,0.7849776453559993,1.0
|
data/internlm2_5-20b-chat_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat_torch.bfloat16_4bit_lf,0.
|
3 |
-
0.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-35_torch.bfloat16_4bit_lf,0.
|
4 |
-
0.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-70_torch.bfloat16_4bit_lf,0.
|
5 |
-
0.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-105_torch.bfloat16_4bit_lf,0.
|
6 |
-
0.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-140_torch.bfloat16_4bit_lf,0.
|
7 |
-
1.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-175_torch.bfloat16_4bit_lf,0.
|
8 |
-
1.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-210_torch.bfloat16_4bit_lf,0.
|
9 |
-
1.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-245_torch.bfloat16_4bit_lf,0.
|
10 |
-
1.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-280_torch.bfloat16_4bit_lf,0.
|
11 |
-
1.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-315_torch.bfloat16_4bit_lf,0.
|
12 |
-
2.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-350_torch.bfloat16_4bit_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat_torch.bfloat16_4bit_lf,0.564,0.7745256693833624,0.564,0.6352190975436365,0.6726666666666666
|
3 |
+
0.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-35_torch.bfloat16_4bit_lf,0.7576666666666667,0.7960640143421251,0.7576666666666667,0.769346697622254,1.0
|
4 |
+
0.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7743333333333333,0.8042791719587958,0.7743333333333333,0.7849233169481004,1.0
|
5 |
+
0.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7053333333333334,0.8070587351344375,0.7053333333333334,0.7421985241641746,1.0
|
6 |
+
0.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-140_torch.bfloat16_4bit_lf,0.795,0.817457691710893,0.795,0.8027552955647029,1.0
|
7 |
+
1.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-175_torch.bfloat16_4bit_lf,0.7786666666666666,0.8220512342362645,0.7786666666666666,0.7938353741035283,1.0
|
8 |
+
1.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-210_torch.bfloat16_4bit_lf,0.7516666666666667,0.8264680853251051,0.7516666666666667,0.7787088167337303,1.0
|
9 |
+
1.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7876666666666666,0.8154190698395475,0.7876666666666666,0.7965399224841393,1.0
|
10 |
+
1.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-280_torch.bfloat16_4bit_lf,0.7753333333333333,0.8181125383376948,0.7753333333333333,0.7899794199099057,1.0
|
11 |
+
1.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7583333333333333,0.8179523170315577,0.7583333333333333,0.7795358413482081,1.0
|
12 |
+
2.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7616666666666667,0.8208475549648238,0.7616666666666667,0.7826736174247095,1.0
|
data/internlm2_5-20b-chat_results.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/internlm2_5-20b-chat_shots_metrics.csv
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/shots-00,0.
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/shots-00,0.564,0.7745256693833624,0.564,0.6352190975436365,0.6726666666666666
|
data/internlm2_5-7b-chat-1m_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m_torch.bfloat16_lf,0.
|
3 |
-
0.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-35_torch.bfloat16_lf,0.7843333333333333,0.
|
4 |
-
0.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-70_torch.bfloat16_lf,0.
|
5 |
-
0.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-105_torch.bfloat16_lf,0.
|
6 |
-
0.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-140_torch.bfloat16_lf,0.
|
7 |
-
1.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-175_torch.bfloat16_lf,0.
|
8 |
-
1.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-210_torch.bfloat16_lf,0.
|
9 |
-
1.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-245_torch.bfloat16_lf,0.
|
10 |
-
1.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-280_torch.bfloat16_lf,0.
|
11 |
-
1.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-315_torch.bfloat16_lf,0.
|
12 |
-
2.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-350_torch.bfloat16_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m_torch.bfloat16_lf,0.48133333333333334,0.7605248207587668,0.48133333333333334,0.5244515621126862,0.9986666666666667
|
3 |
+
0.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-35_torch.bfloat16_lf,0.7843333333333333,0.7977648302848388,0.7843333333333333,0.7864944570659659,1.0
|
4 |
+
0.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-70_torch.bfloat16_lf,0.7836666666666666,0.7996977262947886,0.7836666666666666,0.7886881726841081,1.0
|
5 |
+
0.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-105_torch.bfloat16_lf,0.7243333333333334,0.8171172705912051,0.7243333333333334,0.7565804830382912,1.0
|
6 |
+
0.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-140_torch.bfloat16_lf,0.803,0.8031411888150441,0.803,0.8028064320197301,1.0
|
7 |
+
1.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-175_torch.bfloat16_lf,0.7676666666666667,0.8108441731715863,0.7676666666666667,0.7843187816704813,1.0
|
8 |
+
1.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-210_torch.bfloat16_lf,0.7736666666666666,0.8091671780923799,0.7736666666666666,0.7876874850235454,1.0
|
9 |
+
1.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-245_torch.bfloat16_lf,0.7623333333333333,0.8062291602218205,0.7623333333333333,0.777669094563925,1.0
|
10 |
+
1.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-280_torch.bfloat16_lf,0.7553333333333333,0.8086197936829652,0.7553333333333333,0.7755588811428297,1.0
|
11 |
+
1.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-315_torch.bfloat16_lf,0.748,0.8171996792797457,0.748,0.773990849396903,1.0
|
12 |
+
2.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-350_torch.bfloat16_lf,0.756,0.8126875394266148,0.756,0.7777812522863184,1.0
|
data/internlm2_5-7b-chat-1m_results.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/internlm2_5-7b-chat-1m_shots_metrics.csv
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-00,0.
|
3 |
-
5,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-05,0.
|
4 |
-
10,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-10,0.
|
5 |
-
20,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-20,0.
|
6 |
-
30,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-30,0.
|
7 |
-
40,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-40,0.
|
8 |
-
50,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-50,0.
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-00,0.48133333333333334,0.7605248207587668,0.48133333333333334,0.5244515621126862,0.9986666666666667
|
3 |
+
5,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-05,0.7763333333333333,0.7640598325070357,0.7763333333333333,0.7700878172419743,0.9453333333333334
|
4 |
+
10,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-10,0.6473333333333333,0.7282065610714444,0.6473333333333333,0.665824871588245,0.8866666666666667
|
5 |
+
20,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-20,0.6733333333333333,0.7314610506764355,0.6733333333333333,0.6764198712634657,0.8213333333333334
|
6 |
+
30,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-30,0.6736666666666666,0.7482542000402412,0.6736666666666666,0.6810446770610585,0.8236666666666667
|
7 |
+
40,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-40,0.724,0.7567654663125225,0.724,0.712500180941536,0.8336666666666667
|
8 |
+
50,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-50,0.7213333333333334,0.7546008508718184,0.7213333333333334,0.70308601382351,0.8846666666666667
|
data/internlm2_5-7b-chat_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat_torch.bfloat16_lf,0.
|
3 |
-
0.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-35_torch.bfloat16_lf,0.
|
4 |
-
0.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-70_torch.bfloat16_lf,0.
|
5 |
-
0.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-105_torch.bfloat16_lf,0.
|
6 |
-
0.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-140_torch.bfloat16_lf,0.
|
7 |
-
1.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-175_torch.bfloat16_lf,0.
|
8 |
-
1.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-210_torch.bfloat16_lf,0.
|
9 |
-
1.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-245_torch.bfloat16_lf,0.
|
10 |
-
1.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-280_torch.bfloat16_lf,0.
|
11 |
-
1.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-315_torch.bfloat16_lf,0.
|
12 |
-
2.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-350_torch.bfloat16_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat_torch.bfloat16_lf,0.705,0.7398041613378253,0.705,0.6906357423169466,1.0
|
3 |
+
0.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-35_torch.bfloat16_lf,0.7193333333333334,0.7863486093365692,0.7193333333333334,0.7330498811142795,1.0
|
4 |
+
0.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-70_torch.bfloat16_lf,0.726,0.7900250828103491,0.726,0.7396583495246526,1.0
|
5 |
+
0.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-105_torch.bfloat16_lf,0.6736666666666666,0.8044565554629858,0.6736666666666666,0.7104123104529902,1.0
|
6 |
+
0.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-140_torch.bfloat16_lf,0.7496666666666667,0.8041871978859686,0.7496666666666667,0.7660159670998776,1.0
|
7 |
+
1.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-175_torch.bfloat16_lf,0.726,0.8094634420846424,0.726,0.751394838822856,1.0
|
8 |
+
1.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-210_torch.bfloat16_lf,0.7276666666666667,0.8039673699820601,0.7276666666666667,0.7488653386949028,1.0
|
9 |
+
1.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-245_torch.bfloat16_lf,0.747,0.8055537753403307,0.747,0.76527383722639,1.0
|
10 |
+
1.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-280_torch.bfloat16_lf,0.7166666666666667,0.8059535682746547,0.7166666666666667,0.7432427946178835,1.0
|
11 |
+
1.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-315_torch.bfloat16_lf,0.6983333333333334,0.8119110469658597,0.6983333333333334,0.7347246872892312,1.0
|
12 |
+
2.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-350_torch.bfloat16_lf,0.7076666666666667,0.8120132783051135,0.7076666666666667,0.7408145046817652,1.0
|
data/internlm2_5-7b-chat_results.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/internlm2_5-7b-chat_shots_metrics.csv
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-00,0.
|
3 |
-
5,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-05,0.
|
4 |
-
10,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-10,0.
|
5 |
-
20,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-20,0.
|
6 |
-
30,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-30,0.
|
7 |
-
40,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-40,0.
|
8 |
-
50,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-50,0.
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-00,0.705,0.7398041613378253,0.705,0.6906357423169466,1.0
|
3 |
+
5,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-05,0.7476666666666667,0.746806876028684,0.7476666666666667,0.7270588443494302,0.999
|
4 |
+
10,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-10,0.5533333333333333,0.7301739373336078,0.5533333333333333,0.625097481985829,0.9883333333333333
|
5 |
+
20,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-20,0.647,0.721136036365055,0.647,0.6769738108371004,0.9473333333333334
|
6 |
+
30,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-30,0.6263333333333333,0.7256804685839701,0.6263333333333333,0.6534519727626863,0.9403333333333334
|
7 |
+
40,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-40,0.642,0.7045435138304105,0.642,0.6161646934220135,0.9813333333333333
|
8 |
+
50,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-50,0.6166666666666667,0.6959837361921766,0.6166666666666667,0.5567537556050285,0.9803333333333333
|
data/openai_metrics.csv
CHANGED
@@ -1,29 +1,29 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,gpt-4o-mini,gpt-4o-mini/shots-00,0.
|
3 |
-
0,gpt-4o,gpt-4o/shots-00,0.
|
4 |
-
0,o1-mini,o1-mini/shots-00,0.
|
5 |
-
0,o1-preview,o1-preview/shots-00,0.
|
6 |
-
5,gpt-4o-mini,gpt-4o-mini/shots-05,0.
|
7 |
-
5,gpt-4o,gpt-4o/shots-05,0.
|
8 |
-
5,o1-mini,o1-mini/shots-05,0.
|
9 |
-
5,o1-preview,o1-preview/shots-05,0.
|
10 |
-
10,gpt-4o-mini,gpt-4o-mini/shots-10,0.
|
11 |
-
10,gpt-4o,gpt-4o/shots-10,0.
|
12 |
-
10,o1-mini,o1-mini/shots-10,0.
|
13 |
-
10,o1-preview,o1-preview/shots-10,0.
|
14 |
-
20,gpt-4o-mini,gpt-4o-mini/shots-20,0.
|
15 |
-
20,gpt-4o,gpt-4o/shots-20,0.
|
16 |
-
20,o1-mini,o1-mini/shots-20,0.
|
17 |
-
20,o1-preview,o1-preview/shots-20,0.
|
18 |
-
30,gpt-4o-mini,gpt-4o-mini/shots-30,0.
|
19 |
-
30,gpt-4o,gpt-4o/shots-30,0.
|
20 |
-
30,o1-mini,o1-mini/shots-30,0.
|
21 |
-
30,o1-preview,o1-preview/shots-30,0.
|
22 |
-
40,gpt-4o-mini,gpt-4o-mini/shots-40,0.
|
23 |
-
40,gpt-4o,gpt-4o/shots-40,0.
|
24 |
-
40,o1-mini,o1-mini/shots-40,0.
|
25 |
-
40,o1-preview,o1-preview/shots-40,0.
|
26 |
-
50,gpt-4o-mini,gpt-4o-mini/shots-50,0.
|
27 |
-
50,gpt-4o,gpt-4o/shots-50,0.
|
28 |
-
50,o1-mini,o1-mini/shots-50,0.
|
29 |
-
50,o1-preview,o1-preview/shots-50,0.
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,gpt-4o-mini,gpt-4o-mini/shots-00,0.7176666666666667,0.785706730193659,0.7176666666666667,0.7296061848734905,0.9916666666666667
|
3 |
+
0,gpt-4o,gpt-4o/shots-00,0.782,0.8204048322982596,0.782,0.7953019682198627,0.066
|
4 |
+
0,o1-mini,o1-mini/shots-00,0.7083333333333334,0.7848098266888749,0.7083333333333334,0.7377068425566796,0.999
|
5 |
+
0,o1-preview,o1-preview/shots-00,0.721,0.7849371317342158,0.721,0.7451207069815194,0.998
|
6 |
+
5,gpt-4o-mini,gpt-4o-mini/shots-05,0.7176666666666667,0.7767294185987051,0.7176666666666667,0.7181068311028772,0.9996666666666667
|
7 |
+
5,gpt-4o,gpt-4o/shots-05,0.7873333333333333,0.8230974205170392,0.7873333333333333,0.8000290527498529,0.998
|
8 |
+
5,o1-mini,o1-mini/shots-05,0.724,0.7905045610386181,0.724,0.7482963122126776,0.9966666666666667
|
9 |
+
5,o1-preview,o1-preview/shots-05,0.7313333333333333,0.7878283093765627,0.7313333333333333,0.7535489719321234,0.979
|
10 |
+
10,gpt-4o-mini,gpt-4o-mini/shots-10,0.6793333333333333,0.7728086050218999,0.6793333333333333,0.6916749681933937,0.9983333333333333
|
11 |
+
10,gpt-4o,gpt-4o/shots-10,0.7916666666666666,0.8227707658360168,0.7916666666666666,0.803614688453356,0.9996666666666667
|
12 |
+
10,o1-mini,o1-mini/shots-10,0.725,0.7892485648334764,0.725,0.7485623974683336,0.9943333333333333
|
13 |
+
10,o1-preview,o1-preview/shots-10,0.749,0.7964482186234537,0.749,0.7677316493549238,0.9873333333333333
|
14 |
+
20,gpt-4o-mini,gpt-4o-mini/shots-20,0.6623333333333333,0.7686706009175459,0.6623333333333333,0.6798015109939115,0.998
|
15 |
+
20,gpt-4o,gpt-4o/shots-20,0.7816666666666666,0.8204541793856629,0.7816666666666666,0.7967017169880498,0.9993333333333333
|
16 |
+
20,o1-mini,o1-mini/shots-20,0.7343333333333333,0.786101455887261,0.7343333333333333,0.7535300565051624,0.9946666666666667
|
17 |
+
20,o1-preview,o1-preview/shots-20,0.7443333333333333,0.7911442834260676,0.7443333333333333,0.7625144090816939,0.9853333333333333
|
18 |
+
30,gpt-4o-mini,gpt-4o-mini/shots-30,0.6873333333333334,0.7684209723431035,0.6873333333333334,0.6913018667081989,0.999
|
19 |
+
30,gpt-4o,gpt-4o/shots-30,0.7886666666666666,0.8260847852316618,0.7886666666666666,0.8030949295928699,0.999
|
20 |
+
30,o1-mini,o1-mini/shots-30,0.7416666666666667,0.7791875084643942,0.7416666666666667,0.7548378729964869,0.9976666666666667
|
21 |
+
30,o1-preview,o1-preview/shots-30,0.7473333333333333,0.7920604378746952,0.7473333333333333,0.7643977099599287,0.984
|
22 |
+
40,gpt-4o-mini,gpt-4o-mini/shots-40,0.6923333333333334,0.7639874967862498,0.6923333333333334,0.6924934068935911,0.9986666666666667
|
23 |
+
40,gpt-4o,gpt-4o/shots-40,0.784,0.8233509309291644,0.784,0.7993336791122846,0.9973333333333333
|
24 |
+
40,o1-mini,o1-mini/shots-40,0.7466666666666667,0.7783660257118015,0.7466666666666667,0.7572644424023218,0.9976666666666667
|
25 |
+
40,o1-preview,o1-preview/shots-40,0.7506666666666667,0.7964679024468982,0.7506666666666667,0.7674109766459014,0.984
|
26 |
+
50,gpt-4o-mini,gpt-4o-mini/shots-50,0.717,0.7692638634416518,0.717,0.7105227254860433,0.9993333333333333
|
27 |
+
50,gpt-4o,gpt-4o/shots-50,0.787,0.8234800466218334,0.787,0.8013530974301947,0.9993333333333333
|
28 |
+
50,o1-mini,o1-mini/shots-50,0.75,0.7767849265833893,0.75,0.7590020698968893,0.9976666666666667
|
29 |
+
50,o1-preview,o1-preview/shots-50,0.7546666666666667,0.7979981023789272,0.7546666666666667,0.7708181822112403,0.9816666666666667
|
data/openai_results.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
datasets/mgtv/dev.csv
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:449f236786e2105cd1dd0ba5f4a037c3608a03d73a24597e880cc5009e8c53b6
|
3 |
-
size 2741482
|
|
|
|
|
|
|
|
datasets/mgtv/val.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/00_Data Analysis.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c27a6be2814f5be1f48aa22ad45c264d24556d09cb5347629dab1cf0755ab97
|
3 |
+
size 1148416
|
notebooks/01a_internlm2_5-20b-chat_analysis.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:734e3db8a49e84a606f2d38ca30e4d0e8ff3cdb2c67684d5825356c023cf08fc
|
3 |
+
size 6385778
|
notebooks/01a_internlm2_5-7b-chat-1m_analysis.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f887698f08bdb2f767c6edbb0329171670ab1ae3e53e4eb1b065ea8c175d864
|
3 |
+
size 2385934
|
notebooks/01a_internlm2_5-7b-chat_analysis.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef39d306780d8c6717de34adbaa5a7180854019439fa151016007eb8d52f6a05
|
3 |
+
size 6122973
|
notebooks/01b_Mistral-7B-v0.3-Chinese-Chat_analysis.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71fbd8541e14161d6ff7a5668ce60021cda2bd703819dab6bb405cc6c25553e3
|
3 |
+
size 14778268
|
notebooks/02a_Qwen2-7B-Instruct_analysis.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9cda076bb798749bdd0a680484547091497d7bd4dc67be75fd40bc75e0651b27
|
3 |
+
size 1789053
|
notebooks/02b_Qwen2-72B-Instruct_analysis.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:223ce2a464dbad51d4f2cb02e50bcc656c2e1774366f30051e7a4a10505c3c0a
|
3 |
+
size 2080974
|