dh-mc commited on
Commit
11f2c15
·
1 Parent(s): 8410e14

fix bug in perf calc

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. data/Llama3.1-70B-Chinese-Chat_metrics.csv +11 -11
  2. data/Llama3.1-70B-Chinese-Chat_results.csv +0 -0
  3. data/Llama3.1-70B-Chinese-Chat_shots_metrics.csv +5 -5
  4. data/Llama3.1-8B-Chinese-Chat_metrics.csv +11 -11
  5. data/Llama3.1-8B-Chinese-Chat_results.csv +0 -0
  6. data/Llama3.1-8B-Chinese-Chat_shots_metrics.csv +7 -7
  7. data/Mistral-7B-v0.3-Chinese-Chat_metrics.csv +11 -11
  8. data/Mistral-7B-v0.3-Chinese-Chat_results.csv +0 -0
  9. data/Mistral-7B-v0.3-Chinese-Chat_shots_metrics.csv +6 -6
  10. data/Qwen2-72B-Instruct_metrics.csv +11 -11
  11. data/Qwen2-72B-Instruct_shots_metrics.csv +1 -1
  12. data/Qwen2-7B-Instruct_metrics.csv +11 -11
  13. data/Qwen2-7B-Instruct_shots_metrics.csv +2 -2
  14. data/Qwen2.5-0.5B-Instruct_metrics.csv +11 -11
  15. data/Qwen2.5-0.5B-Instruct_results.csv +0 -0
  16. data/Qwen2.5-0.5B-Instruct_shots_metrics.csv +7 -7
  17. data/Qwen2.5-1.5B-Instruct_metrics.csv +1 -1
  18. data/Qwen2.5-1.5B-Instruct_results.csv +0 -0
  19. data/Qwen2.5-1.5B-Instruct_shots_metrics.csv +1 -1
  20. data/Qwen2.5-3B-Instruct_metrics.csv +11 -11
  21. data/Qwen2.5-3B-Instruct_results.csv +0 -0
  22. data/Qwen2.5-3B-Instruct_shots_metrics.csv +7 -7
  23. data/Qwen2.5-72B-Instruct_metrics.csv +11 -11
  24. data/Qwen2.5-72B-Instruct_results.csv +0 -0
  25. data/Qwen2.5-72B-Instruct_shots_metrics.csv +3 -2
  26. data/Qwen2.5-7B-Instruct_results.csv +0 -0
  27. data/best_metrics.csv +17 -15
  28. data/best_results.csv +0 -0
  29. data/few-shots_metrics.csv +74 -84
  30. data/fine-tuning_metrics.csv +110 -110
  31. data/internlm2_5-20b-chat_metrics.csv +11 -11
  32. data/internlm2_5-20b-chat_results.csv +0 -0
  33. data/internlm2_5-20b-chat_shots_metrics.csv +1 -1
  34. data/internlm2_5-7b-chat-1m_metrics.csv +11 -11
  35. data/internlm2_5-7b-chat-1m_results.csv +0 -0
  36. data/internlm2_5-7b-chat-1m_shots_metrics.csv +7 -7
  37. data/internlm2_5-7b-chat_metrics.csv +11 -11
  38. data/internlm2_5-7b-chat_results.csv +0 -0
  39. data/internlm2_5-7b-chat_shots_metrics.csv +7 -7
  40. data/openai_metrics.csv +28 -28
  41. data/openai_results.csv +0 -0
  42. datasets/mgtv/dev.csv +0 -3
  43. datasets/mgtv/val.csv +0 -0
  44. notebooks/00_Data Analysis.ipynb +2 -2
  45. notebooks/01a_internlm2_5-20b-chat_analysis.ipynb +2 -2
  46. notebooks/01a_internlm2_5-7b-chat-1m_analysis.ipynb +2 -2
  47. notebooks/01a_internlm2_5-7b-chat_analysis.ipynb +2 -2
  48. notebooks/01b_Mistral-7B-v0.3-Chinese-Chat_analysis.ipynb +2 -2
  49. notebooks/02a_Qwen2-7B-Instruct_analysis.ipynb +2 -2
  50. notebooks/02b_Qwen2-72B-Instruct_analysis.ipynb +2 -2
data/Llama3.1-70B-Chinese-Chat_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat_torch.bfloat16_4bit_lf,0.7646666666666667,0.7804609488644828,0.7646666666666667,0.7497548621711109,0.009666666666666667
3
- 0.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-35_torch.bfloat16_4bit_lf,0.784,0.8105343792887019,0.784,0.7931742141608462,0.9996666666666667
4
- 0.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7426666666666667,0.8117033235947096,0.7426666666666667,0.7673825750808414,1.0
5
- 0.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-105_torch.bfloat16_4bit_lf,0.736,0.8227236574891071,0.736,0.7650739090144549,1.0
6
- 0.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-140_torch.bfloat16_4bit_lf,0.7686666666666667,0.8259659464402258,0.7686666666666667,0.7880870865039342,1.0
7
- 1.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-175_torch.bfloat16_4bit_lf,0.809,0.8282732906153989,0.809,0.8166997776775797,1.0
8
- 1.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-210_torch.bfloat16_4bit_lf,0.75,0.8287348768409003,0.75,0.7741734526674708,1.0
9
- 1.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7703333333333333,0.8271894042316865,0.7703333333333333,0.7907617274354051,1.0
10
- 1.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-280_torch.bfloat16_4bit_lf,0.776,0.8315436250878178,0.776,0.7959870550088912,1.0
11
- 1.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7733333333333333,0.8327336470976,0.7733333333333333,0.7947537193805649,1.0
12
- 2.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7686666666666667,0.8329633784586954,0.7686666666666667,0.7914454794587963,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat_torch.bfloat16_4bit_lf,0.7636666666666667,0.7806653325131986,0.7636666666666667,0.7525813484548423,0.009666666666666667
3
+ 0.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-35_torch.bfloat16_4bit_lf,0.778,0.8148707737020212,0.778,0.7910805488003003,0.9996666666666667
4
+ 0.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7306666666666667,0.8145782271710159,0.7306666666666667,0.7624724104697406,1.0
5
+ 0.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7193333333333334,0.8213567226911125,0.7193333333333334,0.7560702640626931,1.0
6
+ 0.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-140_torch.bfloat16_4bit_lf,0.7563333333333333,0.826789897753756,0.7563333333333333,0.7815164366677209,1.0
7
+ 1.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-175_torch.bfloat16_4bit_lf,0.7963333333333333,0.8248972880055918,0.7963333333333333,0.8076868978089201,1.0
8
+ 1.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-210_torch.bfloat16_4bit_lf,0.7326666666666667,0.8265345821998035,0.7326666666666667,0.7644418492070342,1.0
9
+ 1.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7556666666666667,0.8258994609525315,0.7556666666666667,0.7820405339757727,1.0
10
+ 1.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-280_torch.bfloat16_4bit_lf,0.757,0.8264461657684251,0.757,0.7834496144681513,1.0
11
+ 1.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7546666666666667,0.8277723752096544,0.7546666666666667,0.7823584779069335,1.0
12
+ 2.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7496666666666667,0.8282310230333227,0.7496666666666667,0.7791947625361637,1.0
data/Llama3.1-70B-Chinese-Chat_results.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/Llama3.1-70B-Chinese-Chat_shots_metrics.csv CHANGED
@@ -1,6 +1,6 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-00,0.7646666666666667,0.7804609488644828,0.7646666666666667,0.7497548621711109,0.009666666666666667
3
- 5,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-05,0.754,0.7675695134276339,0.754,0.7530665717237273,0.79
4
- 10,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-10,0.756,0.7695738042762151,0.756,0.7563878737797524,0.8326666666666667
5
- 20,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-20,0.7406666666666667,0.7560876641054418,0.7406666666666667,0.7360011002310723,0.819
6
- 30,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-30,0.7603333333333333,0.7710641222872985,0.7603333333333333,0.7570501796584528,0.548
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-00,0.7636666666666667,0.7806653325131986,0.7636666666666667,0.7525813484548423,0.009666666666666667
3
+ 5,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-05,0.7536666666666667,0.772126097633354,0.7536666666666667,0.7545029613768596,0.79
4
+ 10,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-10,0.754,0.7729477984842943,0.754,0.756682017266956,0.8326666666666667
5
+ 20,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-20,0.738,0.7566938786102072,0.738,0.7348961489952073,0.819
6
+ 30,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-30,0.758,0.7731535340331644,0.758,0.7565012256889623,0.548
data/Llama3.1-8B-Chinese-Chat_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat_torch.float16_lf,0.7343333333333333,0.7375752740091942,0.7343333333333333,0.7270283652909943,0.8033333333333333
3
- 0.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-35_torch.float16_lf,0.717,0.7933072428707201,0.717,0.7447412977676989,1.0
4
- 0.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-70_torch.float16_lf,0.7226666666666667,0.7983383063141186,0.7226666666666667,0.7489397350174751,0.9993333333333333
5
- 0.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-105_torch.float16_lf,0.7083333333333334,0.7967030927405547,0.7083333333333334,0.738836849803633,1.0
6
- 0.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7773333333333333,0.805139129977305,0.7773333333333333,0.7882159693114585,1.0
7
- 1.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-175_torch.float16_lf,0.7853333333333333,0.8062405645226312,0.7853333333333333,0.7938991590982061,1.0
8
- 1.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7436666666666667,0.8148316221752646,0.7436666666666667,0.7689773286065246,1.0
9
- 1.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-245_torch.float16_lf,0.759,0.8080929326806991,0.759,0.7772842274293189,1.0
10
- 1.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-280_torch.float16_lf,0.745,0.8027959680086005,0.745,0.7666181725503965,1.0
11
- 1.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-315_torch.float16_lf,0.7303333333333333,0.806805925253305,0.7303333333333333,0.7580841794383364,1.0
12
- 2.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-350_torch.float16_lf,0.737,0.808786608325944,0.737,0.7629963845364953,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat_torch.float16_lf,0.742,0.7477056799746837,0.742,0.7371050181385632,0.8033333333333333
3
+ 0.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-35_torch.float16_lf,0.709,0.7987219597893886,0.709,0.7427961200958145,1.0
4
+ 0.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-70_torch.float16_lf,0.7163333333333334,0.8058657875960304,0.7163333333333334,0.7487811196109319,0.9993333333333333
5
+ 0.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-105_torch.float16_lf,0.6996666666666667,0.802722482275839,0.6996666666666667,0.7370938556711591,1.0
6
+ 0.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7716666666666666,0.8092193821623755,0.7716666666666666,0.7864287269398251,1.0
7
+ 1.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-175_torch.float16_lf,0.78,0.810582723471486,0.78,0.7924651054056209,1.0
8
+ 1.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7313333333333333,0.8157783263996798,0.7313333333333333,0.7628807622782868,1.0
9
+ 1.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-245_torch.float16_lf,0.751,0.8125856808988221,0.751,0.7745416635653988,1.0
10
+ 1.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-280_torch.float16_lf,0.739,0.8097375095673094,0.739,0.7662329023371559,1.0
11
+ 1.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-315_torch.float16_lf,0.7236666666666667,0.8145530585912838,0.7236666666666667,0.7580428816095297,1.0
12
+ 2.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-350_torch.float16_lf,0.7293333333333333,0.8151184301713545,0.7293333333333333,0.7616699266814145,1.0
data/Llama3.1-8B-Chinese-Chat_results.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/Llama3.1-8B-Chinese-Chat_shots_metrics.csv CHANGED
@@ -1,8 +1,8 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-00,0.7343333333333333,0.7375752740091942,0.7343333333333333,0.7270283652909943,0.8033333333333333
3
- 5,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-05,0.7056666666666667,0.7508515184863084,0.7056666666666667,0.7230574380518462,0.9886666666666667
4
- 10,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-10,0.6736666666666666,0.7776004745989736,0.6736666666666666,0.7094104807112239,0.9623333333333334
5
- 20,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-20,0.767,0.764982587229615,0.767,0.7638473265780445,0.979
6
- 30,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-30,0.7713333333333333,0.7725685630276532,0.7713333333333333,0.7692692690410152,0.7326666666666667
7
- 40,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-40,0.6873333333333334,0.773294758147205,0.6873333333333334,0.7075877720686631,0.759
8
- 50,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-50,0.7176666666666667,0.7599215931134234,0.7176666666666667,0.7203550920641806,0.6623333333333333
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-00,0.742,0.7477056799746837,0.742,0.7371050181385632,0.8033333333333333
3
+ 5,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-05,0.7056666666666667,0.7605745196939752,0.7056666666666667,0.7269189565098723,0.9886666666666667
4
+ 10,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-10,0.6676666666666666,0.7834080522821993,0.6676666666666666,0.7082605860921491,0.9623333333333334
5
+ 20,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-20,0.767,0.7690587905035869,0.767,0.7661695279121855,0.979
6
+ 30,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-30,0.7693333333333333,0.7765844200886581,0.7693333333333333,0.7697325957683855,0.7326666666666667
7
+ 40,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-40,0.6813333333333333,0.7801328325609714,0.6813333333333333,0.7066801804415871,0.759
8
+ 50,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-50,0.7173333333333334,0.770076853795054,0.7173333333333334,0.723119179918213,0.6623333333333333
data/Mistral-7B-v0.3-Chinese-Chat_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat_torch.float16_lf,0.6923333333333334,0.7009179792741449,0.6923333333333334,0.6605899639694456,0.011666666666666667
3
- 0.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-35_torch.float16_lf,0.706,0.7832545046834243,0.706,0.7323466131711432,1.0
4
- 0.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-70_torch.float16_lf,0.7476666666666667,0.7836120158306894,0.7476666666666667,0.7557791381509955,1.0
5
- 0.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-105_torch.float16_lf,0.6736666666666666,0.7908140272002406,0.6736666666666666,0.7129951145360993,1.0
6
- 0.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7293333333333333,0.788387677637057,0.7293333333333333,0.7494137469900564,1.0
7
- 1.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-175_torch.float16_lf,0.74,0.7833068129490098,0.74,0.7499935485741815,1.0
8
- 1.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7146666666666667,0.7890760288118991,0.7146666666666667,0.7411240160229633,1.0
9
- 1.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-245_torch.float16_lf,0.7616666666666667,0.789634957005121,0.7616666666666667,0.7721210086098353,1.0
10
- 1.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-280_torch.float16_lf,0.7296666666666667,0.7854982015370922,0.7296666666666667,0.7491267995936699,1.0
11
- 1.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-315_torch.float16_lf,0.7076666666666667,0.7877874532247918,0.7076666666666667,0.7346283562321456,1.0
12
- 2.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-350_torch.float16_lf,0.713,0.7895690867103055,0.713,0.739013227401175,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat_torch.float16_lf,0.6946666666666667,0.701136267898111,0.6946666666666667,0.6634078645357937,0.011666666666666667
3
+ 0.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-35_torch.float16_lf,0.702,0.7932731014186957,0.702,0.7342714734731689,1.0
4
+ 0.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-70_torch.float16_lf,0.742,0.78982949223512,0.742,0.7536681109811127,1.0
5
+ 0.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-105_torch.float16_lf,0.6596666666666666,0.7923396753604393,0.6596666666666666,0.7067542301676931,1.0
6
+ 0.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7146666666666667,0.7861341885687435,0.7146666666666667,0.7404677278137267,1.0
7
+ 1.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-175_torch.float16_lf,0.7326666666666667,0.7876867721932461,0.7326666666666667,0.7471869515031995,1.0
8
+ 1.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7016666666666667,0.7903119228393193,0.7016666666666667,0.7348708822385348,1.0
9
+ 1.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-245_torch.float16_lf,0.75,0.7885868317699068,0.75,0.7648234347578796,1.0
10
+ 1.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-280_torch.float16_lf,0.7156666666666667,0.7846106674095725,0.7156666666666667,0.7410042005708856,1.0
11
+ 1.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-315_torch.float16_lf,0.6916666666666667,0.7864256994491394,0.6916666666666667,0.7257499426487266,1.0
12
+ 2.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-350_torch.float16_lf,0.6976666666666667,0.7889443494370009,0.6976666666666667,0.7307996137659796,1.0
data/Mistral-7B-v0.3-Chinese-Chat_results.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/Mistral-7B-v0.3-Chinese-Chat_shots_metrics.csv CHANGED
@@ -1,7 +1,7 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-00,0.6923333333333334,0.7009179792741449,0.6923333333333334,0.6605899639694456,0.011666666666666667
3
- 5,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-05,0.6546666666666666,0.7415422757067709,0.6546666666666666,0.684189810233595,0.142
4
- 10,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-10,0.612,0.7259976964524691,0.612,0.6501410678512595,0.10633333333333334
5
- 20,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-20,0.6336666666666667,0.7315100617022602,0.6336666666666667,0.6683245802083553,0.08266666666666667
6
- 30,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-30,0.665,0.7374233826761456,0.665,0.6872462947319797,0.07
7
- 40,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-40,0.6306666666666667,0.7422868762493116,0.6306666666666667,0.6670711390706651,0.06333333333333334
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-00,0.6946666666666667,0.701136267898111,0.6946666666666667,0.6634078645357937,0.011666666666666667
3
+ 5,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-05,0.6446666666666667,0.7451807329096397,0.6446666666666667,0.681030628954011,0.142
4
+ 10,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-10,0.6036666666666667,0.7334913867282189,0.6036666666666667,0.6493185547247415,0.10633333333333334
5
+ 20,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-20,0.6276666666666667,0.7398894455389585,0.6276666666666667,0.6690543758928521,0.08266666666666667
6
+ 30,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-30,0.661,0.7422079284443324,0.661,0.6862974695781847,0.07
7
+ 40,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-40,0.6233333333333333,0.7465186818567994,0.6233333333333333,0.6643697809628606,0.06333333333333334
data/Qwen2-72B-Instruct_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch.bfloat16_4bit_lf,0.757,0.7973819870472458,0.757,0.7602606947698078,0.9773333333333334
3
- 0.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.772,0.8214192168152544,0.772,0.7910898276003457,1.0
4
- 0.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.757,0.828747966447233,0.757,0.783516715780864,1.0
5
- 0.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.772,0.8277697933855978,0.772,0.7932982172336923,1.0
6
- 0.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.8036666666666666,0.8277228453985896,0.8036666666666666,0.8136774676398189,1.0
7
- 1.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.753,0.8267761287574541,0.753,0.7793434248302783,1.0
8
- 1.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.7793333333333333,0.8358618807490109,0.7793333333333333,0.800734522365308,1.0
9
- 1.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.7883333333333333,0.8390667295473608,0.7883333333333333,0.8075446360016978,1.0
10
- 1.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.7856666666666666,0.8333912862981965,0.7856666666666666,0.8038536915174684,1.0
11
- 1.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.805,0.8442903406198344,0.805,0.8197956174225439,1.0
12
- 2.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.7936666666666666,0.8399561173931658,0.7936666666666666,0.8112524138737499,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch.bfloat16_4bit_lf,0.7516666666666667,0.7949378981748352,0.7516666666666667,0.7572499605227642,0.9773333333333334
3
+ 0.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.7583333333333333,0.8199928526815756,0.7583333333333333,0.782751089787442,1.0
4
+ 0.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.7366666666666667,0.8224865755517643,0.7366666666666667,0.7700627366337021,1.0
5
+ 0.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.757,0.8253824826209251,0.757,0.784000409833628,1.0
6
+ 0.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.7893333333333333,0.8229104753645825,0.7893333333333333,0.8033124955993173,1.0
7
+ 1.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.7376666666666667,0.8243654864769323,0.7376666666666667,0.7699617360961548,1.0
8
+ 1.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.763,0.8318882808702871,0.763,0.7901075708186186,1.0
9
+ 1.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.7656666666666667,0.8288272203240518,0.7656666666666667,0.790627109330698,1.0
10
+ 1.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.7693333333333333,0.8292798021666021,0.7693333333333333,0.7930169589012503,1.0
11
+ 1.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.784,0.8354349234761956,0.784,0.804194683154365,1.0
12
+ 2.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.7736666666666666,0.8330147983140184,0.7736666666666666,0.7973657072550873,1.0
data/Qwen2-72B-Instruct_shots_metrics.csv CHANGED
@@ -1,2 +1,2 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch/shots-00,0.757,0.7973819870472458,0.757,0.7602606947698078,0.9773333333333334
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch/shots-00,0.7516666666666667,0.7949378981748352,0.7516666666666667,0.7572499605227642,0.9773333333333334
data/Qwen2-7B-Instruct_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct_torch.float16_lf,0.6853333333333333,0.7434931541561965,0.6853333333333333,0.7090778261894969,0.9996666666666667
3
- 0.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-35_torch.float16_lf,0.7313333333333333,0.7782207073448913,0.7313333333333333,0.7498580605712221,0.9996666666666667
4
- 0.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-70_torch.float16_lf,0.767,0.7975691979811874,0.767,0.7784908005204111,1.0
5
- 0.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-105_torch.float16_lf,0.706,0.8028770302127605,0.706,0.7396402026345186,1.0
6
- 0.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-140_torch.float16_lf,0.7313333333333333,0.7899967378450532,0.7313333333333333,0.7491181057755286,1.0
7
- 1.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-175_torch.float16_lf,0.6853333333333333,0.7776902509375624,0.6853333333333333,0.7122906026955259,1.0
8
- 1.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-210_torch.float16_lf,0.7156666666666667,0.7981854285684257,0.7156666666666667,0.7440952985881264,1.0
9
- 1.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-245_torch.float16_lf,0.743,0.7909260776868464,0.743,0.7603582063225583,0.9996666666666667
10
- 1.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-280_torch.float16_lf,0.7106666666666667,0.7844615294470283,0.7106666666666667,0.7354379359862141,1.0
11
- 1.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-315_torch.float16_lf,0.6926666666666667,0.7852752054045592,0.6926666666666667,0.7234458732476875,1.0
12
- 2.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-350_torch.float16_lf,0.6996666666666667,0.7892137201429604,0.6996666666666667,0.7296312152658814,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct_torch.float16_lf,0.683,0.7493103872717293,0.683,0.710140098232232,0.9996666666666667
3
+ 0.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-35_torch.float16_lf,0.725,0.7840171468707405,0.725,0.748994536667058,0.9996666666666667
4
+ 0.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-70_torch.float16_lf,0.759,0.8005303465799652,0.759,0.7748745026535183,1.0
5
+ 0.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-105_torch.float16_lf,0.6926666666666667,0.8039176975550218,0.6926666666666667,0.7332481528585848,1.0
6
+ 0.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-140_torch.float16_lf,0.725,0.7952719247171957,0.725,0.7476238017654298,1.0
7
+ 1.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-175_torch.float16_lf,0.6756666666666666,0.7810148934939715,0.6756666666666666,0.708653993277772,1.0
8
+ 1.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-210_torch.float16_lf,0.7013333333333334,0.7969562600853992,0.7013333333333334,0.7362679665494508,1.0
9
+ 1.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-245_torch.float16_lf,0.7326666666666667,0.7922538479314682,0.7326666666666667,0.755402136631717,0.9996666666666667
10
+ 1.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-280_torch.float16_lf,0.6983333333333334,0.785127298428753,0.6983333333333334,0.7292251109166867,1.0
11
+ 1.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-315_torch.float16_lf,0.6783333333333333,0.785390767631834,0.6783333333333333,0.7164131321837346,1.0
12
+ 2.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-350_torch.float16_lf,0.689,0.7929715746898984,0.689,0.7259993126510194,1.0
data/Qwen2-7B-Instruct_shots_metrics.csv CHANGED
@@ -1,3 +1,3 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-00,0.6853333333333333,0.7434931541561965,0.6853333333333333,0.7090778261894969,0.9996666666666667
3
- 10,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-10,0.5723333333333334,0.738817429885796,0.5723333333333334,0.6112549880619311,0.9896666666666667
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-00,0.683,0.7493103872717293,0.683,0.710140098232232,0.9996666666666667
3
+ 10,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-10,0.5646666666666667,0.7391197908117386,0.5646666666666667,0.6064049121095652,0.9896666666666667
data/Qwen2.5-0.5B-Instruct_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct_torch.float16_lf,0.43833333333333335,0.5292917259914629,0.43833333333333335,0.42286875992486556,0.594
3
- 0.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-35_torch.float16_lf,0.5223333333333333,0.5704911830866488,0.5223333333333333,0.454387436259078,1.0
4
- 0.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-70_torch.float16_lf,0.542,0.6358012674347429,0.542,0.5272438410312219,1.0
5
- 0.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-105_torch.float16_lf,0.44633333333333336,0.6477441598024034,0.44633333333333336,0.4917457459702999,1.0
6
- 0.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-140_torch.float16_lf,0.5053333333333333,0.6438300456580985,0.5053333333333333,0.4995247505211914,1.0
7
- 1.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-175_torch.float16_lf,0.558,0.6560369730369926,0.558,0.5632487818615118,1.0
8
- 1.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-210_torch.float16_lf,0.5453333333333333,0.6357935773889876,0.5453333333333333,0.5594242895140294,1.0
9
- 1.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-245_torch.float16_lf,0.5903333333333334,0.6503049529377274,0.5903333333333334,0.6094397514027766,1.0
10
- 1.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-280_torch.float16_lf,0.5286666666666666,0.6532851084098983,0.5286666666666666,0.5617239467523474,1.0
11
- 1.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-315_torch.float16_lf,0.5336666666666666,0.6607103736450911,0.5336666666666666,0.5622949959647037,1.0
12
- 2.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-350_torch.float16_lf,0.5156666666666667,0.652809461208547,0.5156666666666667,0.549955024535151,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct_torch.float16_lf,0.443,0.5490534863315207,0.443,0.43178235266224163,0.594
3
+ 0.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-35_torch.float16_lf,0.525,0.5819221558338251,0.525,0.4586682135998428,1.0
4
+ 0.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-70_torch.float16_lf,0.54,0.6445255881472232,0.54,0.5293020271128788,1.0
5
+ 0.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-105_torch.float16_lf,0.43766666666666665,0.6565760150511494,0.43766666666666665,0.49167707971005714,1.0
6
+ 0.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-140_torch.float16_lf,0.49933333333333335,0.6513093602943617,0.49933333333333335,0.49913143191054443,1.0
7
+ 1.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-175_torch.float16_lf,0.5523333333333333,0.6622075519433389,0.5523333333333333,0.5627283867177305,1.0
8
+ 1.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-210_torch.float16_lf,0.5403333333333333,0.64319564963495,0.5403333333333333,0.5598419070210608,1.0
9
+ 1.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-245_torch.float16_lf,0.5843333333333334,0.6559808590166016,0.5843333333333334,0.6086767064128167,1.0
10
+ 1.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-280_torch.float16_lf,0.5216666666666666,0.6604678981061621,0.5216666666666666,0.5615446578399996,1.0
11
+ 1.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-315_torch.float16_lf,0.524,0.6673441240188523,0.524,0.5607458201939703,1.0
12
+ 2.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-350_torch.float16_lf,0.507,0.6597337077954278,0.5070000000000001,0.5492280882625964,1.0
data/Qwen2.5-0.5B-Instruct_results.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/Qwen2.5-0.5B-Instruct_shots_metrics.csv CHANGED
@@ -1,8 +1,8 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-00,0.43833333333333335,0.5292917259914629,0.43833333333333335,0.42286875992486556,0.594
3
- 5,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-05,0.17966666666666667,0.47516573853109806,0.17966666666666667,0.214144872117911,0.004
4
- 10,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-10,0.351,0.5084853117995367,0.351,0.39097839594031075,0.068
5
- 20,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-20,0.43366666666666664,0.513186330900278,0.43366666666666664,0.463747974034812,0.37266666666666665
6
- 30,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-30,0.39,0.5367753683204347,0.39,0.4299603249123421,0.07566666666666666
7
- 40,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-40,0.466,0.5400134144413437,0.466,0.49542975613961904,0.324
8
- 50,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-50,0.496,0.5465409839032335,0.496,0.5069942984615308,0.24333333333333335
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-00,0.443,0.5490534863315207,0.443,0.43178235266224163,0.594
3
+ 5,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-05,0.177,0.49074939459487404,0.177,0.2155165894788838,0.004
4
+ 10,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-10,0.35433333333333333,0.5213384036972462,0.35433333333333333,0.39783362635065245,0.068
5
+ 20,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-20,0.43666666666666665,0.5234006681691764,0.43666666666666665,0.4691719255495575,0.37266666666666665
6
+ 30,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-30,0.39066666666666666,0.5462493905687185,0.39066666666666666,0.4339604066000981,0.07566666666666666
7
+ 40,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-40,0.4653333333333333,0.5468189581246721,0.4653333333333333,0.49752341605759137,0.324
8
+ 50,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-50,0.5026666666666667,0.5610230233594029,0.5026666666666667,0.5163435163649445,0.24333333333333335
data/Qwen2.5-1.5B-Instruct_metrics.csv CHANGED
@@ -1,5 +1,5 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct_torch.bfloat16_lf,0.178,0.5082549494185494,0.17800000000000005,0.2206670187667368,0.9403333333333334
3
  0.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-35_torch.bfloat16_lf,0.521,0.6393141994049955,0.521,0.5543058103456981,1.0
4
  0.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-70_torch.bfloat16_lf,0.5786666666666667,0.6827334710464682,0.5786666666666667,0.6055896299128966,1.0
5
  0.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-105_torch.bfloat16_lf,0.544,0.7064593462910856,0.544,0.5946365105633672,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct_torch.bfloat16_lf,0.18366666666666667,0.5244570465301668,0.18366666666666667,0.23286492799102732,0.931
3
  0.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-35_torch.bfloat16_lf,0.521,0.6393141994049955,0.521,0.5543058103456981,1.0
4
  0.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-70_torch.bfloat16_lf,0.5786666666666667,0.6827334710464682,0.5786666666666667,0.6055896299128966,1.0
5
  0.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-105_torch.bfloat16_lf,0.544,0.7064593462910856,0.544,0.5946365105633672,1.0
data/Qwen2.5-1.5B-Instruct_results.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/Qwen2.5-1.5B-Instruct_shots_metrics.csv CHANGED
@@ -2,7 +2,7 @@ shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
  0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-00,0.18366666666666667,0.5244570465301668,0.18366666666666667,0.23286492799102732,0.931
3
  5,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-05,0.349,0.5695965528635436,0.349,0.3771117506970461,0.9756666666666667
4
  10,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-10,0.457,0.5932373185073849,0.457,0.4641792696031706,0.9933333333333333
5
- 20,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-20,0.232,0.5282610881631451,0.232,0.3093707499897376,0.676
6
  30,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-30,0.23,0.5479545947886839,0.23,0.3064381040560128,0.661
7
  40,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-40,0.29233333333333333,0.5608411738006117,0.29233333333333333,0.3751714671158081,0.5206666666666667
8
  50,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-50,0.29,0.5646814860840066,0.29,0.36883826526592467,0.4603333333333333
 
2
  0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-00,0.18366666666666667,0.5244570465301668,0.18366666666666667,0.23286492799102732,0.931
3
  5,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-05,0.349,0.5695965528635436,0.349,0.3771117506970461,0.9756666666666667
4
  10,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-10,0.457,0.5932373185073849,0.457,0.4641792696031706,0.9933333333333333
5
+ 20,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-20,0.24166666666666667,0.5333408149946145,0.24166666666666667,0.30859243868426434,0.8263333333333334
6
  30,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-30,0.23,0.5479545947886839,0.23,0.3064381040560128,0.661
7
  40,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-40,0.29233333333333333,0.5608411738006117,0.29233333333333333,0.3751714671158081,0.5206666666666667
8
  50,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-50,0.29,0.5646814860840066,0.29,0.36883826526592467,0.4603333333333333
data/Qwen2.5-3B-Instruct_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct_torch.bfloat16_lf,0.569,0.6886829973126811,0.569,0.5333701103243736,1.0
3
- 0.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-35_torch.bfloat16_lf,0.6833333333333333,0.7269965624622317,0.6833333333333333,0.6985990460224034,0.999
4
- 0.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-70_torch.bfloat16_lf,0.6766666666666666,0.7583682510610537,0.6766666666666666,0.705917900971524,1.0
5
- 0.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-105_torch.bfloat16_lf,0.7023333333333334,0.7500816082620184,0.7023333333333334,0.7196546370690564,1.0
6
- 0.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-140_torch.bfloat16_lf,0.71,0.7658586215313521,0.71,0.7318979017034846,1.0
7
- 1.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-175_torch.bfloat16_lf,0.7053333333333334,0.7645466069416816,0.7053333333333334,0.7278931369071717,1.0
8
- 1.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-210_torch.bfloat16_lf,0.6943333333333334,0.7711644797484947,0.6943333333333334,0.7225635970673485,1.0
9
- 1.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-245_torch.bfloat16_lf,0.7303333333333333,0.7695138336135122,0.7303333333333333,0.7445711153936881,1.0
10
- 1.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-280_torch.bfloat16_lf,0.7273333333333334,0.7726772149368513,0.7273333333333334,0.7426310656072148,1.0
11
- 1.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-315_torch.bfloat16_lf,0.7093333333333334,0.7726776026356509,0.7093333333333334,0.7321516443823387,1.0
12
- 2.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-350_torch.bfloat16_lf,0.7166666666666667,0.7741275713911147,0.7166666666666667,0.7370173522943904,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct_torch.bfloat16_lf,0.5686666666666667,0.6890626192990656,0.5686666666666667,0.5343419392280258,1.0
3
+ 0.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-35_torch.bfloat16_lf,0.6776666666666666,0.7316274074759973,0.6776666666666666,0.6973667827682657,0.999
4
+ 0.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-70_torch.bfloat16_lf,0.6653333333333333,0.758958550711478,0.6653333333333333,0.6997203293940804,1.0
5
+ 0.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-105_torch.bfloat16_lf,0.6963333333333334,0.7542353738754336,0.6963333333333334,0.7176790005966858,1.0
6
+ 0.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-140_torch.bfloat16_lf,0.7006666666666667,0.7661325413638986,0.7006666666666667,0.726396504358645,1.0
7
+ 1.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-175_torch.bfloat16_lf,0.6956666666666667,0.766520728596782,0.6956666666666667,0.7232763444431306,1.0
8
+ 1.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-210_torch.bfloat16_lf,0.683,0.7728473029454707,0.6830000000000002,0.7172831663070369,1.0
9
+ 1.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-245_torch.bfloat16_lf,0.7233333333333334,0.7720989063414209,0.7233333333333334,0.7410476466041488,1.0
10
+ 1.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-280_torch.bfloat16_lf,0.7156666666666667,0.7724266286892245,0.7156666666666667,0.7356331945937126,1.0
11
+ 1.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-315_torch.bfloat16_lf,0.6986666666666667,0.7734046031514225,0.6986666666666667,0.7262724373234384,1.0
12
+ 2.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-350_torch.bfloat16_lf,0.704,0.7725944595890188,0.704,0.7290337960305111,1.0
data/Qwen2.5-3B-Instruct_results.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/Qwen2.5-3B-Instruct_shots_metrics.csv CHANGED
@@ -1,8 +1,8 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-00,0.5783333333333334,0.6938704799615603,0.5783333333333334,0.5482371104670698,1.0
3
- 5,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-05,0.6446666666666667,0.7230280501918229,0.6446666666666667,0.6455439085887453,0.9973333333333333
4
- 10,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-10,0.6356666666666667,0.717399441576705,0.6356666666666667,0.647050125518008,0.995
5
- 20,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-20,0.4806666666666667,0.6978154586535756,0.4806666666666667,0.5325218737400426,0.9316666666666666
6
- 30,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-30,0.48833333333333334,0.6902162639713183,0.48833333333333334,0.5393146850625054,0.904
7
- 40,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-40,0.595,0.7060453498136213,0.595,0.6271468055875201,0.7173333333333334
8
- 50,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-50,0.6186666666666667,0.7088242726720394,0.6186666666666667,0.6483835468519816,0.574
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-00,0.5796666666666667,0.6966500240864278,0.5796666666666667,0.5506370828782681,1.0
3
+ 5,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-05,0.639,0.7226431221398603,0.639,0.641568790114368,0.9973333333333333
4
+ 10,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-10,0.625,0.7164154004131771,0.625,0.6402584852791593,0.995
5
+ 20,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-20,0.4666666666666667,0.6987641430848737,0.46666666666666673,0.5265074036660548,0.9316666666666666
6
+ 30,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-30,0.475,0.6880994914236809,0.475,0.5310948082593374,0.904
7
+ 40,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-40,0.584,0.7065303262365236,0.584,0.6214992664375876,0.7173333333333334
8
+ 50,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-50,0.6093333333333333,0.7120506480394511,0.6093333333333333,0.6451959368825358,0.574
data/Qwen2.5-72B-Instruct_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct_torch.bfloat16_4bit_lf,0.7956666666666666,0.8098073411161181,0.7956666666666666,0.7771317592221199,0.994
3
- 0.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.792,0.8180793658647517,0.792,0.80166512366027,1.0
4
- 0.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.7716666666666666,0.8199569804721152,0.7716666666666666,0.7895879011938259,1.0
5
- 0.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.798,0.8379062379534957,0.798,0.812148680520218,1.0
6
- 0.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.8213333333333334,0.8447926258362122,0.8213333333333334,0.8299486611547571,1.0
7
- 1.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.7643333333333333,0.8235366724638146,0.7643333333333333,0.7858148913986999,1.0
8
- 1.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.7986666666666666,0.83233218480008,0.7986666666666666,0.8115886421806521,1.0
9
- 1.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.7923333333333333,0.8231874218285514,0.7923333333333333,0.803363661387202,1.0
10
- 1.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.7936666666666666,0.8268750473800219,0.7936666666666666,0.8057720333101867,1.0
11
- 1.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.801,0.830389411421043,0.801,0.8117656427717702,1.0
12
- 2.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.795,0.8280696193638868,0.795,0.8068114730639832,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct_torch.bfloat16_4bit_lf,0.7856666666666666,0.7942511546806512,0.7856666666666666,0.7699212943617263,0.994
3
+ 0.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.7736666666666666,0.8102875293385203,0.7736666666666666,0.7874095844134584,1.0
4
+ 0.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.748,0.8094861650366822,0.748,0.7718522396481117,1.0
5
+ 0.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.7576666666666667,0.8111059140562599,0.7576666666666667,0.778271965273475,1.0
6
+ 0.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.7846666666666666,0.8199033961265727,0.7846666666666666,0.7983932694517433,1.0
7
+ 1.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.7396666666666667,0.8132229388907013,0.7396666666666667,0.768164418914878,1.0
8
+ 1.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.7756666666666666,0.8208038975271454,0.7756666666666666,0.7934902567321389,1.0
9
+ 1.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.767,0.8089726144740825,0.767,0.7826437373554418,1.0
10
+ 1.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.7643333333333333,0.8106183296950366,0.7643333333333333,0.7823942859806713,1.0
11
+ 1.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.774,0.814496213163251,0.774,0.7893524517536102,1.0
12
+ 2.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.7673333333333333,0.8134721321251935,0.7673333333333333,0.7849776453559993,1.0
data/Qwen2.5-72B-Instruct_results.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/Qwen2.5-72B-Instruct_shots_metrics.csv CHANGED
@@ -1,3 +1,4 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-00,0.7956666666666666,0.8098073411161181,0.7956666666666666,0.7771317592221199,0.994
3
- 5,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-05,0.819,0.8182324679666184,0.819,0.8095367865845521,0.9416666666666667
 
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-00,0.7856666666666666,0.7942511546806512,0.7856666666666666,0.7699212943617263,0.994
3
+ 5,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-05,0.8113333333333334,0.8112264644451684,0.8113333333333334,0.8039596846574816,0.9416666666666667
4
+ 10,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-10,0.8103333333333333,0.8136844357537636,0.8103333333333333,0.8088046626262355,0.9123333333333333
data/Qwen2.5-7B-Instruct_results.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/best_metrics.csv CHANGED
@@ -1,16 +1,18 @@
1
  index,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 1,Llama3.1-8B (1.0-epoch),Llama3.1-8B (1.0-epoch),0.7853333333333333,0.8062405645226312,0.7853333333333333,0.7938991590982061,1.0
3
- 2,Llama3.1-70B (1.0-epoch),Llama3.1-70B (1.0-epoch),0.809,0.8282732906153989,0.809,0.8166997776775797,1.0
4
- 3,Mistral-7B (1.4-epoch),Mistral-7B (1.4-epoch),0.7616666666666667,0.789634957005121,0.7616666666666667,0.7721210086098353,1.0
5
- 4,InternLM2.5-7B (1.4-epoch),InternLM2.5-7B (1.4-epoch),0.762,0.8089123492151512,0.762,0.7753217972757948,1.0
6
- 5,InternLM2.5-7B-1M (0.8-epoch),InternLM2.5-7B-1M (0.8-epoch),0.8076666666666666,0.8048844422436796,0.8076666666666666,0.8049749805997191,1.0
7
- 6,InternLM2.5-20B (0.8-epoch),InternLM2.5-20B (0.8-epoch),0.8063333333333333,0.8207793607428686,0.8063333333333333,0.811239851005161,1.0
8
- 7,Qwen2.5-0.5B (1.4-epoch),Qwen2.5-0.5B (1.4-epoch),0.5903333333333334,0.6503049529377274,0.5903333333333334,0.6094397514027766,1.0
9
- 8,Qwen2.5-1.5B (0.8-epoch),Qwen2.5-1.5B (0.8-epoch),0.659,0.7267092412287238,0.659,0.6825875108247536,1.0
10
- 9,Qwen2.5-3B (1.4-epoch),Qwen2.5-3B (1.4-epoch),0.7303333333333333,0.7695138336135122,0.7303333333333333,0.7445711153936881,1.0
11
- 10,Qwen2.5-7B (1.0-epoch),Qwen2.5-7B (1.0-epoch),0.782,0.8023938029436536,0.782,0.7888740758699296,0.9993333333333333
12
- 11,Qwen2.5-72B (0.8-epoch),Qwen2.5-72B (0.8-epoch),0.8213333333333334,0.8447926258362122,0.8213333333333334,0.8299486611547571,1.0
13
- 12,gpt-4o-mini (0-shot),gpt-4o-mini (0-shot),0.7166666666666667,0.7800918028217227,0.7166666666666667,0.7260056154268697,1.0
14
- 13,gpt-4o (10-shot),gpt-4o (10-shot),0.8013333333333333,0.8246834383036209,0.8013333333333333,0.8098901724387172,0.9996666666666667
15
- 14,o1-mini (50-shot),o1-mini (50-shot),0.7536666666666667,0.7755130422727871,0.7536666666666667,0.7602241520634903,1.0
16
- 15,o1-preview (50-shot),o1-preview (50-shot),0.7576666666666667,0.7986597718440941,0.7576666666666667,0.7718331604189232,0.9996666666666667
 
 
 
1
  index,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 1,truth,truth,0.0,0.0,0.0,0.0,0.0
3
+ 2,Llama3.1-8B (1.0-epoch),Llama3.1-8B (1.0-epoch),0.78,0.810582723471486,0.78,0.7924651054056209,1.0
4
+ 3,Llama3.1-70B (1.0-epoch),Llama3.1-70B (1.0-epoch),0.7963333333333333,0.8248972880055918,0.7963333333333333,0.8076868978089201,1.0
5
+ 4,Mistral-7B (1.4-epoch),Mistral-7B (1.4-epoch),0.75,0.7885868317699068,0.75,0.7648234347578796,1.0
6
+ 5,InternLM2.5-7B (0.8-epoch),InternLM2.5-7B (0.8-epoch),0.7496666666666667,0.8041871978859686,0.7496666666666667,0.7660159670998776,1.0
7
+ 6,InternLM2.5-7B-1M (0.8-epoch),InternLM2.5-7B-1M (0.8-epoch),0.803,0.8031411888150441,0.803,0.8028064320197301,1.0
8
+ 7,InternLM2.5-20B (0.8-epoch),InternLM2.5-20B (0.8-epoch),0.795,0.817457691710893,0.795,0.8027552955647029,1.0
9
+ 8,Qwen2-7B (0.4-epoch),Qwen2-7B (0.4-epoch),0.759,0.8005303465799652,0.759,0.7748745026535183,1.0
10
+ 9,Qwen2-72B (1.8-epoch),Qwen2-72B (1.8-epoch),0.784,0.8354349234761956,0.784,0.804194683154365,1.0
11
+ 10,Qwen2.5-3B (1.4-epoch),Qwen2.5-3B (1.4-epoch),0.7233333333333334,0.7720989063414209,0.7233333333333334,0.7410476466041488,1.0
12
+ 11,Qwen2.5-7B (1.0-epoch),Qwen2.5-7B (1.0-epoch),0.771,0.8005814962709542,0.771,0.7814602739241332,0.9993333333333333
13
+ 12,Qwen2.5-72B (0.8-epoch),Qwen2.5-72B (0.8-epoch),0.7846666666666666,0.8199033961265727,0.7846666666666666,0.7983932694517433,1.0
14
+ 13,gpt-4o-mini (0-shot),gpt-4o-mini (0-shot),0.7176666666666667,0.785706730193659,0.7176666666666667,0.7296061848734905,1.0
15
+ 14,gpt-4o (10-shot),gpt-4o (10-shot),0.7916666666666666,0.8227707658360168,0.7916666666666666,0.803614688453356,0.9996666666666667
16
+ 15,o1-mini (50-shot),o1-mini (50-shot),0.75,0.7767849265833893,0.75,0.7590020698968893,1.0
17
+ 16,o1-preview (50-shot),o1-preview (50-shot),0.7546666666666667,0.7979981023789272,0.7546666666666667,0.7708181822112403,0.9996666666666667
18
+ 17,Qwen2.5-72B (10-shot),Qwen2.5-72B (10-shot),0.8103333333333333,0.8136844357537636,0.8103333333333333,0.8088046626262355,0.998
data/best_results.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/few-shots_metrics.csv CHANGED
@@ -1,86 +1,75 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,gpt-4o-mini,gpt-4o-mini/shots-00,0.7166666666666667,0.7800918028217227,0.7166666666666667,0.7260056154268697,0.9916666666666668
3
- 5,gpt-4o-mini,gpt-4o-mini/shots-05,0.7203333333333334,0.7754800244789168,0.7203333333333334,0.718540502683781,0.9996666666666668
4
- 10,gpt-4o-mini,gpt-4o-mini/shots-10,0.6836666666666666,0.7701177891593667,0.6836666666666666,0.6932016303210964,0.9983333333333332
5
- 20,gpt-4o-mini,gpt-4o-mini/shots-20,0.6616666666666666,0.7627467933668375,0.6616666666666666,0.677372757519069,0.998
6
- 30,gpt-4o-mini,gpt-4o-mini/shots-30,0.6876666666666666,0.7663381611066244,0.6876666666666666,0.6896169854446027,0.999
7
- 40,gpt-4o-mini,gpt-4o-mini/shots-40,0.6903333333333334,0.7603850760051853,0.6903333333333334,0.688393665975117,0.9986666666666668
8
- 50,gpt-4o-mini,gpt-4o-mini/shots-50,0.7143333333333334,0.7654214682013311,0.7143333333333334,0.7056961582308003,0.9993333333333332
9
- 0,gpt-4o,gpt-4o/shots-00,0.792,0.8234582231232066,0.792,0.8022633746318892,0.066
10
- 5,gpt-4o,gpt-4o/shots-05,0.7973333333333333,0.8251066339666824,0.7973333333333333,0.8066429877716694,0.998
11
- 10,gpt-4o,gpt-4o/shots-10,0.8013333333333333,0.8246834383036209,0.8013333333333333,0.8098901724387172,0.9996666666666668
12
- 20,gpt-4o,gpt-4o/shots-20,0.79,0.822098231279132,0.79,0.8020290214439503,0.9993333333333332
13
- 30,gpt-4o,gpt-4o/shots-30,0.7946666666666666,0.8259436682564079,0.7946666666666666,0.8063113377291872,0.999
14
- 40,gpt-4o,gpt-4o/shots-40,0.7906666666666666,0.8242154446428003,0.7906666666666666,0.803356987717753,0.9973333333333332
15
- 50,gpt-4o,gpt-4o/shots-50,0.798,0.8274250231711487,0.798,0.8091066504350897,0.9993333333333332
16
- 0,o1-mini,o1-mini/shots-00,0.7133333333333334,0.78301872209321,0.7133333333333334,0.7402734333211688,0.999
17
- 5,o1-mini,o1-mini/shots-05,0.7313333333333333,0.7913577967036569,0.7313333333333333,0.7532525881890013,0.9966666666666668
18
- 10,o1-mini,o1-mini/shots-10,0.7283333333333334,0.7851844846890333,0.7283333333333334,0.7490987096521479,0.9943333333333332
19
- 20,o1-mini,o1-mini/shots-20,0.7373333333333333,0.7815727856803751,0.7373333333333333,0.7533353509620383,0.9946666666666668
20
- 30,o1-mini,o1-mini/shots-30,0.748,0.779168441371953,0.748,0.7583397172973073,0.9976666666666668
21
- 40,o1-mini,o1-mini/shots-40,0.7496666666666667,0.775765877349714,0.7496666666666667,0.757640226210139,0.9976666666666668
22
- 50,o1-mini,o1-mini/shots-50,0.7536666666666667,0.7755130422727871,0.7536666666666667,0.7602241520634903,0.9976666666666668
23
- 0,o1-preview,o1-preview/shots-00,0.725,0.7860443296236067,0.725,0.7471736898827371,0.998
24
- 5,o1-preview,o1-preview/shots-05,0.736,0.789169445854742,0.736,0.7557068489703724,0.979
25
- 10,o1-preview,o1-preview/shots-10,0.7513333333333333,0.7947574632958824,0.7513333333333333,0.7673707529850041,0.9873333333333332
26
- 20,o1-preview,o1-preview/shots-20,0.7483333333333333,0.790639591375103,0.7483333333333333,0.763324860719675,0.9853333333333332
27
- 30,o1-preview,o1-preview/shots-30,0.7513333333333333,0.792049804996314,0.7513333333333333,0.7654800949250774,0.984
28
- 40,o1-preview,o1-preview/shots-40,0.7526666666666667,0.795308022968859,0.7526666666666667,0.7672762517397222,0.984
29
- 50,o1-preview,o1-preview/shots-50,0.7576666666666667,0.7986597718440941,0.7576666666666667,0.7718331604189232,0.9816666666666668
30
- 0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-00,0.7343333333333333,0.7375752740091942,0.7343333333333333,0.7270283652909943,0.8033333333333333
31
- 5,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-05,0.7056666666666667,0.7508515184863084,0.7056666666666667,0.7230574380518462,0.9886666666666668
32
- 10,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-10,0.6736666666666666,0.7776004745989736,0.6736666666666666,0.7094104807112239,0.9623333333333334
33
- 20,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-20,0.767,0.764982587229615,0.767,0.7638473265780445,0.979
34
- 30,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-30,0.7713333333333333,0.7725685630276532,0.7713333333333333,0.7692692690410152,0.7326666666666667
35
- 40,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-40,0.6873333333333334,0.773294758147205,0.6873333333333334,0.7075877720686631,0.759
36
- 50,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-50,0.7176666666666667,0.7599215931134234,0.7176666666666667,0.7203550920641806,0.6623333333333333
37
- 0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-00,0.7646666666666667,0.7804609488644828,0.7646666666666667,0.7497548621711109,0.0096666666666666
38
- 5,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-05,0.754,0.7675695134276339,0.754,0.7530665717237273,0.79
39
- 10,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-10,0.756,0.7695738042762151,0.756,0.7563878737797524,0.8326666666666667
40
- 20,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-20,0.7406666666666667,0.7560876641054418,0.7406666666666667,0.7360011002310723,0.819
41
- 30,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-30,0.7603333333333333,0.7710641222872985,0.7603333333333333,0.7570501796584528,0.548
42
- 0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-00,0.6923333333333334,0.7009179792741449,0.6923333333333334,0.6605899639694456,0.0116666666666666
43
- 5,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-05,0.6546666666666666,0.7415422757067709,0.6546666666666666,0.684189810233595,0.142
44
- 10,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-10,0.612,0.7259976964524691,0.612,0.6501410678512595,0.1063333333333333
45
- 20,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-20,0.6336666666666667,0.7315100617022602,0.6336666666666667,0.6683245802083553,0.0826666666666666
46
- 30,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-30,0.665,0.7374233826761456,0.665,0.6872462947319797,0.07
47
- 40,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-40,0.6306666666666667,0.7422868762493116,0.6306666666666667,0.6670711390706651,0.0633333333333333
48
- 0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-00,0.7063333333333334,0.7369785607161373,0.7063333333333334,0.6895815239121195,1.0
49
- 5,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-05,0.747,0.7433195768374967,0.747,0.7232456014841266,0.999
50
- 10,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-10,0.559,0.7306434812774306,0.559,0.6287391975839828,0.9883333333333332
51
- 20,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-20,0.6466666666666666,0.7143354332969056,0.6466666666666666,0.6738164117926014,0.9473333333333334
52
- 30,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-30,0.626,0.7223442225693745,0.626,0.6494216734706632,0.9403333333333334
53
- 40,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-40,0.64,0.7020393671564193,0.64,0.611996460461355,0.9813333333333332
54
- 50,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-50,0.6116666666666667,0.6808793455512054,0.6116666666666667,0.5502581431071487,0.9803333333333332
55
- 0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-00,0.4923333333333333,0.7570993062022159,0.4923333333333333,0.5279738886353613,0.9986666666666668
56
- 5,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-05,0.7753333333333333,0.7586378181445387,0.7753333333333333,0.7665405919258307,0.9453333333333334
57
- 10,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-10,0.654,0.7251381758855274,0.654,0.6681655588675279,0.8866666666666667
58
- 20,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-20,0.677,0.7296467412730754,0.677,0.6780570012166849,0.8213333333333334
59
- 30,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-30,0.68,0.7425906069240685,0.68,0.6837924261094331,0.8236666666666667
60
- 40,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-40,0.726,0.7533750344411337,0.726,0.7132456474026365,0.8336666666666667
61
- 50,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-50,0.7173333333333334,0.7471186719787132,0.7173333333333334,0.6980283743779222,0.8846666666666667
62
- 0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/shots-00,0.575,0.7745319004159336,0.575,0.6416875854199033,0.6726666666666666
63
- 0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-00,0.4383333333333333,0.5292917259914629,0.4383333333333333,0.4228687599248655,0.594
64
- 5,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-05,0.1796666666666666,0.475165738531098,0.1796666666666666,0.214144872117911,0.004
65
- 10,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-10,0.351,0.5084853117995367,0.351,0.3909783959403107,0.068
66
- 20,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-20,0.4336666666666666,0.513186330900278,0.4336666666666666,0.463747974034812,0.3726666666666666
67
- 30,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-30,0.39,0.5367753683204347,0.39,0.4299603249123421,0.0756666666666666
68
- 40,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-40,0.466,0.5400134144413437,0.466,0.495429756139619,0.324
69
- 50,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-50,0.496,0.5465409839032335,0.496,0.5069942984615308,0.2433333333333333
70
- 0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-00,0.1836666666666666,0.5244570465301668,0.1836666666666666,0.2328649279910273,0.931
71
- 5,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-05,0.349,0.5695965528635436,0.349,0.3771117506970461,0.9756666666666668
72
- 10,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-10,0.407,0.5820145311822223,0.407,0.459589777544246,0.9156666666666666
73
- 20,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-20,0.232,0.5282610881631451,0.232,0.3093707499897376,0.676
74
- 30,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-30,0.23,0.5479545947886839,0.23,0.3064381040560128,0.661
75
- 40,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-40,0.2923333333333333,0.5608411738006117,0.2923333333333333,0.3751714671158081,0.5206666666666667
76
- 50,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-50,0.29,0.5646814860840066,0.29,0.3688382652659246,0.4603333333333333
77
- 0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-00,0.5783333333333334,0.6938704799615603,0.5783333333333334,0.5482371104670698,1.0
78
- 5,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-05,0.6446666666666667,0.7230280501918229,0.6446666666666667,0.6455439085887453,0.9973333333333332
79
- 10,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-10,0.6356666666666667,0.717399441576705,0.6356666666666667,0.647050125518008,0.995
80
- 20,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-20,0.4806666666666667,0.6978154586535756,0.4806666666666667,0.5325218737400426,0.9316666666666666
81
- 30,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-30,0.4883333333333333,0.6902162639713183,0.4883333333333333,0.5393146850625054,0.904
82
- 40,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-40,0.595,0.7060453498136213,0.595,0.6271468055875201,0.7173333333333334
83
- 50,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-50,0.6186666666666667,0.7088242726720394,0.6186666666666667,0.6483835468519816,0.574
84
  0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-00,0.6436666666666667,0.717651042027604,0.6436666666666667,0.6066932578767255,1.0
85
  5,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-05,0.63,0.7622571683877091,0.63,0.6151126410759672,0.998
86
  10,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-10,0.677,0.7663956674673086,0.677,0.6770580664953397,0.9796666666666668
@@ -88,5 +77,6 @@ shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
88
  30,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-30,0.765,0.7840432806350224,0.765,0.7512220322751986,0.805
89
  40,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-40,0.757,0.7733827213068922,0.757,0.7427592763321033,0.8546666666666667
90
  50,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-50,0.758,0.763149679724481,0.758,0.7376580515312735,0.7563333333333333
91
- 0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-00,0.7956666666666666,0.8098073411161181,0.7956666666666666,0.7771317592221199,0.994
92
- 5,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-05,0.819,0.8182324679666184,0.819,0.8095367865845521,0.9416666666666668
 
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,gpt-4o-mini,gpt-4o-mini/shots-00,0.7176666666666667,0.785706730193659,0.7176666666666667,0.7296061848734905,0.9916666666666668
3
+ 5,gpt-4o-mini,gpt-4o-mini/shots-05,0.7176666666666667,0.7767294185987051,0.7176666666666667,0.7181068311028772,0.9996666666666668
4
+ 10,gpt-4o-mini,gpt-4o-mini/shots-10,0.6793333333333333,0.7728086050218999,0.6793333333333333,0.6916749681933937,0.9983333333333332
5
+ 20,gpt-4o-mini,gpt-4o-mini/shots-20,0.6623333333333333,0.7686706009175459,0.6623333333333333,0.6798015109939115,0.998
6
+ 30,gpt-4o-mini,gpt-4o-mini/shots-30,0.6873333333333334,0.7684209723431035,0.6873333333333334,0.6913018667081989,0.999
7
+ 40,gpt-4o-mini,gpt-4o-mini/shots-40,0.6923333333333334,0.7639874967862498,0.6923333333333334,0.6924934068935911,0.9986666666666668
8
+ 50,gpt-4o-mini,gpt-4o-mini/shots-50,0.717,0.7692638634416518,0.717,0.7105227254860433,0.9993333333333332
9
+ 0,gpt-4o,gpt-4o/shots-00,0.782,0.8204048322982596,0.782,0.7953019682198627,0.066
10
+ 5,gpt-4o,gpt-4o/shots-05,0.7873333333333333,0.8230974205170392,0.7873333333333333,0.8000290527498529,0.998
11
+ 10,gpt-4o,gpt-4o/shots-10,0.7916666666666666,0.8227707658360168,0.7916666666666666,0.803614688453356,0.9996666666666668
12
+ 20,gpt-4o,gpt-4o/shots-20,0.7816666666666666,0.8204541793856629,0.7816666666666666,0.7967017169880498,0.9993333333333332
13
+ 30,gpt-4o,gpt-4o/shots-30,0.7886666666666666,0.8260847852316618,0.7886666666666666,0.8030949295928699,0.999
14
+ 40,gpt-4o,gpt-4o/shots-40,0.784,0.8233509309291644,0.784,0.7993336791122846,0.9973333333333332
15
+ 50,gpt-4o,gpt-4o/shots-50,0.787,0.8234800466218334,0.787,0.8013530974301947,0.9993333333333332
16
+ 0,o1-mini,o1-mini/shots-00,0.7083333333333334,0.7848098266888749,0.7083333333333334,0.7377068425566796,0.999
17
+ 5,o1-mini,o1-mini/shots-05,0.724,0.7905045610386181,0.724,0.7482963122126776,0.9966666666666668
18
+ 10,o1-mini,o1-mini/shots-10,0.725,0.7892485648334764,0.725,0.7485623974683336,0.9943333333333332
19
+ 20,o1-mini,o1-mini/shots-20,0.7343333333333333,0.786101455887261,0.7343333333333333,0.7535300565051624,0.9946666666666668
20
+ 30,o1-mini,o1-mini/shots-30,0.7416666666666667,0.7791875084643942,0.7416666666666667,0.7548378729964869,0.9976666666666668
21
+ 40,o1-mini,o1-mini/shots-40,0.7466666666666667,0.7783660257118015,0.7466666666666667,0.7572644424023218,0.9976666666666668
22
+ 50,o1-mini,o1-mini/shots-50,0.75,0.7767849265833893,0.75,0.7590020698968893,0.9976666666666668
23
+ 0,o1-preview,o1-preview/shots-00,0.721,0.7849371317342158,0.721,0.7451207069815194,0.998
24
+ 5,o1-preview,o1-preview/shots-05,0.7313333333333333,0.7878283093765627,0.7313333333333333,0.7535489719321234,0.979
25
+ 10,o1-preview,o1-preview/shots-10,0.749,0.7964482186234537,0.749,0.7677316493549238,0.9873333333333332
26
+ 20,o1-preview,o1-preview/shots-20,0.7443333333333333,0.7911442834260676,0.7443333333333333,0.7625144090816939,0.9853333333333332
27
+ 30,o1-preview,o1-preview/shots-30,0.7473333333333333,0.7920604378746952,0.7473333333333333,0.7643977099599287,0.984
28
+ 40,o1-preview,o1-preview/shots-40,0.7506666666666667,0.7964679024468982,0.7506666666666667,0.7674109766459014,0.984
29
+ 50,o1-preview,o1-preview/shots-50,0.7546666666666667,0.7979981023789272,0.7546666666666667,0.7708181822112403,0.9816666666666668
30
+ 0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-00,0.742,0.7477056799746837,0.742,0.7371050181385632,0.8033333333333333
31
+ 5,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-05,0.7056666666666667,0.7605745196939752,0.7056666666666667,0.7269189565098723,0.9886666666666668
32
+ 10,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-10,0.6676666666666666,0.7834080522821993,0.6676666666666666,0.7082605860921491,0.9623333333333334
33
+ 20,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-20,0.767,0.7690587905035869,0.767,0.7661695279121855,0.979
34
+ 30,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-30,0.7693333333333333,0.7765844200886581,0.7693333333333333,0.7697325957683855,0.7326666666666667
35
+ 40,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-40,0.6813333333333333,0.7801328325609714,0.6813333333333333,0.7066801804415871,0.759
36
+ 50,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/shots-50,0.7173333333333334,0.770076853795054,0.7173333333333334,0.723119179918213,0.6623333333333333
37
+ 0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-00,0.7636666666666667,0.7806653325131986,0.7636666666666667,0.7525813484548423,0.0096666666666666
38
+ 5,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-05,0.7536666666666667,0.772126097633354,0.7536666666666667,0.7545029613768596,0.79
39
+ 10,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-10,0.754,0.7729477984842943,0.754,0.756682017266956,0.8326666666666667
40
+ 20,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-20,0.738,0.7566938786102072,0.738,0.7348961489952073,0.819
41
+ 30,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/shots-30,0.758,0.7731535340331644,0.758,0.7565012256889623,0.548
42
+ 0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-00,0.6946666666666667,0.701136267898111,0.6946666666666667,0.6634078645357937,0.0116666666666666
43
+ 5,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-05,0.6446666666666667,0.7451807329096397,0.6446666666666667,0.681030628954011,0.142
44
+ 10,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-10,0.6036666666666667,0.7334913867282189,0.6036666666666667,0.6493185547247415,0.1063333333333333
45
+ 20,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-20,0.6276666666666667,0.7398894455389585,0.6276666666666667,0.6690543758928521,0.0826666666666666
46
+ 30,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-30,0.661,0.7422079284443324,0.661,0.6862974695781847,0.07
47
+ 40,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-40,0.6233333333333333,0.7465186818567994,0.6233333333333333,0.6643697809628606,0.0633333333333333
48
+ 0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-00,0.705,0.7398041613378253,0.705,0.6906357423169466,1.0
49
+ 5,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-05,0.7476666666666667,0.746806876028684,0.7476666666666667,0.7270588443494302,0.999
50
+ 10,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-10,0.5533333333333333,0.7301739373336078,0.5533333333333333,0.625097481985829,0.9883333333333332
51
+ 20,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-20,0.647,0.721136036365055,0.647,0.6769738108371004,0.9473333333333334
52
+ 30,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-30,0.6263333333333333,0.7256804685839701,0.6263333333333333,0.6534519727626863,0.9403333333333334
53
+ 40,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-40,0.642,0.7045435138304105,0.642,0.6161646934220135,0.9813333333333332
54
+ 50,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-50,0.6166666666666667,0.6959837361921766,0.6166666666666667,0.5567537556050285,0.9803333333333332
55
+ 0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-00,0.4813333333333333,0.7605248207587668,0.4813333333333333,0.5244515621126862,0.9986666666666668
56
+ 5,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-05,0.7763333333333333,0.7640598325070357,0.7763333333333333,0.7700878172419743,0.9453333333333334
57
+ 10,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-10,0.6473333333333333,0.7282065610714444,0.6473333333333333,0.665824871588245,0.8866666666666667
58
+ 20,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-20,0.6733333333333333,0.7314610506764355,0.6733333333333333,0.6764198712634657,0.8213333333333334
59
+ 30,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-30,0.6736666666666666,0.7482542000402412,0.6736666666666666,0.6810446770610585,0.8236666666666667
60
+ 40,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-40,0.724,0.7567654663125225,0.724,0.712500180941536,0.8336666666666667
61
+ 50,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-50,0.7213333333333334,0.7546008508718184,0.7213333333333334,0.70308601382351,0.8846666666666667
62
+ 0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/shots-00,0.564,0.7745256693833624,0.564,0.6352190975436365,0.6726666666666666
63
+ 0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-00,0.683,0.7493103872717293,0.683,0.710140098232232,0.9996666666666668
64
+ 10,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-10,0.5646666666666667,0.7391197908117386,0.5646666666666667,0.6064049121095652,0.9896666666666668
65
+ 0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch/shots-00,0.7516666666666667,0.7949378981748352,0.7516666666666667,0.7572499605227642,0.9773333333333334
66
+ 0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-00,0.5796666666666667,0.6966500240864278,0.5796666666666667,0.5506370828782681,1.0
67
+ 5,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-05,0.639,0.7226431221398603,0.639,0.641568790114368,0.9973333333333332
68
+ 10,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-10,0.625,0.7164154004131771,0.625,0.6402584852791593,0.995
69
+ 20,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-20,0.4666666666666667,0.6987641430848737,0.4666666666666667,0.5265074036660548,0.9316666666666666
70
+ 30,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-30,0.475,0.6880994914236809,0.475,0.5310948082593374,0.904
71
+ 40,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-40,0.584,0.7065303262365236,0.584,0.6214992664375876,0.7173333333333334
72
+ 50,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-50,0.6093333333333333,0.7120506480394511,0.6093333333333333,0.6451959368825358,0.574
 
 
 
 
 
 
 
 
 
 
 
73
  0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-00,0.6436666666666667,0.717651042027604,0.6436666666666667,0.6066932578767255,1.0
74
  5,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-05,0.63,0.7622571683877091,0.63,0.6151126410759672,0.998
75
  10,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-10,0.677,0.7663956674673086,0.677,0.6770580664953397,0.9796666666666668
 
77
  30,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-30,0.765,0.7840432806350224,0.765,0.7512220322751986,0.805
78
  40,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-40,0.757,0.7733827213068922,0.757,0.7427592763321033,0.8546666666666667
79
  50,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-50,0.758,0.763149679724481,0.758,0.7376580515312735,0.7563333333333333
80
+ 0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-00,0.7856666666666666,0.7942511546806512,0.7856666666666666,0.7699212943617263,0.994
81
+ 5,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-05,0.8113333333333334,0.8112264644451684,0.8113333333333334,0.8039596846574816,0.9416666666666668
82
+ 10,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-10,0.8103333333333333,0.8136844357537636,0.8103333333333333,0.8088046626262355,0.9123333333333332
data/fine-tuning_metrics.csv CHANGED
@@ -1,103 +1,103 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat_torch.float16_lf,0.7343333333333333,0.7375752740091942,0.7343333333333333,0.7270283652909943,0.8033333333333333
3
- 0.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-35_torch.float16_lf,0.717,0.7933072428707201,0.717,0.7447412977676989,1.0
4
- 0.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-70_torch.float16_lf,0.7226666666666667,0.7983383063141186,0.7226666666666667,0.7489397350174751,0.9993333333333332
5
- 0.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-105_torch.float16_lf,0.7083333333333334,0.7967030927405547,0.7083333333333334,0.738836849803633,1.0
6
- 0.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7773333333333333,0.805139129977305,0.7773333333333333,0.7882159693114585,1.0
7
- 1.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-175_torch.float16_lf,0.7853333333333333,0.8062405645226312,0.7853333333333333,0.7938991590982061,1.0
8
- 1.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7436666666666667,0.8148316221752646,0.7436666666666667,0.7689773286065246,1.0
9
- 1.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-245_torch.float16_lf,0.759,0.8080929326806991,0.759,0.7772842274293189,1.0
10
- 1.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-280_torch.float16_lf,0.745,0.8027959680086005,0.745,0.7666181725503965,1.0
11
- 1.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-315_torch.float16_lf,0.7303333333333333,0.806805925253305,0.7303333333333333,0.7580841794383364,1.0
12
- 2.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-350_torch.float16_lf,0.737,0.808786608325944,0.737,0.7629963845364953,1.0
13
- 0.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat_torch.bfloat16_4bit_lf,0.7646666666666667,0.7804609488644828,0.7646666666666667,0.7497548621711109,0.0096666666666666
14
- 0.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-35_torch.bfloat16_4bit_lf,0.784,0.8105343792887019,0.784,0.7931742141608462,0.9996666666666668
15
- 0.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7426666666666667,0.8117033235947096,0.7426666666666667,0.7673825750808414,1.0
16
- 0.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-105_torch.bfloat16_4bit_lf,0.736,0.8227236574891071,0.736,0.7650739090144549,1.0
17
- 0.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-140_torch.bfloat16_4bit_lf,0.7686666666666667,0.8259659464402258,0.7686666666666667,0.7880870865039342,1.0
18
- 1.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-175_torch.bfloat16_4bit_lf,0.809,0.8282732906153989,0.809,0.8166997776775797,1.0
19
- 1.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-210_torch.bfloat16_4bit_lf,0.75,0.8287348768409003,0.75,0.7741734526674708,1.0
20
- 1.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7703333333333333,0.8271894042316865,0.7703333333333333,0.7907617274354051,1.0
21
- 1.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-280_torch.bfloat16_4bit_lf,0.776,0.8315436250878178,0.776,0.7959870550088912,1.0
22
- 1.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7733333333333333,0.8327336470976,0.7733333333333333,0.7947537193805649,1.0
23
- 2.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7686666666666667,0.8329633784586954,0.7686666666666667,0.7914454794587963,1.0
24
- 0.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat_torch.float16_lf,0.6923333333333334,0.7009179792741449,0.6923333333333334,0.6605899639694456,0.0116666666666666
25
- 0.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-35_torch.float16_lf,0.706,0.7832545046834243,0.706,0.7323466131711432,1.0
26
- 0.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-70_torch.float16_lf,0.7476666666666667,0.7836120158306894,0.7476666666666667,0.7557791381509955,1.0
27
- 0.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-105_torch.float16_lf,0.6736666666666666,0.7908140272002406,0.6736666666666666,0.7129951145360993,1.0
28
- 0.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7293333333333333,0.788387677637057,0.7293333333333333,0.7494137469900564,1.0
29
- 1.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-175_torch.float16_lf,0.74,0.7833068129490098,0.74,0.7499935485741815,1.0
30
- 1.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7146666666666667,0.7890760288118991,0.7146666666666667,0.7411240160229633,1.0
31
- 1.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-245_torch.float16_lf,0.7616666666666667,0.789634957005121,0.7616666666666667,0.7721210086098353,1.0
32
- 1.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-280_torch.float16_lf,0.7296666666666667,0.7854982015370922,0.7296666666666667,0.7491267995936699,1.0
33
- 1.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-315_torch.float16_lf,0.7076666666666667,0.7877874532247918,0.7076666666666667,0.7346283562321456,1.0
34
- 2.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-350_torch.float16_lf,0.713,0.7895690867103055,0.713,0.739013227401175,1.0
35
- 0.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat_torch.bfloat16_lf,0.7063333333333334,0.7369785607161373,0.7063333333333334,0.6895815239121195,1.0
36
- 0.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-35_torch.bfloat16_lf,0.729,0.7861122408311365,0.729,0.7385163226667387,1.0
37
- 0.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-70_torch.bfloat16_lf,0.7336666666666667,0.7857703796539939,0.7336666666666667,0.7427841254119673,1.0
38
- 0.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-105_torch.bfloat16_lf,0.6876666666666666,0.8030976203819039,0.6876666666666666,0.7170750416800897,1.0
39
- 0.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-140_torch.bfloat16_lf,0.762,0.8063331692665241,0.762,0.7740172985498378,1.0
40
- 1.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-175_torch.bfloat16_lf,0.7416666666666667,0.812190204769964,0.7416666666666667,0.761129466343473,1.0
41
- 1.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-210_torch.bfloat16_lf,0.7443333333333333,0.8084922204218251,0.7443333333333333,0.7599422989743019,1.0
42
- 1.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-245_torch.bfloat16_lf,0.762,0.8089123492151512,0.762,0.7753217972757948,1.0
43
- 1.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-280_torch.bfloat16_lf,0.733,0.8092774765454144,0.733,0.7535080746086277,1.0
44
- 1.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-315_torch.bfloat16_lf,0.7156666666666667,0.814456776214162,0.7156666666666667,0.744622807072089,1.0
45
- 2.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-350_torch.bfloat16_lf,0.725,0.8148156790328904,0.725,0.7509650741005044,1.0
46
- 0.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m_torch.bfloat16_lf,0.4923333333333333,0.7570993062022159,0.4923333333333333,0.5279738886353613,0.9986666666666668
47
- 0.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-35_torch.bfloat16_lf,0.7843333333333333,0.7907732469871145,0.7843333333333333,0.7839137508042926,1.0
48
- 0.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-70_torch.bfloat16_lf,0.7876666666666666,0.7961110449860888,0.7876666666666666,0.790011839264191,1.0
49
- 0.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-105_torch.bfloat16_lf,0.74,0.818451985781803,0.74,0.7654385146358808,1.0
50
- 0.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-140_torch.bfloat16_lf,0.8076666666666666,0.8048844422436796,0.8076666666666666,0.8049749805997191,1.0
51
- 1.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-175_torch.bfloat16_lf,0.7796666666666666,0.8115925869684188,0.7796666666666666,0.7917308842405348,1.0
52
- 1.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-210_torch.bfloat16_lf,0.7816666666666666,0.8082575556171326,0.7816666666666666,0.7920155623671598,1.0
53
- 1.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-245_torch.bfloat16_lf,0.7736666666666666,0.8074649930391711,0.7736666666666666,0.7846002379939621,1.0
54
- 1.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-280_torch.bfloat16_lf,0.771,0.8124579857634519,0.771,0.7859698091956198,1.0
55
- 1.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-315_torch.bfloat16_lf,0.7646666666666667,0.8211516901334176,0.7646666666666667,0.7848541283802248,1.0
56
- 2.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-350_torch.bfloat16_lf,0.77,0.8144910397034413,0.77,0.7862970454955438,1.0
57
- 0.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat_torch.bfloat16_4bit_lf,0.575,0.7745319004159336,0.575,0.6416875854199033,0.6726666666666666
58
- 0.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-35_torch.bfloat16_4bit_lf,0.7723333333333333,0.8004877872664371,0.7723333333333333,0.7800315047324102,1.0
59
- 0.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-70_torch.bfloat16_4bit_lf,0.788,0.808878367860496,0.788,0.7952965901503556,1.0
60
- 0.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7223333333333334,0.8101427633407874,0.7223333333333334,0.7527524454293278,1.0
61
- 0.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-140_torch.bfloat16_4bit_lf,0.8063333333333333,0.8207793607428686,0.8063333333333333,0.811239851005161,1.0
62
- 1.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-175_torch.bfloat16_4bit_lf,0.792,0.8244746715585061,0.792,0.8028680300441688,1.0
63
- 1.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-210_torch.bfloat16_4bit_lf,0.77,0.8305821984199763,0.77,0.7905012003721434,1.0
64
- 1.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-245_torch.bfloat16_4bit_lf,0.8033333333333333,0.8215999742478901,0.8033333333333333,0.8087445768968825,1.0
65
- 1.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-280_torch.bfloat16_4bit_lf,0.795,0.8261993807231882,0.795,0.805022820640186,1.0
66
- 1.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-315_torch.bfloat16_4bit_lf,0.779,0.8256828719565774,0.779,0.7946766547953676,1.0
67
- 2.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7826666666666666,0.8284951420712369,0.7826666666666666,0.7978785507522372,1.0
68
- 0.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct_torch.float16_lf,0.4383333333333333,0.5292917259914629,0.4383333333333333,0.4228687599248655,0.594
69
- 0.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-35_torch.float16_lf,0.5223333333333333,0.5704911830866488,0.5223333333333333,0.454387436259078,1.0
70
- 0.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-70_torch.float16_lf,0.542,0.6358012674347429,0.542,0.5272438410312219,1.0
71
- 0.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-105_torch.float16_lf,0.4463333333333333,0.6477441598024034,0.4463333333333333,0.4917457459702999,1.0
72
- 0.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-140_torch.float16_lf,0.5053333333333333,0.6438300456580985,0.5053333333333333,0.4995247505211914,1.0
73
- 1.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-175_torch.float16_lf,0.558,0.6560369730369926,0.558,0.5632487818615118,1.0
74
- 1.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-210_torch.float16_lf,0.5453333333333333,0.6357935773889876,0.5453333333333333,0.5594242895140294,1.0
75
- 1.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-245_torch.float16_lf,0.5903333333333334,0.6503049529377274,0.5903333333333334,0.6094397514027766,1.0
76
- 1.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-280_torch.float16_lf,0.5286666666666666,0.6532851084098983,0.5286666666666666,0.5617239467523474,1.0
77
- 1.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-315_torch.float16_lf,0.5336666666666666,0.6607103736450911,0.5336666666666666,0.5622949959647037,1.0
78
- 2.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-350_torch.float16_lf,0.5156666666666667,0.652809461208547,0.5156666666666667,0.549955024535151,1.0
79
- 0.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct_torch.bfloat16_lf,0.178,0.5082549494185494,0.178,0.2206670187667368,0.9403333333333334
80
- 0.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-35_torch.bfloat16_lf,0.521,0.6393141994049955,0.521,0.5543058103456981,1.0
81
- 0.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-70_torch.bfloat16_lf,0.5786666666666667,0.6827334710464682,0.5786666666666667,0.6055896299128966,1.0
82
- 0.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-105_torch.bfloat16_lf,0.544,0.7064593462910856,0.544,0.5946365105633672,1.0
83
- 0.8,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-140_torch.bfloat16_lf,0.659,0.7267092412287238,0.659,0.6825875108247536,1.0
84
- 1.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-175_torch.bfloat16_lf,0.637,0.7191389576964738,0.637,0.6562859054038414,1.0
85
- 1.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-210_torch.bfloat16_lf,0.6086666666666667,0.7293412868960213,0.6086666666666667,0.6479350184617141,1.0
86
- 1.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-245_torch.bfloat16_lf,0.6326666666666667,0.716380475510422,0.6326666666666667,0.6591217616290708,1.0
87
- 1.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-280_torch.bfloat16_lf,0.6273333333333333,0.7224778228100358,0.6273333333333333,0.6551405164716649,1.0
88
- 1.8,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-315_torch.bfloat16_lf,0.5973333333333334,0.7263124149931549,0.5973333333333334,0.6349391744052281,1.0
89
- 2.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-350_torch.bfloat16_lf,0.6046666666666667,0.7203284046544999,0.6046666666666667,0.6377776248713325,1.0
90
- 0.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct_torch.bfloat16_lf,0.569,0.6886829973126811,0.569,0.5333701103243736,1.0
91
- 0.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-35_torch.bfloat16_lf,0.6833333333333333,0.7269965624622317,0.6833333333333333,0.6985990460224034,0.999
92
- 0.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-70_torch.bfloat16_lf,0.6766666666666666,0.7583682510610537,0.6766666666666666,0.705917900971524,1.0
93
- 0.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-105_torch.bfloat16_lf,0.7023333333333334,0.7500816082620184,0.7023333333333334,0.7196546370690564,1.0
94
- 0.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-140_torch.bfloat16_lf,0.71,0.7658586215313521,0.71,0.7318979017034846,1.0
95
- 1.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-175_torch.bfloat16_lf,0.7053333333333334,0.7645466069416816,0.7053333333333334,0.7278931369071717,1.0
96
- 1.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-210_torch.bfloat16_lf,0.6943333333333334,0.7711644797484947,0.6943333333333334,0.7225635970673485,1.0
97
- 1.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-245_torch.bfloat16_lf,0.7303333333333333,0.7695138336135122,0.7303333333333333,0.7445711153936881,1.0
98
- 1.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-280_torch.bfloat16_lf,0.7273333333333334,0.7726772149368513,0.7273333333333334,0.7426310656072148,1.0
99
- 1.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-315_torch.bfloat16_lf,0.7093333333333334,0.7726776026356509,0.7093333333333334,0.7321516443823387,1.0
100
- 2.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-350_torch.bfloat16_lf,0.7166666666666667,0.7741275713911147,0.7166666666666667,0.7370173522943904,1.0
101
  0.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct_torch.bfloat16_lf,0.6436666666666667,0.717651042027604,0.6436666666666667,0.6066932578767255,1.0
102
  0.2,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-35_torch.bfloat16_lf,0.7473333333333333,0.759526705532232,0.7473333333333333,0.7480522291877509,0.998
103
  0.4,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-70_torch.bfloat16_lf,0.752,0.7774114736945115,0.752,0.7611191332452362,0.9996666666666668
@@ -109,14 +109,14 @@ epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
109
  1.6,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-280_torch.bfloat16_lf,0.7726666666666666,0.8006851113573145,0.7726666666666666,0.7813968284378919,0.9996666666666668
110
  1.8,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-315_torch.bfloat16_lf,0.7696666666666667,0.799287702962426,0.7696666666666667,0.7792120245789584,0.9993333333333332
111
  2.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-350_torch.bfloat16_lf,0.769,0.8010881984531473,0.769,0.7793801070552965,0.9996666666666668
112
- 0.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct_torch.bfloat16_4bit_lf,0.7956666666666666,0.8098073411161181,0.7956666666666666,0.7771317592221199,0.994
113
- 0.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.792,0.8180793658647517,0.792,0.80166512366027,1.0
114
- 0.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.7716666666666666,0.8199569804721152,0.7716666666666666,0.7895879011938259,1.0
115
- 0.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.798,0.8379062379534957,0.798,0.812148680520218,1.0
116
- 0.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.8213333333333334,0.8447926258362122,0.8213333333333334,0.8299486611547571,1.0
117
- 1.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.7643333333333333,0.8235366724638146,0.7643333333333333,0.7858148913986999,1.0
118
- 1.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.7986666666666666,0.83233218480008,0.7986666666666666,0.8115886421806521,1.0
119
- 1.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.7923333333333333,0.8231874218285514,0.7923333333333333,0.803363661387202,1.0
120
- 1.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.7936666666666666,0.8268750473800219,0.7936666666666666,0.8057720333101867,1.0
121
- 1.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.801,0.830389411421043,0.801,0.8117656427717702,1.0
122
- 2.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.795,0.8280696193638868,0.795,0.8068114730639832,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat_torch.float16_lf,0.742,0.7477056799746837,0.742,0.7371050181385632,0.8033333333333333
3
+ 0.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-35_torch.float16_lf,0.709,0.7987219597893886,0.709,0.7427961200958145,1.0
4
+ 0.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-70_torch.float16_lf,0.7163333333333334,0.8058657875960304,0.7163333333333334,0.7487811196109319,0.9993333333333332
5
+ 0.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-105_torch.float16_lf,0.6996666666666667,0.802722482275839,0.6996666666666667,0.7370938556711591,1.0
6
+ 0.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7716666666666666,0.8092193821623755,0.7716666666666666,0.7864287269398251,1.0
7
+ 1.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-175_torch.float16_lf,0.78,0.810582723471486,0.78,0.7924651054056209,1.0
8
+ 1.2,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7313333333333333,0.8157783263996798,0.7313333333333333,0.7628807622782868,1.0
9
+ 1.4,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-245_torch.float16_lf,0.751,0.8125856808988221,0.751,0.7745416635653988,1.0
10
+ 1.6,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-280_torch.float16_lf,0.739,0.8097375095673094,0.739,0.7662329023371559,1.0
11
+ 1.8,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-315_torch.float16_lf,0.7236666666666667,0.8145530585912838,0.7236666666666667,0.7580428816095297,1.0
12
+ 2.0,Llama3.1-8B-Chinese-Chat,shenzhi-wang/Llama3.1-8B-Chinese-Chat/checkpoint-350_torch.float16_lf,0.7293333333333333,0.8151184301713545,0.7293333333333333,0.7616699266814145,1.0
13
+ 0.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat_torch.bfloat16_4bit_lf,0.7636666666666667,0.7806653325131986,0.7636666666666667,0.7525813484548423,0.0096666666666666
14
+ 0.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-35_torch.bfloat16_4bit_lf,0.778,0.8148707737020212,0.778,0.7910805488003003,0.9996666666666668
15
+ 0.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7306666666666667,0.8145782271710159,0.7306666666666667,0.7624724104697406,1.0
16
+ 0.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7193333333333334,0.8213567226911125,0.7193333333333334,0.7560702640626931,1.0
17
+ 0.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-140_torch.bfloat16_4bit_lf,0.7563333333333333,0.826789897753756,0.7563333333333333,0.7815164366677209,1.0
18
+ 1.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-175_torch.bfloat16_4bit_lf,0.7963333333333333,0.8248972880055918,0.7963333333333333,0.8076868978089201,1.0
19
+ 1.2,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-210_torch.bfloat16_4bit_lf,0.7326666666666667,0.8265345821998035,0.7326666666666667,0.7644418492070342,1.0
20
+ 1.4,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7556666666666667,0.8258994609525315,0.7556666666666667,0.7820405339757727,1.0
21
+ 1.6,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-280_torch.bfloat16_4bit_lf,0.757,0.8264461657684251,0.757,0.7834496144681513,1.0
22
+ 1.8,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7546666666666667,0.8277723752096544,0.7546666666666667,0.7823584779069335,1.0
23
+ 2.0,Llama3.1-70B-Chinese-Chat,shenzhi-wang/Llama3.1-70B-Chinese-Chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7496666666666667,0.8282310230333227,0.7496666666666667,0.7791947625361637,1.0
24
+ 0.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat_torch.float16_lf,0.6946666666666667,0.701136267898111,0.6946666666666667,0.6634078645357937,0.0116666666666666
25
+ 0.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-35_torch.float16_lf,0.702,0.7932731014186957,0.702,0.7342714734731689,1.0
26
+ 0.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-70_torch.float16_lf,0.742,0.78982949223512,0.742,0.7536681109811127,1.0
27
+ 0.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-105_torch.float16_lf,0.6596666666666666,0.7923396753604393,0.6596666666666666,0.7067542301676931,1.0
28
+ 0.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-140_torch.float16_lf,0.7146666666666667,0.7861341885687435,0.7146666666666667,0.7404677278137267,1.0
29
+ 1.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-175_torch.float16_lf,0.7326666666666667,0.7876867721932461,0.7326666666666667,0.7471869515031995,1.0
30
+ 1.2,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-210_torch.float16_lf,0.7016666666666667,0.7903119228393193,0.7016666666666667,0.7348708822385348,1.0
31
+ 1.4,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-245_torch.float16_lf,0.75,0.7885868317699068,0.75,0.7648234347578796,1.0
32
+ 1.6,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-280_torch.float16_lf,0.7156666666666667,0.7846106674095725,0.7156666666666667,0.7410042005708856,1.0
33
+ 1.8,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-315_torch.float16_lf,0.6916666666666667,0.7864256994491394,0.6916666666666667,0.7257499426487266,1.0
34
+ 2.0,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-350_torch.float16_lf,0.6976666666666667,0.7889443494370009,0.6976666666666667,0.7307996137659796,1.0
35
+ 0.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat_torch.bfloat16_lf,0.705,0.7398041613378253,0.705,0.6906357423169466,1.0
36
+ 0.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-35_torch.bfloat16_lf,0.7193333333333334,0.7863486093365692,0.7193333333333334,0.7330498811142795,1.0
37
+ 0.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-70_torch.bfloat16_lf,0.726,0.7900250828103491,0.726,0.7396583495246526,1.0
38
+ 0.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-105_torch.bfloat16_lf,0.6736666666666666,0.8044565554629858,0.6736666666666666,0.7104123104529902,1.0
39
+ 0.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-140_torch.bfloat16_lf,0.7496666666666667,0.8041871978859686,0.7496666666666667,0.7660159670998776,1.0
40
+ 1.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-175_torch.bfloat16_lf,0.726,0.8094634420846424,0.726,0.751394838822856,1.0
41
+ 1.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-210_torch.bfloat16_lf,0.7276666666666667,0.8039673699820601,0.7276666666666667,0.7488653386949028,1.0
42
+ 1.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-245_torch.bfloat16_lf,0.747,0.8055537753403307,0.747,0.76527383722639,1.0
43
+ 1.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-280_torch.bfloat16_lf,0.7166666666666667,0.8059535682746547,0.7166666666666667,0.7432427946178835,1.0
44
+ 1.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-315_torch.bfloat16_lf,0.6983333333333334,0.8119110469658597,0.6983333333333334,0.7347246872892312,1.0
45
+ 2.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-350_torch.bfloat16_lf,0.7076666666666667,0.8120132783051135,0.7076666666666667,0.7408145046817652,1.0
46
+ 0.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m_torch.bfloat16_lf,0.4813333333333333,0.7605248207587668,0.4813333333333333,0.5244515621126862,0.9986666666666668
47
+ 0.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-35_torch.bfloat16_lf,0.7843333333333333,0.7977648302848388,0.7843333333333333,0.7864944570659659,1.0
48
+ 0.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-70_torch.bfloat16_lf,0.7836666666666666,0.7996977262947886,0.7836666666666666,0.7886881726841081,1.0
49
+ 0.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-105_torch.bfloat16_lf,0.7243333333333334,0.8171172705912051,0.7243333333333334,0.7565804830382912,1.0
50
+ 0.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-140_torch.bfloat16_lf,0.803,0.8031411888150441,0.803,0.8028064320197301,1.0
51
+ 1.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-175_torch.bfloat16_lf,0.7676666666666667,0.8108441731715863,0.7676666666666667,0.7843187816704813,1.0
52
+ 1.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-210_torch.bfloat16_lf,0.7736666666666666,0.8091671780923799,0.7736666666666666,0.7876874850235454,1.0
53
+ 1.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-245_torch.bfloat16_lf,0.7623333333333333,0.8062291602218205,0.7623333333333333,0.777669094563925,1.0
54
+ 1.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-280_torch.bfloat16_lf,0.7553333333333333,0.8086197936829652,0.7553333333333333,0.7755588811428297,1.0
55
+ 1.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-315_torch.bfloat16_lf,0.748,0.8171996792797457,0.748,0.773990849396903,1.0
56
+ 2.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-350_torch.bfloat16_lf,0.756,0.8126875394266148,0.756,0.7777812522863184,1.0
57
+ 0.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat_torch.bfloat16_4bit_lf,0.564,0.7745256693833624,0.564,0.6352190975436365,0.6726666666666666
58
+ 0.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-35_torch.bfloat16_4bit_lf,0.7576666666666667,0.7960640143421251,0.7576666666666667,0.769346697622254,1.0
59
+ 0.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7743333333333333,0.8042791719587958,0.7743333333333333,0.7849233169481004,1.0
60
+ 0.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7053333333333334,0.8070587351344375,0.7053333333333334,0.7421985241641746,1.0
61
+ 0.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-140_torch.bfloat16_4bit_lf,0.795,0.817457691710893,0.795,0.8027552955647029,1.0
62
+ 1.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-175_torch.bfloat16_4bit_lf,0.7786666666666666,0.8220512342362645,0.7786666666666666,0.7938353741035283,1.0
63
+ 1.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-210_torch.bfloat16_4bit_lf,0.7516666666666667,0.8264680853251051,0.7516666666666667,0.7787088167337303,1.0
64
+ 1.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7876666666666666,0.8154190698395475,0.7876666666666666,0.7965399224841393,1.0
65
+ 1.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-280_torch.bfloat16_4bit_lf,0.7753333333333333,0.8181125383376948,0.7753333333333333,0.7899794199099057,1.0
66
+ 1.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7583333333333333,0.8179523170315577,0.7583333333333333,0.7795358413482081,1.0
67
+ 2.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7616666666666667,0.8208475549648238,0.7616666666666667,0.7826736174247095,1.0
68
+ 0.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct_torch.float16_lf,0.683,0.7493103872717293,0.683,0.710140098232232,0.9996666666666668
69
+ 0.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-35_torch.float16_lf,0.725,0.7840171468707405,0.725,0.748994536667058,0.9996666666666668
70
+ 0.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-70_torch.float16_lf,0.759,0.8005303465799652,0.759,0.7748745026535183,1.0
71
+ 0.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-105_torch.float16_lf,0.6926666666666667,0.8039176975550218,0.6926666666666667,0.7332481528585848,1.0
72
+ 0.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-140_torch.float16_lf,0.725,0.7952719247171957,0.725,0.7476238017654298,1.0
73
+ 1.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-175_torch.float16_lf,0.6756666666666666,0.7810148934939715,0.6756666666666666,0.708653993277772,1.0
74
+ 1.2,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-210_torch.float16_lf,0.7013333333333334,0.7969562600853992,0.7013333333333334,0.7362679665494508,1.0
75
+ 1.4,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-245_torch.float16_lf,0.7326666666666667,0.7922538479314682,0.7326666666666667,0.755402136631717,0.9996666666666668
76
+ 1.6,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-280_torch.float16_lf,0.6983333333333334,0.785127298428753,0.6983333333333334,0.7292251109166867,1.0
77
+ 1.8,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-315_torch.float16_lf,0.6783333333333333,0.785390767631834,0.6783333333333333,0.7164131321837346,1.0
78
+ 2.0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/checkpoint-350_torch.float16_lf,0.689,0.7929715746898984,0.689,0.7259993126510194,1.0
79
+ 0.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch.bfloat16_4bit_lf,0.7516666666666667,0.7949378981748352,0.7516666666666667,0.7572499605227642,0.9773333333333334
80
+ 0.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.7583333333333333,0.8199928526815756,0.7583333333333333,0.782751089787442,1.0
81
+ 0.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.7366666666666667,0.8224865755517643,0.7366666666666667,0.7700627366337021,1.0
82
+ 0.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.757,0.8253824826209251,0.757,0.784000409833628,1.0
83
+ 0.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.7893333333333333,0.8229104753645825,0.7893333333333333,0.8033124955993173,1.0
84
+ 1.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.7376666666666667,0.8243654864769323,0.7376666666666667,0.7699617360961548,1.0
85
+ 1.2,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.763,0.8318882808702871,0.763,0.7901075708186186,1.0
86
+ 1.4,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.7656666666666667,0.8288272203240518,0.7656666666666667,0.790627109330698,1.0
87
+ 1.6,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.7693333333333333,0.8292798021666021,0.7693333333333333,0.7930169589012503,1.0
88
+ 1.8,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.784,0.8354349234761956,0.784,0.804194683154365,1.0
89
+ 2.0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.7736666666666666,0.8330147983140184,0.7736666666666666,0.7973657072550873,1.0
90
+ 0.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct_torch.bfloat16_lf,0.5686666666666667,0.6890626192990656,0.5686666666666667,0.5343419392280258,1.0
91
+ 0.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-35_torch.bfloat16_lf,0.6776666666666666,0.7316274074759973,0.6776666666666666,0.6973667827682657,0.999
92
+ 0.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-70_torch.bfloat16_lf,0.6653333333333333,0.758958550711478,0.6653333333333333,0.6997203293940804,1.0
93
+ 0.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-105_torch.bfloat16_lf,0.6963333333333334,0.7542353738754336,0.6963333333333334,0.7176790005966858,1.0
94
+ 0.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-140_torch.bfloat16_lf,0.7006666666666667,0.7661325413638986,0.7006666666666667,0.726396504358645,1.0
95
+ 1.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-175_torch.bfloat16_lf,0.6956666666666667,0.766520728596782,0.6956666666666667,0.7232763444431306,1.0
96
+ 1.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-210_torch.bfloat16_lf,0.683,0.7728473029454707,0.6830000000000002,0.7172831663070369,1.0
97
+ 1.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-245_torch.bfloat16_lf,0.7233333333333334,0.7720989063414209,0.7233333333333334,0.7410476466041488,1.0
98
+ 1.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-280_torch.bfloat16_lf,0.7156666666666667,0.7724266286892245,0.7156666666666667,0.7356331945937126,1.0
99
+ 1.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-315_torch.bfloat16_lf,0.6986666666666667,0.7734046031514225,0.6986666666666667,0.7262724373234384,1.0
100
+ 2.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-350_torch.bfloat16_lf,0.704,0.7725944595890188,0.704,0.7290337960305111,1.0
101
  0.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct_torch.bfloat16_lf,0.6436666666666667,0.717651042027604,0.6436666666666667,0.6066932578767255,1.0
102
  0.2,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-35_torch.bfloat16_lf,0.7473333333333333,0.759526705532232,0.7473333333333333,0.7480522291877509,0.998
103
  0.4,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-70_torch.bfloat16_lf,0.752,0.7774114736945115,0.752,0.7611191332452362,0.9996666666666668
 
109
  1.6,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-280_torch.bfloat16_lf,0.7726666666666666,0.8006851113573145,0.7726666666666666,0.7813968284378919,0.9996666666666668
110
  1.8,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-315_torch.bfloat16_lf,0.7696666666666667,0.799287702962426,0.7696666666666667,0.7792120245789584,0.9993333333333332
111
  2.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-350_torch.bfloat16_lf,0.769,0.8010881984531473,0.769,0.7793801070552965,0.9996666666666668
112
+ 0.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct_torch.bfloat16_4bit_lf,0.7856666666666666,0.7942511546806512,0.7856666666666666,0.7699212943617263,0.994
113
+ 0.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.7736666666666666,0.8102875293385203,0.7736666666666666,0.7874095844134584,1.0
114
+ 0.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.748,0.8094861650366822,0.748,0.7718522396481117,1.0
115
+ 0.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-105_torch.bfloat16_4bit_lf,0.7576666666666667,0.8111059140562599,0.7576666666666667,0.778271965273475,1.0
116
+ 0.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-140_torch.bfloat16_4bit_lf,0.7846666666666666,0.8199033961265727,0.7846666666666666,0.7983932694517433,1.0
117
+ 1.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-175_torch.bfloat16_4bit_lf,0.7396666666666667,0.8132229388907013,0.7396666666666667,0.768164418914878,1.0
118
+ 1.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-210_torch.bfloat16_4bit_lf,0.7756666666666666,0.8208038975271454,0.7756666666666666,0.7934902567321389,1.0
119
+ 1.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-245_torch.bfloat16_4bit_lf,0.767,0.8089726144740825,0.767,0.7826437373554418,1.0
120
+ 1.6,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-280_torch.bfloat16_4bit_lf,0.7643333333333333,0.8106183296950366,0.7643333333333333,0.7823942859806713,1.0
121
+ 1.8,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-315_torch.bfloat16_4bit_lf,0.774,0.814496213163251,0.774,0.7893524517536102,1.0
122
+ 2.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-350_torch.bfloat16_4bit_lf,0.7673333333333333,0.8134721321251935,0.7673333333333333,0.7849776453559993,1.0
data/internlm2_5-20b-chat_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat_torch.bfloat16_4bit_lf,0.575,0.7745319004159336,0.575,0.6416875854199033,0.6726666666666666
3
- 0.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-35_torch.bfloat16_4bit_lf,0.7723333333333333,0.8004877872664371,0.7723333333333333,0.7800315047324102,1.0
4
- 0.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-70_torch.bfloat16_4bit_lf,0.788,0.808878367860496,0.788,0.7952965901503556,1.0
5
- 0.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7223333333333334,0.8101427633407874,0.7223333333333334,0.7527524454293278,1.0
6
- 0.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-140_torch.bfloat16_4bit_lf,0.8063333333333333,0.8207793607428686,0.8063333333333333,0.811239851005161,1.0
7
- 1.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-175_torch.bfloat16_4bit_lf,0.792,0.8244746715585061,0.792,0.8028680300441688,1.0
8
- 1.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-210_torch.bfloat16_4bit_lf,0.77,0.8305821984199763,0.77,0.7905012003721434,1.0
9
- 1.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-245_torch.bfloat16_4bit_lf,0.8033333333333333,0.8215999742478901,0.8033333333333333,0.8087445768968825,1.0
10
- 1.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-280_torch.bfloat16_4bit_lf,0.795,0.8261993807231882,0.795,0.805022820640186,1.0
11
- 1.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-315_torch.bfloat16_4bit_lf,0.779,0.8256828719565774,0.779,0.7946766547953676,1.0
12
- 2.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7826666666666666,0.8284951420712369,0.7826666666666666,0.7978785507522372,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat_torch.bfloat16_4bit_lf,0.564,0.7745256693833624,0.564,0.6352190975436365,0.6726666666666666
3
+ 0.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-35_torch.bfloat16_4bit_lf,0.7576666666666667,0.7960640143421251,0.7576666666666667,0.769346697622254,1.0
4
+ 0.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-70_torch.bfloat16_4bit_lf,0.7743333333333333,0.8042791719587958,0.7743333333333333,0.7849233169481004,1.0
5
+ 0.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-105_torch.bfloat16_4bit_lf,0.7053333333333334,0.8070587351344375,0.7053333333333334,0.7421985241641746,1.0
6
+ 0.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-140_torch.bfloat16_4bit_lf,0.795,0.817457691710893,0.795,0.8027552955647029,1.0
7
+ 1.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-175_torch.bfloat16_4bit_lf,0.7786666666666666,0.8220512342362645,0.7786666666666666,0.7938353741035283,1.0
8
+ 1.2,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-210_torch.bfloat16_4bit_lf,0.7516666666666667,0.8264680853251051,0.7516666666666667,0.7787088167337303,1.0
9
+ 1.4,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-245_torch.bfloat16_4bit_lf,0.7876666666666666,0.8154190698395475,0.7876666666666666,0.7965399224841393,1.0
10
+ 1.6,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-280_torch.bfloat16_4bit_lf,0.7753333333333333,0.8181125383376948,0.7753333333333333,0.7899794199099057,1.0
11
+ 1.8,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-315_torch.bfloat16_4bit_lf,0.7583333333333333,0.8179523170315577,0.7583333333333333,0.7795358413482081,1.0
12
+ 2.0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/checkpoint-350_torch.bfloat16_4bit_lf,0.7616666666666667,0.8208475549648238,0.7616666666666667,0.7826736174247095,1.0
data/internlm2_5-20b-chat_results.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/internlm2_5-20b-chat_shots_metrics.csv CHANGED
@@ -1,2 +1,2 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/shots-00,0.575,0.7745319004159336,0.575,0.6416875854199033,0.6726666666666666
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/shots-00,0.564,0.7745256693833624,0.564,0.6352190975436365,0.6726666666666666
data/internlm2_5-7b-chat-1m_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m_torch.bfloat16_lf,0.49233333333333335,0.7570993062022159,0.49233333333333335,0.5279738886353613,0.9986666666666667
3
- 0.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-35_torch.bfloat16_lf,0.7843333333333333,0.7907732469871145,0.7843333333333333,0.7839137508042926,1.0
4
- 0.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-70_torch.bfloat16_lf,0.7876666666666666,0.7961110449860888,0.7876666666666666,0.790011839264191,1.0
5
- 0.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-105_torch.bfloat16_lf,0.74,0.818451985781803,0.74,0.7654385146358808,1.0
6
- 0.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-140_torch.bfloat16_lf,0.8076666666666666,0.8048844422436796,0.8076666666666666,0.8049749805997191,1.0
7
- 1.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-175_torch.bfloat16_lf,0.7796666666666666,0.8115925869684188,0.7796666666666666,0.7917308842405348,1.0
8
- 1.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-210_torch.bfloat16_lf,0.7816666666666666,0.8082575556171326,0.7816666666666666,0.7920155623671598,1.0
9
- 1.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-245_torch.bfloat16_lf,0.7736666666666666,0.8074649930391711,0.7736666666666666,0.7846002379939621,1.0
10
- 1.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-280_torch.bfloat16_lf,0.771,0.8124579857634519,0.771,0.7859698091956198,1.0
11
- 1.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-315_torch.bfloat16_lf,0.7646666666666667,0.8211516901334176,0.7646666666666667,0.7848541283802248,1.0
12
- 2.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-350_torch.bfloat16_lf,0.77,0.8144910397034413,0.77,0.7862970454955438,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m_torch.bfloat16_lf,0.48133333333333334,0.7605248207587668,0.48133333333333334,0.5244515621126862,0.9986666666666667
3
+ 0.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-35_torch.bfloat16_lf,0.7843333333333333,0.7977648302848388,0.7843333333333333,0.7864944570659659,1.0
4
+ 0.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-70_torch.bfloat16_lf,0.7836666666666666,0.7996977262947886,0.7836666666666666,0.7886881726841081,1.0
5
+ 0.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-105_torch.bfloat16_lf,0.7243333333333334,0.8171172705912051,0.7243333333333334,0.7565804830382912,1.0
6
+ 0.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-140_torch.bfloat16_lf,0.803,0.8031411888150441,0.803,0.8028064320197301,1.0
7
+ 1.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-175_torch.bfloat16_lf,0.7676666666666667,0.8108441731715863,0.7676666666666667,0.7843187816704813,1.0
8
+ 1.2,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-210_torch.bfloat16_lf,0.7736666666666666,0.8091671780923799,0.7736666666666666,0.7876874850235454,1.0
9
+ 1.4,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-245_torch.bfloat16_lf,0.7623333333333333,0.8062291602218205,0.7623333333333333,0.777669094563925,1.0
10
+ 1.6,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-280_torch.bfloat16_lf,0.7553333333333333,0.8086197936829652,0.7553333333333333,0.7755588811428297,1.0
11
+ 1.8,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-315_torch.bfloat16_lf,0.748,0.8171996792797457,0.748,0.773990849396903,1.0
12
+ 2.0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/checkpoint-350_torch.bfloat16_lf,0.756,0.8126875394266148,0.756,0.7777812522863184,1.0
data/internlm2_5-7b-chat-1m_results.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/internlm2_5-7b-chat-1m_shots_metrics.csv CHANGED
@@ -1,8 +1,8 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-00,0.49233333333333335,0.7570993062022159,0.49233333333333335,0.5279738886353613,0.9986666666666667
3
- 5,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-05,0.7753333333333333,0.7586378181445387,0.7753333333333333,0.7665405919258307,0.9453333333333334
4
- 10,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-10,0.654,0.7251381758855274,0.654,0.6681655588675279,0.8866666666666667
5
- 20,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-20,0.677,0.7296467412730754,0.677,0.6780570012166849,0.8213333333333334
6
- 30,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-30,0.68,0.7425906069240685,0.68,0.6837924261094331,0.8236666666666667
7
- 40,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-40,0.726,0.7533750344411337,0.726,0.7132456474026365,0.8336666666666667
8
- 50,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-50,0.7173333333333334,0.7471186719787132,0.7173333333333334,0.6980283743779222,0.8846666666666667
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-00,0.48133333333333334,0.7605248207587668,0.48133333333333334,0.5244515621126862,0.9986666666666667
3
+ 5,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-05,0.7763333333333333,0.7640598325070357,0.7763333333333333,0.7700878172419743,0.9453333333333334
4
+ 10,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-10,0.6473333333333333,0.7282065610714444,0.6473333333333333,0.665824871588245,0.8866666666666667
5
+ 20,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-20,0.6733333333333333,0.7314610506764355,0.6733333333333333,0.6764198712634657,0.8213333333333334
6
+ 30,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-30,0.6736666666666666,0.7482542000402412,0.6736666666666666,0.6810446770610585,0.8236666666666667
7
+ 40,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-40,0.724,0.7567654663125225,0.724,0.712500180941536,0.8336666666666667
8
+ 50,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-50,0.7213333333333334,0.7546008508718184,0.7213333333333334,0.70308601382351,0.8846666666666667
data/internlm2_5-7b-chat_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat_torch.bfloat16_lf,0.7063333333333334,0.7369785607161373,0.7063333333333334,0.6895815239121195,1.0
3
- 0.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-35_torch.bfloat16_lf,0.729,0.7861122408311365,0.729,0.7385163226667387,1.0
4
- 0.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-70_torch.bfloat16_lf,0.7336666666666667,0.7857703796539939,0.7336666666666667,0.7427841254119673,1.0
5
- 0.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-105_torch.bfloat16_lf,0.6876666666666666,0.8030976203819039,0.6876666666666666,0.7170750416800897,1.0
6
- 0.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-140_torch.bfloat16_lf,0.762,0.8063331692665241,0.762,0.7740172985498378,1.0
7
- 1.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-175_torch.bfloat16_lf,0.7416666666666667,0.812190204769964,0.7416666666666667,0.761129466343473,1.0
8
- 1.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-210_torch.bfloat16_lf,0.7443333333333333,0.8084922204218251,0.7443333333333333,0.7599422989743019,1.0
9
- 1.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-245_torch.bfloat16_lf,0.762,0.8089123492151512,0.762,0.7753217972757948,1.0
10
- 1.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-280_torch.bfloat16_lf,0.733,0.8092774765454144,0.733,0.7535080746086277,1.0
11
- 1.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-315_torch.bfloat16_lf,0.7156666666666667,0.814456776214162,0.7156666666666667,0.744622807072089,1.0
12
- 2.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-350_torch.bfloat16_lf,0.725,0.8148156790328904,0.725,0.7509650741005044,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat_torch.bfloat16_lf,0.705,0.7398041613378253,0.705,0.6906357423169466,1.0
3
+ 0.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-35_torch.bfloat16_lf,0.7193333333333334,0.7863486093365692,0.7193333333333334,0.7330498811142795,1.0
4
+ 0.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-70_torch.bfloat16_lf,0.726,0.7900250828103491,0.726,0.7396583495246526,1.0
5
+ 0.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-105_torch.bfloat16_lf,0.6736666666666666,0.8044565554629858,0.6736666666666666,0.7104123104529902,1.0
6
+ 0.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-140_torch.bfloat16_lf,0.7496666666666667,0.8041871978859686,0.7496666666666667,0.7660159670998776,1.0
7
+ 1.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-175_torch.bfloat16_lf,0.726,0.8094634420846424,0.726,0.751394838822856,1.0
8
+ 1.2,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-210_torch.bfloat16_lf,0.7276666666666667,0.8039673699820601,0.7276666666666667,0.7488653386949028,1.0
9
+ 1.4,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-245_torch.bfloat16_lf,0.747,0.8055537753403307,0.747,0.76527383722639,1.0
10
+ 1.6,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-280_torch.bfloat16_lf,0.7166666666666667,0.8059535682746547,0.7166666666666667,0.7432427946178835,1.0
11
+ 1.8,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-315_torch.bfloat16_lf,0.6983333333333334,0.8119110469658597,0.6983333333333334,0.7347246872892312,1.0
12
+ 2.0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/checkpoint-350_torch.bfloat16_lf,0.7076666666666667,0.8120132783051135,0.7076666666666667,0.7408145046817652,1.0
data/internlm2_5-7b-chat_results.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/internlm2_5-7b-chat_shots_metrics.csv CHANGED
@@ -1,8 +1,8 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-00,0.7063333333333334,0.7369785607161373,0.7063333333333334,0.6895815239121195,1.0
3
- 5,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-05,0.747,0.7433195768374967,0.747,0.7232456014841266,0.999
4
- 10,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-10,0.559,0.7306434812774306,0.559,0.6287391975839828,0.9883333333333333
5
- 20,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-20,0.6466666666666666,0.7143354332969056,0.6466666666666666,0.6738164117926014,0.9473333333333334
6
- 30,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-30,0.626,0.7223442225693745,0.626,0.6494216734706632,0.9403333333333334
7
- 40,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-40,0.64,0.7020393671564193,0.64,0.611996460461355,0.9813333333333333
8
- 50,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-50,0.6116666666666667,0.6808793455512054,0.6116666666666667,0.5502581431071487,0.9803333333333333
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-00,0.705,0.7398041613378253,0.705,0.6906357423169466,1.0
3
+ 5,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-05,0.7476666666666667,0.746806876028684,0.7476666666666667,0.7270588443494302,0.999
4
+ 10,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-10,0.5533333333333333,0.7301739373336078,0.5533333333333333,0.625097481985829,0.9883333333333333
5
+ 20,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-20,0.647,0.721136036365055,0.647,0.6769738108371004,0.9473333333333334
6
+ 30,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-30,0.6263333333333333,0.7256804685839701,0.6263333333333333,0.6534519727626863,0.9403333333333334
7
+ 40,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-40,0.642,0.7045435138304105,0.642,0.6161646934220135,0.9813333333333333
8
+ 50,internlm2_5-7b-chat,internlm/internlm2_5-7b-chat/shots-50,0.6166666666666667,0.6959837361921766,0.6166666666666667,0.5567537556050285,0.9803333333333333
data/openai_metrics.csv CHANGED
@@ -1,29 +1,29 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,gpt-4o-mini,gpt-4o-mini/shots-00,0.7166666666666667,0.7800918028217227,0.7166666666666667,0.7260056154268697,0.9916666666666667
3
- 0,gpt-4o,gpt-4o/shots-00,0.792,0.8234582231232066,0.792,0.8022633746318892,0.066
4
- 0,o1-mini,o1-mini/shots-00,0.7133333333333334,0.78301872209321,0.7133333333333334,0.7402734333211688,0.999
5
- 0,o1-preview,o1-preview/shots-00,0.725,0.7860443296236067,0.725,0.7471736898827371,0.998
6
- 5,gpt-4o-mini,gpt-4o-mini/shots-05,0.7203333333333334,0.7754800244789168,0.7203333333333334,0.718540502683781,0.9996666666666667
7
- 5,gpt-4o,gpt-4o/shots-05,0.7973333333333333,0.8251066339666824,0.7973333333333333,0.8066429877716694,0.998
8
- 5,o1-mini,o1-mini/shots-05,0.7313333333333333,0.7913577967036569,0.7313333333333333,0.7532525881890013,0.9966666666666667
9
- 5,o1-preview,o1-preview/shots-05,0.736,0.789169445854742,0.736,0.7557068489703724,0.979
10
- 10,gpt-4o-mini,gpt-4o-mini/shots-10,0.6836666666666666,0.7701177891593667,0.6836666666666666,0.6932016303210964,0.9983333333333333
11
- 10,gpt-4o,gpt-4o/shots-10,0.8013333333333333,0.8246834383036209,0.8013333333333333,0.8098901724387172,0.9996666666666667
12
- 10,o1-mini,o1-mini/shots-10,0.7283333333333334,0.7851844846890333,0.7283333333333334,0.7490987096521479,0.9943333333333333
13
- 10,o1-preview,o1-preview/shots-10,0.7513333333333333,0.7947574632958824,0.7513333333333333,0.7673707529850041,0.9873333333333333
14
- 20,gpt-4o-mini,gpt-4o-mini/shots-20,0.6616666666666666,0.7627467933668375,0.6616666666666666,0.677372757519069,0.998
15
- 20,gpt-4o,gpt-4o/shots-20,0.79,0.822098231279132,0.79,0.8020290214439503,0.9993333333333333
16
- 20,o1-mini,o1-mini/shots-20,0.7373333333333333,0.7815727856803751,0.7373333333333333,0.7533353509620383,0.9946666666666667
17
- 20,o1-preview,o1-preview/shots-20,0.7483333333333333,0.790639591375103,0.7483333333333333,0.763324860719675,0.9853333333333333
18
- 30,gpt-4o-mini,gpt-4o-mini/shots-30,0.6876666666666666,0.7663381611066244,0.6876666666666666,0.6896169854446027,0.999
19
- 30,gpt-4o,gpt-4o/shots-30,0.7946666666666666,0.8259436682564079,0.7946666666666666,0.8063113377291872,0.999
20
- 30,o1-mini,o1-mini/shots-30,0.748,0.779168441371953,0.748,0.7583397172973073,0.9976666666666667
21
- 30,o1-preview,o1-preview/shots-30,0.7513333333333333,0.792049804996314,0.7513333333333333,0.7654800949250774,0.984
22
- 40,gpt-4o-mini,gpt-4o-mini/shots-40,0.6903333333333334,0.7603850760051853,0.6903333333333334,0.688393665975117,0.9986666666666667
23
- 40,gpt-4o,gpt-4o/shots-40,0.7906666666666666,0.8242154446428003,0.7906666666666666,0.803356987717753,0.9973333333333333
24
- 40,o1-mini,o1-mini/shots-40,0.7496666666666667,0.775765877349714,0.7496666666666667,0.757640226210139,0.9976666666666667
25
- 40,o1-preview,o1-preview/shots-40,0.7526666666666667,0.795308022968859,0.7526666666666667,0.7672762517397222,0.984
26
- 50,gpt-4o-mini,gpt-4o-mini/shots-50,0.7143333333333334,0.7654214682013311,0.7143333333333334,0.7056961582308003,0.9993333333333333
27
- 50,gpt-4o,gpt-4o/shots-50,0.798,0.8274250231711487,0.798,0.8091066504350897,0.9993333333333333
28
- 50,o1-mini,o1-mini/shots-50,0.7536666666666667,0.7755130422727871,0.7536666666666667,0.7602241520634903,0.9976666666666667
29
- 50,o1-preview,o1-preview/shots-50,0.7576666666666667,0.7986597718440941,0.7576666666666667,0.7718331604189232,0.9816666666666667
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,gpt-4o-mini,gpt-4o-mini/shots-00,0.7176666666666667,0.785706730193659,0.7176666666666667,0.7296061848734905,0.9916666666666667
3
+ 0,gpt-4o,gpt-4o/shots-00,0.782,0.8204048322982596,0.782,0.7953019682198627,0.066
4
+ 0,o1-mini,o1-mini/shots-00,0.7083333333333334,0.7848098266888749,0.7083333333333334,0.7377068425566796,0.999
5
+ 0,o1-preview,o1-preview/shots-00,0.721,0.7849371317342158,0.721,0.7451207069815194,0.998
6
+ 5,gpt-4o-mini,gpt-4o-mini/shots-05,0.7176666666666667,0.7767294185987051,0.7176666666666667,0.7181068311028772,0.9996666666666667
7
+ 5,gpt-4o,gpt-4o/shots-05,0.7873333333333333,0.8230974205170392,0.7873333333333333,0.8000290527498529,0.998
8
+ 5,o1-mini,o1-mini/shots-05,0.724,0.7905045610386181,0.724,0.7482963122126776,0.9966666666666667
9
+ 5,o1-preview,o1-preview/shots-05,0.7313333333333333,0.7878283093765627,0.7313333333333333,0.7535489719321234,0.979
10
+ 10,gpt-4o-mini,gpt-4o-mini/shots-10,0.6793333333333333,0.7728086050218999,0.6793333333333333,0.6916749681933937,0.9983333333333333
11
+ 10,gpt-4o,gpt-4o/shots-10,0.7916666666666666,0.8227707658360168,0.7916666666666666,0.803614688453356,0.9996666666666667
12
+ 10,o1-mini,o1-mini/shots-10,0.725,0.7892485648334764,0.725,0.7485623974683336,0.9943333333333333
13
+ 10,o1-preview,o1-preview/shots-10,0.749,0.7964482186234537,0.749,0.7677316493549238,0.9873333333333333
14
+ 20,gpt-4o-mini,gpt-4o-mini/shots-20,0.6623333333333333,0.7686706009175459,0.6623333333333333,0.6798015109939115,0.998
15
+ 20,gpt-4o,gpt-4o/shots-20,0.7816666666666666,0.8204541793856629,0.7816666666666666,0.7967017169880498,0.9993333333333333
16
+ 20,o1-mini,o1-mini/shots-20,0.7343333333333333,0.786101455887261,0.7343333333333333,0.7535300565051624,0.9946666666666667
17
+ 20,o1-preview,o1-preview/shots-20,0.7443333333333333,0.7911442834260676,0.7443333333333333,0.7625144090816939,0.9853333333333333
18
+ 30,gpt-4o-mini,gpt-4o-mini/shots-30,0.6873333333333334,0.7684209723431035,0.6873333333333334,0.6913018667081989,0.999
19
+ 30,gpt-4o,gpt-4o/shots-30,0.7886666666666666,0.8260847852316618,0.7886666666666666,0.8030949295928699,0.999
20
+ 30,o1-mini,o1-mini/shots-30,0.7416666666666667,0.7791875084643942,0.7416666666666667,0.7548378729964869,0.9976666666666667
21
+ 30,o1-preview,o1-preview/shots-30,0.7473333333333333,0.7920604378746952,0.7473333333333333,0.7643977099599287,0.984
22
+ 40,gpt-4o-mini,gpt-4o-mini/shots-40,0.6923333333333334,0.7639874967862498,0.6923333333333334,0.6924934068935911,0.9986666666666667
23
+ 40,gpt-4o,gpt-4o/shots-40,0.784,0.8233509309291644,0.784,0.7993336791122846,0.9973333333333333
24
+ 40,o1-mini,o1-mini/shots-40,0.7466666666666667,0.7783660257118015,0.7466666666666667,0.7572644424023218,0.9976666666666667
25
+ 40,o1-preview,o1-preview/shots-40,0.7506666666666667,0.7964679024468982,0.7506666666666667,0.7674109766459014,0.984
26
+ 50,gpt-4o-mini,gpt-4o-mini/shots-50,0.717,0.7692638634416518,0.717,0.7105227254860433,0.9993333333333333
27
+ 50,gpt-4o,gpt-4o/shots-50,0.787,0.8234800466218334,0.787,0.8013530974301947,0.9993333333333333
28
+ 50,o1-mini,o1-mini/shots-50,0.75,0.7767849265833893,0.75,0.7590020698968893,0.9976666666666667
29
+ 50,o1-preview,o1-preview/shots-50,0.7546666666666667,0.7979981023789272,0.7546666666666667,0.7708181822112403,0.9816666666666667
data/openai_results.csv CHANGED
The diff for this file is too large to render. See raw diff
 
datasets/mgtv/dev.csv DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:449f236786e2105cd1dd0ba5f4a037c3608a03d73a24597e880cc5009e8c53b6
3
- size 2741482
 
 
 
 
datasets/mgtv/val.csv CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/00_Data Analysis.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4acc31866231416fdea442abacb9cc507b8a27bd6b534ee2fc1d523bc839a7e4
3
- size 1066113
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c27a6be2814f5be1f48aa22ad45c264d24556d09cb5347629dab1cf0755ab97
3
+ size 1148416
notebooks/01a_internlm2_5-20b-chat_analysis.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36edd34a243400b5ea442b45b15f0c8caa79434e5efd0716b9c672401219597a
3
- size 6383478
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:734e3db8a49e84a606f2d38ca30e4d0e8ff3cdb2c67684d5825356c023cf08fc
3
+ size 6385778
notebooks/01a_internlm2_5-7b-chat-1m_analysis.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cd1d720ee03fe5f6243b6be4af970d687f70dcc2455babd96d74f578db99d7e
3
- size 2394646
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f887698f08bdb2f767c6edbb0329171670ab1ae3e53e4eb1b065ea8c175d864
3
+ size 2385934
notebooks/01a_internlm2_5-7b-chat_analysis.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a1898e0248db2b32153ab90e358130da2c3219b15bbecaff2b1cbb544e2ae46
3
- size 6121033
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef39d306780d8c6717de34adbaa5a7180854019439fa151016007eb8d52f6a05
3
+ size 6122973
notebooks/01b_Mistral-7B-v0.3-Chinese-Chat_analysis.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f205b7db1031c445f15a90816e83e6f24c5e00ec3aa2b677e315ee53a4d4f321
3
- size 14774860
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71fbd8541e14161d6ff7a5668ce60021cda2bd703819dab6bb405cc6c25553e3
3
+ size 14778268
notebooks/02a_Qwen2-7B-Instruct_analysis.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:451ea88855aa38161a291ae201037246032866f08cc7169a4d514cd5bc4c8c08
3
- size 1782391
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cda076bb798749bdd0a680484547091497d7bd4dc67be75fd40bc75e0651b27
3
+ size 1789053
notebooks/02b_Qwen2-72B-Instruct_analysis.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db40224d1f02ed8c9292f72b6697be2060677c8bf430636ac9e15b38a5267f23
3
- size 2072847
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:223ce2a464dbad51d4f2cb02e50bcc656c2e1774366f30051e7a4a10505c3c0a
3
+ size 2080974