bartekupartek committed on
Commit
289332c
1 Parent(s): 0b09e6f

End of training

Browse files
Files changed (5) hide show
  1. README.md +17 -4
  2. all_results.json +16 -0
  3. eval_results.json +10 -0
  4. train_results.json +9 -0
  5. trainer_state.json +134 -0
README.md CHANGED
@@ -3,11 +3,24 @@ license: apache-2.0
3
  base_model: Qwen/Qwen2.5-0.5B-Instruct
4
  tags:
5
  - generated_from_trainer
 
 
6
  metrics:
7
  - accuracy
8
  model-index:
9
  - name: llm2vec-qwen2.5-0.5-instruct
10
- results: []
 
 
 
 
 
 
 
 
 
 
 
11
  ---
12
 
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -15,10 +28,10 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # llm2vec-qwen2.5-0.5-instruct
17
 
18
- This model is a fine-tuned version of [Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 1.7920
21
- - Accuracy: 0.6351
22
 
23
  ## Model description
24
 
 
3
  base_model: Qwen/Qwen2.5-0.5B-Instruct
4
  tags:
5
  - generated_from_trainer
6
+ datasets:
7
+ - wikitext
8
  metrics:
9
  - accuracy
10
  model-index:
11
  - name: llm2vec-qwen2.5-0.5-instruct
12
+ results:
13
+ - task:
14
+ name: Masked Language Modeling
15
+ type: fill-mask
16
+ dataset:
17
+ name: wikitext wikitext-103-raw-v1
18
+ type: wikitext
19
+ args: wikitext-103-raw-v1
20
+ metrics:
21
+ - name: Accuracy
22
+ type: accuracy
23
+ value: 0.629556877924779
24
  ---
25
 
26
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
28
 
29
  # llm2vec-qwen2.5-0.5-instruct
30
 
31
+ This model is a fine-tuned version of [Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) on the wikitext wikitext-103-raw-v1 dataset.
32
  It achieves the following results on the evaluation set:
33
+ - Loss: 1.8264
34
+ - Accuracy: 0.6296
35
 
36
  ## Model description
37
 
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.08279516476237787,
3
+ "eval_accuracy": 0.629556877924779,
4
+ "eval_loss": 1.8264291286468506,
5
+ "eval_runtime": 6.3204,
6
+ "eval_samples": 408,
7
+ "eval_samples_per_second": 64.553,
8
+ "eval_steps_per_second": 2.057,
9
+ "perplexity": 6.211665948497031,
10
+ "total_flos": 2.198926000128e+16,
11
+ "train_loss": 2.0824306030273436,
12
+ "train_runtime": 944.4845,
13
+ "train_samples": 193233,
14
+ "train_samples_per_second": 613.773,
15
+ "train_steps_per_second": 38.364
16
+ }
eval_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.08279516476237787,
3
+ "eval_accuracy": 0.629556877924779,
4
+ "eval_loss": 1.8264291286468506,
5
+ "eval_runtime": 6.3204,
6
+ "eval_samples": 408,
7
+ "eval_samples_per_second": 64.553,
8
+ "eval_steps_per_second": 2.057,
9
+ "perplexity": 6.211665948497031
10
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.08279516476237787,
3
+ "total_flos": 2.198926000128e+16,
4
+ "train_loss": 2.0824306030273436,
5
+ "train_runtime": 944.4845,
6
+ "train_samples": 193233,
7
+ "train_samples_per_second": 613.773,
8
+ "train_steps_per_second": 38.364
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.08279516476237787,
5
+ "eval_steps": 100,
6
+ "global_step": 1000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.008279516476237788,
13
+ "eval_accuracy": 0.5510729122994245,
14
+ "eval_loss": 2.3376457691192627,
15
+ "eval_runtime": 6.8445,
16
+ "eval_samples_per_second": 59.61,
17
+ "eval_steps_per_second": 1.899,
18
+ "step": 100
19
+ },
20
+ {
21
+ "epoch": 0.016559032952475575,
22
+ "eval_accuracy": 0.5764662925666846,
23
+ "eval_loss": 2.173574447631836,
24
+ "eval_runtime": 6.6808,
25
+ "eval_samples_per_second": 61.071,
26
+ "eval_steps_per_second": 1.946,
27
+ "step": 200
28
+ },
29
+ {
30
+ "epoch": 0.024838549428713365,
31
+ "eval_accuracy": 0.5929699147520929,
32
+ "eval_loss": 2.0678670406341553,
33
+ "eval_runtime": 6.4137,
34
+ "eval_samples_per_second": 63.614,
35
+ "eval_steps_per_second": 2.027,
36
+ "step": 300
37
+ },
38
+ {
39
+ "epoch": 0.03311806590495115,
40
+ "eval_accuracy": 0.6055573666749765,
41
+ "eval_loss": 1.9839483499526978,
42
+ "eval_runtime": 6.4017,
43
+ "eval_samples_per_second": 63.734,
44
+ "eval_steps_per_second": 2.031,
45
+ "step": 400
46
+ },
47
+ {
48
+ "epoch": 0.04139758238118894,
49
+ "grad_norm": 8.5625,
50
+ "learning_rate": 4.931004029364685e-05,
51
+ "loss": 2.2761,
52
+ "step": 500
53
+ },
54
+ {
55
+ "epoch": 0.04139758238118894,
56
+ "eval_accuracy": 0.6084943562272814,
57
+ "eval_loss": 1.9611371755599976,
58
+ "eval_runtime": 6.4249,
59
+ "eval_samples_per_second": 63.503,
60
+ "eval_steps_per_second": 2.023,
61
+ "step": 500
62
+ },
63
+ {
64
+ "epoch": 0.04967709885742673,
65
+ "eval_accuracy": 0.6203082851637765,
66
+ "eval_loss": 1.905377984046936,
67
+ "eval_runtime": 6.4365,
68
+ "eval_samples_per_second": 63.388,
69
+ "eval_steps_per_second": 2.02,
70
+ "step": 600
71
+ },
72
+ {
73
+ "epoch": 0.057956615333664516,
74
+ "eval_accuracy": 0.6241699612328715,
75
+ "eval_loss": 1.8838109970092773,
76
+ "eval_runtime": 6.4118,
77
+ "eval_samples_per_second": 63.632,
78
+ "eval_steps_per_second": 2.028,
79
+ "step": 700
80
+ },
81
+ {
82
+ "epoch": 0.0662361318099023,
83
+ "eval_accuracy": 0.6295839990759813,
84
+ "eval_loss": 1.8403326272964478,
85
+ "eval_runtime": 6.397,
86
+ "eval_samples_per_second": 63.78,
87
+ "eval_steps_per_second": 2.032,
88
+ "step": 800
89
+ },
90
+ {
91
+ "epoch": 0.07451564828614009,
92
+ "eval_accuracy": 0.6300428691724719,
93
+ "eval_loss": 1.8234734535217285,
94
+ "eval_runtime": 6.4304,
95
+ "eval_samples_per_second": 63.449,
96
+ "eval_steps_per_second": 2.022,
97
+ "step": 900
98
+ },
99
+ {
100
+ "epoch": 0.08279516476237787,
101
+ "grad_norm": 7.65625,
102
+ "learning_rate": 4.862008058729371e-05,
103
+ "loss": 1.8887,
104
+ "step": 1000
105
+ },
106
+ {
107
+ "epoch": 0.08279516476237787,
108
+ "eval_accuracy": 0.6351211866350639,
109
+ "eval_loss": 1.7919981479644775,
110
+ "eval_runtime": 6.4,
111
+ "eval_samples_per_second": 63.75,
112
+ "eval_steps_per_second": 2.031,
113
+ "step": 1000
114
+ },
115
+ {
116
+ "epoch": 0.08279516476237787,
117
+ "step": 1000,
118
+ "total_flos": 2.198926000128e+16,
119
+ "train_loss": 2.0824306030273436,
120
+ "train_runtime": 944.4845,
121
+ "train_samples_per_second": 613.773,
122
+ "train_steps_per_second": 38.364
123
+ }
124
+ ],
125
+ "logging_steps": 500,
126
+ "max_steps": 36234,
127
+ "num_input_tokens_seen": 0,
128
+ "num_train_epochs": 3,
129
+ "save_steps": 200,
130
+ "total_flos": 2.198926000128e+16,
131
+ "train_batch_size": 16,
132
+ "trial_name": null,
133
+ "trial_params": null
134
+ }