iqbalasrif committed
Commit acf90aa · verified · Parent: f486475

Upload 16 files

Files changed (7):
  1. README.md +20 -20
  2. all_results.json +10 -10
  3. eval_results.json +6 -6
  4. train_results.json +4 -4
  5. trainer_state.json +44 -44
  6. training_args.bin +1 -1
  7. vocab.json +0 -0
README.md CHANGED
@@ -1,19 +1,19 @@
 ---
+library_name: transformers
+license: apache-2.0
 base_model: openai/whisper-tiny.en
+tags:
+- generated_from_trainer
 datasets:
 - lalipa/jv_id_asr_split
-library_name: transformers
-license: apache-2.0
 metrics:
 - wer
-tags:
-- generated_from_trainer
 model-index:
-- name: finetune
+- name: from-scratch
   results:
   - task:
-      type: automatic-speech-recognition
       name: Automatic Speech Recognition
+      type: automatic-speech-recognition
     dataset:
       name: lalipa/jv_id_asr_split jv_id_asr_source
       type: lalipa/jv_id_asr_split
@@ -21,21 +21,21 @@ model-index:
       split: validation
       args: jv_id_asr_source
     metrics:
-    - type: wer
-      value: 0.7835602493955974
-      name: Wer
+    - name: Wer
+      type: wer
+      value: 6.432243287950121
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 
-# finetune
+# from-scratch
 
 This model is a fine-tuned version of [openai/whisper-tiny.en](https://huggingface.co/openai/whisper-tiny.en) on the lalipa/jv_id_asr_split jv_id_asr_source dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.7784
-- Wer: 0.7836
-- Cer: 0.2535
+- Loss: 10.8537
+- Wer: 6.4322
+- Cer: 9.6337
 
 ## Model description
 
@@ -67,13 +67,13 @@ The following hyperparameters were used during training:
 
 ### Training results
 
-| Training Loss | Epoch  | Step | Validation Loss | Wer    | Cer    |
-|:-------------:|:------:|:----:|:---------------:|:------:|:------:|
-| 3.6903        | 0.2041 | 30   | 2.9875          | 1.0127 | 0.4365 |
-| 2.533         | 0.4082 | 60   | 2.2360          | 0.8879 | 0.2921 |
-| 2.0604        | 0.6122 | 90   | 1.9514          | 0.8253 | 0.2670 |
-| 1.852         | 0.8163 | 120  | 1.8182          | 0.7949 | 0.2581 |
-| 1.7929        | 1.0204 | 150  | 1.7784          | 0.7836 | 0.2535 |
+| Training Loss | Epoch  | Step | Validation Loss | Wer     | Cer     |
+|:-------------:|:------:|:----:|:---------------:|:-------:|:-------:|
+| 10.8562       | 0.2041 | 30   | 10.8560         | 17.0625 | 24.9361 |
+| 10.8557       | 0.4082 | 60   | 10.8553         | 20.3833 | 28.0200 |
+| 10.8549       | 0.6122 | 90   | 10.8545         | 11.6823 | 17.2418 |
+| 10.8542       | 0.8163 | 120  | 10.8539         | 10.5108 | 15.8910 |
+| 10.8538       | 1.0204 | 150  | 10.8537         | 6.4322  | 9.6337  |
 
 
 ### Framework versions
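For context, here is a minimal inference sketch for a checkpoint like the one uploaded in this commit, using the transformers ASR pipeline. The repo id and audio filename are hypothetical placeholders, not taken from this commit.

# Minimal inference sketch. "iqbalasrif/from-scratch" and "sample.wav" are
# hypothetical placeholders; requires transformers and ffmpeg.
from transformers import pipeline

asr = pipeline(
    "automatic-speech-recognition",
    model="iqbalasrif/from-scratch",  # hypothetical repo id
)
result = asr("sample.wav")  # any audio file decodable by ffmpeg
print(result["text"])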
all_results.json CHANGED
@@ -1,16 +1,16 @@
 {
     "epoch": 1.0204081632653061,
-    "eval_cer": 0.253486835896952,
-    "eval_loss": 1.7784144878387451,
-    "eval_runtime": 159.8385,
+    "eval_cer": 9.633726526375389,
+    "eval_loss": 10.85368824005127,
+    "eval_runtime": 461.8513,
     "eval_samples": 1136,
-    "eval_samples_per_second": 7.107,
-    "eval_steps_per_second": 0.444,
-    "eval_wer": 0.7835602493955974,
+    "eval_samples_per_second": 2.46,
+    "eval_steps_per_second": 0.154,
+    "eval_wer": 6.432243287950121,
     "total_flos": 2.3614434607104e+17,
-    "train_loss": 2.385703277587891,
-    "train_runtime": 5094.7107,
+    "train_loss": 10.854962158203126,
+    "train_runtime": 4570.606,
     "train_samples": 9400,
-    "train_samples_per_second": 1.884,
-    "train_steps_per_second": 0.029
+    "train_samples_per_second": 2.1,
+    "train_steps_per_second": 0.033
 }
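Note that eval_wer moved from 0.7835... to 6.4322...: the two runs report WER on visibly different scales (a fraction below 1 versus a value above 6), so the numbers should not be compared directly without checking how each evaluation script normalizes the metric. For reference, a sketch of how WER/CER values like these are commonly computed with the evaluate library; the sentence pair below is invented for illustration.

# How eval_wer / eval_cer values are commonly produced (sketch; the
# reference/prediction strings are invented, not from this dataset).
import evaluate

wer_metric = evaluate.load("wer")
cer_metric = evaluate.load("cer")

references = ["sugeng enjing sedaya"]
predictions = ["sugeng injing sedoyo"]

# 2 substituted words out of 3 reference words -> WER ~= 0.667
print(wer_metric.compute(references=references, predictions=predictions))
print(cer_metric.compute(references=references, predictions=predictions))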
eval_results.json CHANGED
@@ -1,10 +1,10 @@
 {
     "epoch": 1.0204081632653061,
-    "eval_cer": 0.253486835896952,
-    "eval_loss": 1.7784144878387451,
-    "eval_runtime": 159.8385,
+    "eval_cer": 9.633726526375389,
+    "eval_loss": 10.85368824005127,
+    "eval_runtime": 461.8513,
     "eval_samples": 1136,
-    "eval_samples_per_second": 7.107,
-    "eval_steps_per_second": 0.444,
-    "eval_wer": 0.7835602493955974
+    "eval_samples_per_second": 2.46,
+    "eval_steps_per_second": 0.154,
+    "eval_wer": 6.432243287950121
 }
train_results.json CHANGED
@@ -1,9 +1,9 @@
 {
     "epoch": 1.0204081632653061,
     "total_flos": 2.3614434607104e+17,
-    "train_loss": 2.385703277587891,
-    "train_runtime": 5094.7107,
+    "train_loss": 10.854962158203126,
+    "train_runtime": 4570.606,
     "train_samples": 9400,
-    "train_samples_per_second": 1.884,
-    "train_steps_per_second": 0.029
+    "train_samples_per_second": 2.1,
+    "train_steps_per_second": 0.033
 }
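The throughput fields are consistent with the other values: train_samples_per_second is roughly train_samples * epoch / train_runtime, and train_steps_per_second is roughly total steps / train_runtime. A quick arithmetic check against the new numbers:

# Cross-checking the reported throughput in train_results.json.
train_samples = 9400
epoch = 1.0204081632653061
train_runtime = 4570.606  # seconds
total_steps = 150         # from trainer_state.json below

print(train_samples * epoch / train_runtime)  # ~= 2.099, reported as 2.1
print(total_steps / train_runtime)            # ~= 0.0328, reported as 0.033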
trainer_state.json CHANGED
@@ -10,97 +10,97 @@
   "log_history": [
     {
       "epoch": 0.20408163265306123,
-      "grad_norm": 28829.30859375,
+      "grad_norm": 19.92782211303711,
       "learning_rate": 1e-05,
-      "loss": 3.6903,
+      "loss": 10.8562,
       "step": 30
     },
     {
       "epoch": 0.20408163265306123,
-      "eval_cer": 0.43646314994809854,
-      "eval_loss": 2.987450122833252,
-      "eval_runtime": 203.3511,
-      "eval_samples_per_second": 5.586,
-      "eval_steps_per_second": 0.349,
-      "eval_wer": 1.0127242651736863,
+      "eval_cer": 24.936057374728698,
+      "eval_loss": 10.856045722961426,
+      "eval_runtime": 524.0657,
+      "eval_samples_per_second": 2.168,
+      "eval_steps_per_second": 0.135,
+      "eval_wer": 17.0624761420028,
       "step": 30
     },
     {
       "epoch": 0.40816326530612246,
-      "grad_norm": 26780.62109375,
+      "grad_norm": 18.936391830444336,
       "learning_rate": 7.500000000000001e-06,
-      "loss": 2.533,
+      "loss": 10.8557,
       "step": 60
     },
     {
       "epoch": 0.40816326530612246,
-      "eval_cer": 0.2920826649051618,
-      "eval_loss": 2.235991954803467,
-      "eval_runtime": 163.1323,
-      "eval_samples_per_second": 6.964,
-      "eval_steps_per_second": 0.435,
-      "eval_wer": 0.8878992238198244,
+      "eval_cer": 28.020024535245824,
+      "eval_loss": 10.855280876159668,
+      "eval_runtime": 546.8477,
+      "eval_samples_per_second": 2.077,
+      "eval_steps_per_second": 0.13,
+      "eval_wer": 20.38325486703143,
       "step": 60
     },
     {
       "epoch": 0.6122448979591837,
-      "grad_norm": 9.42530632019043,
+      "grad_norm": 0.06280206888914108,
       "learning_rate": 5e-06,
-      "loss": 2.0604,
+      "loss": 10.8549,
       "step": 90
     },
     {
       "epoch": 0.6122448979591837,
-      "eval_cer": 0.26696234783429273,
-      "eval_loss": 1.951379656791687,
-      "eval_runtime": 171.6219,
-      "eval_samples_per_second": 6.619,
-      "eval_steps_per_second": 0.414,
-      "eval_wer": 0.8252958391652883,
+      "eval_cer": 17.241804284231385,
+      "eval_loss": 10.854511260986328,
+      "eval_runtime": 487.9237,
+      "eval_samples_per_second": 2.328,
+      "eval_steps_per_second": 0.146,
+      "eval_wer": 11.682275098613054,
       "step": 90
     },
     {
       "epoch": 0.8163265306122449,
-      "grad_norm": 724720.0625,
+      "grad_norm": 18.20659828186035,
       "learning_rate": 2.5e-06,
-      "loss": 1.852,
+      "loss": 10.8542,
       "step": 120
     },
     {
       "epoch": 0.8163265306122449,
-      "eval_cer": 0.25809191280551097,
-      "eval_loss": 1.8181612491607666,
-      "eval_runtime": 180.4927,
-      "eval_samples_per_second": 6.294,
-      "eval_steps_per_second": 0.393,
-      "eval_wer": 0.7948848454001781,
+      "eval_cer": 15.890988015476077,
+      "eval_loss": 10.853907585144043,
+      "eval_runtime": 553.861,
+      "eval_samples_per_second": 2.051,
+      "eval_steps_per_second": 0.128,
+      "eval_wer": 10.510752004071765,
       "step": 120
     },
     {
       "epoch": 1.0204081632653061,
-      "grad_norm": 9.10916519165039,
+      "grad_norm": 17.312578201293945,
       "learning_rate": 0.0,
-      "loss": 1.7929,
+      "loss": 10.8538,
       "step": 150
     },
     {
       "epoch": 1.0204081632653061,
-      "eval_cer": 0.253486835896952,
-      "eval_loss": 1.7784144878387451,
-      "eval_runtime": 183.691,
-      "eval_samples_per_second": 6.184,
-      "eval_steps_per_second": 0.387,
-      "eval_wer": 0.7835602493955974,
+      "eval_cer": 9.633726526375389,
+      "eval_loss": 10.85368824005127,
+      "eval_runtime": 444.8852,
+      "eval_samples_per_second": 2.553,
+      "eval_steps_per_second": 0.16,
+      "eval_wer": 6.432243287950121,
       "step": 150
     },
     {
       "epoch": 1.0204081632653061,
       "step": 150,
       "total_flos": 2.3614434607104e+17,
-      "train_loss": 2.385703277587891,
-      "train_runtime": 5094.7107,
-      "train_samples_per_second": 1.884,
-      "train_steps_per_second": 0.029
+      "train_loss": 10.854962158203126,
+      "train_runtime": 4570.606,
+      "train_samples_per_second": 2.1,
+      "train_steps_per_second": 0.033
     }
   ],
   "logging_steps": 30,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:adb59dce1518b3dd6568a6cc562e4afcb56e424e6b498b28a4052dc7bfa10edd
+oid sha256:d6e284de677cedf31ded5945f8429c4273dde4e1fdd0f42a17e2b354207344d9
 size 5368
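Only the Git LFS pointer changed here: the binary itself is stored out-of-band and identified by its sha256. training_args.bin is the pickled TrainingArguments object the Trainer saves; a sketch of inspecting a local copy, assuming torch and transformers are installed (unpickling implies trusting the file):

# Inspect the pickled TrainingArguments stored in training_args.bin.
import torch

args = torch.load("training_args.bin", weights_only=False)  # trusts the pickle
print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)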
vocab.json CHANGED
The diff for this file is too large to render. See raw diff