arun100 committed on
Commit
321b86e
·
verified ·
1 Parent(s): 451617b

Training in progress, step 100

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "vasista22/whisper-hindi-small",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
 
1
  {
2
+ "_name_or_path": "qanastek/whisper-small-french-uncased",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96793a88a265c07ef4184ca8fe117b013d58b2b7c85cc356aa07bbb9dcbcf3a6
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3622ab230d796bea00496f6ee443c80ebb2a7d176777ff4817c17d1c4347bb50
3
  size 966995080
run_cmn.sh CHANGED
@@ -1,23 +1,23 @@
1
  python3 run_speech_recognition_seq2seq_streaming.py \
2
- --model_name_or_path="vasista22/whisper-hindi-small" \
3
  --dataset_name="mozilla-foundation/common_voice_16_0" \
4
- --dataset_config_name="hi" \
5
- --language="hindi" \
6
  --train_split_name="train+validation" \
7
  --eval_split_name="test" \
8
- --model_index_name="Whisper Base Bengali" \
9
- --max_steps="100" \
10
  --output_dir="./" \
11
  --per_device_train_batch_size="16" \
12
- --per_device_eval_batch_size="16" \
13
  --gradient_accumulation_steps=4 \
14
  --logging_steps="25" \
15
- --learning_rate="1e-6" \
16
- --warmup_steps="10" \
17
  --evaluation_strategy="steps" \
18
- --eval_steps="50" \
19
  --save_strategy="steps" \
20
- --save_steps="50" \
21
  --generation_max_length="225" \
22
  --length_column_name="input_length" \
23
  --max_duration_in_seconds="30" \
 
1
  python3 run_speech_recognition_seq2seq_streaming.py \
2
+ --model_name_or_path="qanastek/whisper-small-french-uncased" \
3
  --dataset_name="mozilla-foundation/common_voice_16_0" \
4
+ --dataset_config_name="fr" \
5
+ --language="french" \
6
  --train_split_name="train+validation" \
7
  --eval_split_name="test" \
8
+ --model_index_name="Whisper Base French" \
9
+ --max_steps="1000" \
10
  --output_dir="./" \
11
  --per_device_train_batch_size="16" \
12
+ --per_device_eval_batch_size="32" \
13
  --gradient_accumulation_steps=4 \
14
  --logging_steps="25" \
15
+ --learning_rate="5e-7" \
16
+ --warmup_steps="100" \
17
  --evaluation_strategy="steps" \
18
+ --eval_steps="100" \
19
  --save_strategy="steps" \
20
+ --save_steps="100" \
21
  --generation_max_length="225" \
22
  --length_column_name="input_length" \
23
  --max_duration_in_seconds="30" \
runs/Jan23_05-57-07_ip-172-31-68-44/events.out.tfevents.1705989446.ip-172-31-68-44.20295.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0486f429675408de46cc06040fcd0b4163ab6cde2d89a5eebc319fd79fdd9b7f
3
+ size 6071
tokenizer.json CHANGED
@@ -985,7 +985,7 @@
985
  },
986
  {
987
  "SpecialToken": {
988
- "id": "<|hi|>",
989
  "type_id": 0
990
  }
991
  },
@@ -1023,7 +1023,7 @@
1023
  },
1024
  {
1025
  "SpecialToken": {
1026
- "id": "<|hi|>",
1027
  "type_id": 0
1028
  }
1029
  },
@@ -1068,13 +1068,13 @@
1068
  "<|endoftext|>"
1069
  ]
1070
  },
1071
- "<|hi|>": {
1072
- "id": "<|hi|>",
1073
  "ids": [
1074
- 50276
1075
  ],
1076
  "tokens": [
1077
- "<|hi|>"
1078
  ]
1079
  },
1080
  "<|notimestamps|>": {
 
985
  },
986
  {
987
  "SpecialToken": {
988
+ "id": "<|fr|>",
989
  "type_id": 0
990
  }
991
  },
 
1023
  },
1024
  {
1025
  "SpecialToken": {
1026
+ "id": "<|fr|>",
1027
  "type_id": 0
1028
  }
1029
  },
 
1068
  "<|endoftext|>"
1069
  ]
1070
  },
1071
+ "<|fr|>": {
1072
+ "id": "<|fr|>",
1073
  "ids": [
1074
+ 50265
1075
  ],
1076
  "tokens": [
1077
+ "<|fr|>"
1078
  ]
1079
  },
1080
  "<|notimestamps|>": {
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:982b5193e750d2177a9de84e737d1bca6d1f68d33c76512f2aec4c722e52b389
3
  size 4792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c402a950cc7e208f84081823c51369e4011a49eb775a25599d930a6775401c83
3
  size 4792