Lauler committed · verified
Commit 8d49820 · 1 Parent(s): 2980ad5

Add files using upload-large-folder tool

config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/leonardo_work/EUHPC_A01_006/models/whisper-small",
+  "_name_or_path": "/leonardo_work/EUHPC_A01_006/experiments_whisper/stage1_results/small/",
   "activation_dropout": 0.1,
   "activation_function": "gelu",
   "apply_spec_augment": false,
@@ -7,10 +7,7 @@
     "WhisperForConditionalGeneration"
   ],
   "attention_dropout": 0.0,
-  "begin_suppress_tokens": [
-    220,
-    50257
-  ],
+  "begin_suppress_tokens": null,
   "bos_token_id": 50257,
   "classifier_proj_size": 256,
   "d_model": 768,
@@ -34,7 +31,7 @@
   "mask_time_length": 10,
   "mask_time_min_masks": 2,
   "mask_time_prob": 0.05,
-  "max_length": 448,
+  "max_length": null,
   "max_source_positions": 1500,
   "max_target_positions": 448,
   "median_filter_width": 7,
@@ -44,7 +41,7 @@
   "pad_token_id": 50257,
   "scale_embedding": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.37.2",
+  "transformers_version": "4.45.2",
   "use_cache": true,
   "use_weighted_layer_sum": false,
   "vocab_size": 51865
generation_config.json CHANGED
@@ -160,6 +160,7 @@
     "<|yo|>": 50325,
     "<|zh|>": 50260
   },
+  "language": "<|sv|>",
   "max_initial_timestamp_index": 50,
   "max_length": 448,
   "no_timestamps_token_id": 50363,
@@ -256,9 +257,10 @@
     50361,
     50362
   ],
+  "task": "transcribe",
   "task_to_id": {
     "transcribe": 50359,
     "translate": 50358
   },
-  "transformers_version": "4.37.2"
+  "transformers_version": "4.45.2"
 }
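The new "language": "<|sv|>" and "task": "transcribe" entries make Swedish transcription the default decoding behavior, so callers no longer need to pass language or task to generate(). A hedged usage sketch; the repo id and the silent stand-in audio are placeholders, not part of this commit:

```python
import numpy as np
from transformers import WhisperForConditionalGeneration, WhisperProcessor

# Placeholder repo id -- substitute the actual Hub repository for this model.
repo_id = "your-org/whisper-small-sv"

processor = WhisperProcessor.from_pretrained(repo_id)
model = WhisperForConditionalGeneration.from_pretrained(repo_id)

# One second of 16 kHz silence as stand-in audio.
audio = np.zeros(16000, dtype=np.float32)
inputs = processor(audio, sampling_rate=16000, return_tensors="pt")

# language="<|sv|>" and task="transcribe" are now defaults in
# generation_config.json, so no language/task kwargs are needed here.
ids = model.generate(inputs.input_features)
print(processor.batch_decode(ids, skip_special_tokens=True)[0])
```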
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4968b89aef05596bf9fcc61c01c8d483637ee788b51b02cfcc5e67e44bebde7b
+oid sha256:1bc6e9b8a0b42dba65026354cf6247e4b5dbe6a1a3a663d7f0a0d1f5ef3a3ae7
 size 563189936
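Only the LFS pointer's oid changed; the byte size is identical, so the new checkpoint swaps in weights with the same tensor shapes. A small sketch for verifying a downloaded model.safetensors against the new pointer (the local path is an assumption):

```python
import hashlib

# Assumed local path to the downloaded weights file.
path = "model.safetensors"

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

# Should equal the oid in the LFS pointer introduced by this commit.
expected = "1bc6e9b8a0b42dba65026354cf6247e4b5dbe6a1a3a663d7f0a0d1f5ef3a3ae7"
print(sha.hexdigest() == expected)
```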
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,5 +1,4 @@
 {
-  "add_bos_token": false,
   "add_prefix_space": true,
   "added_tokens_decoder": {
     "50257": {
@@ -12977,14 +12976,11 @@
     "<|notimestamps|>"
   ],
   "bos_token": "<|endoftext|>",
-  "clean_up_tokenization_spaces": true,
+  "clean_up_tokenization_spaces": false,
   "dropout": 0.2,
   "eos_token": "<|endoftext|>",
-  "errors": "replace",
-  "model_max_length": 1024,
+  "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<|endoftext|>",
-  "processor_class": "WhisperProcessor",
-  "return_attention_mask": false,
   "tokenizer_class": "WhisperTokenizer",
   "unk_token": "<|endoftext|>"
 }
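The new model_max_length value is transformers' VERY_LARGE_INTEGER sentinel, int(1e30), meaning no tokenizer-side truncation limit, replacing the previous hard cap of 1024. A quick check, assuming the same hypothetical local directory as in the earlier sketches:

```python
from transformers import WhisperTokenizer
from transformers.tokenization_utils_base import VERY_LARGE_INTEGER

# Same hypothetical local directory as in the earlier sketches.
tokenizer = WhisperTokenizer.from_pretrained("./whisper-small-sv")

# 1000000000000000019884624838656 == int(1e30), the library's
# "effectively unlimited" sentinel for model_max_length.
print(tokenizer.model_max_length == VERY_LARGE_INTEGER)  # True
```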