sanchit-gandhi committed on
Commit
91b2b1a
·
verified ·
1 Parent(s): dcf699d

Saving train state of step 50000

Browse files
checkpoint-50000-epoch-3/optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09c35b07606e97495a37616ed994df43889eb4a598bc9c960d376b454e7d394f
3
+ size 3652769047
checkpoint-50000-epoch-3/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc60973ca81c4e6978d54a7a38afe8b605630b7e92e8b6ba71ce2ccd7f647ca3
3
+ size 2605239710
checkpoint-50000-epoch-3/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d3932b085d086fdd7cda745ea5df5da9de938dd02afb55c6d8c4ea852d0e007
3
+ size 16100
checkpoint-50000-epoch-3/random_states_1.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d069517a6ab2bc4e3db00708c5709cf8f82627bfb66e373187266da80434e03d
3
+ size 16100
checkpoint-50000-epoch-3/random_states_2.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cc8acd2f855e9e41b27844d52e03888930c309dd4a91cec294b211d96da5d9f
3
+ size 16100
checkpoint-50000-epoch-3/random_states_3.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f69188709da17af83e3184e136e61fb2ebe97237ba7fab6e70c2fa41f6d4a223
3
+ size 16100
checkpoint-50000-epoch-3/random_states_4.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f746cbf158872f2299407683e74fb90936fa3a4f250b8a34aa1aeaf6029b7ac
3
+ size 16100
checkpoint-50000-epoch-3/random_states_5.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f9914ec22970a5b5c485e5d27c7e6f9f666fd0e6380fbaf59419527e4259f81
3
+ size 16100
checkpoint-50000-epoch-3/random_states_6.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87e40ce9aab0771cb31fe95c7514c08ef76b2943053aba2836aea1fbbed10e28
3
+ size 16100
checkpoint-50000-epoch-3/random_states_7.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d4562fde2759a668f94d65ab8fd4671e5e6c655f2461ad561a6bff168593b32
3
+ size 16100
checkpoint-50000-epoch-3/scheduler.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:585a68aa005b84644c654a9d79b3a7ee935259f1376345794bbe5821b4f7ac85
3
+ size 1000
starting_point_0.01.json CHANGED
@@ -10,7 +10,7 @@
10
  "prompt_tokenizer_name":"google/flan-t5-base",
11
 
12
  "report_to": ["wandb"],
13
- "overwrite_output_dir": true,
14
  "output_dir": "./",
15
 
16
  "train_dataset_name": "blabble-io/libritts_r+blabble-io/libritts_r+blabble-io/libritts_r+parler-tts/mls_eng_10k",
 
10
  "prompt_tokenizer_name":"google/flan-t5-base",
11
 
12
  "report_to": ["wandb"],
13
+ "overwrite_output_dir": false,
14
  "output_dir": "./",
15
 
16
  "train_dataset_name": "blabble-io/libritts_r+blabble-io/libritts_r+blabble-io/libritts_r+parler-tts/mls_eng_10k",
training/__pycache__/eval.cpython-311.pyc CHANGED
Binary files a/training/__pycache__/eval.cpython-311.pyc and b/training/__pycache__/eval.cpython-311.pyc differ
 
training/eval.py CHANGED
@@ -1,6 +1,6 @@
1
  import torch
2
  import evaluate
3
- from transformers import AutoModel, AutoProcessor, pipeline
4
 
5
 
6
  def clap_similarity(clap_model_name_or_path, texts, audios, device):
@@ -24,13 +24,36 @@ def clap_similarity(clap_model_name_or_path, texts, audios, device):
24
  def wer(asr_model_name_or_path, prompts, audios, device, per_device_eval_batch_size, sampling_rate):
25
  metric = evaluate.load("wer")
26
  asr_pipeline = pipeline(model=asr_model_name_or_path, device=device)
 
 
 
 
 
27
  transcriptions = asr_pipeline(
28
  [{"raw": audio, "sampling_rate": sampling_rate} for audio in audios],
29
  batch_size=int(per_device_eval_batch_size),
 
30
  )
31
 
32
- word_error = 100 * metric.compute(
33
- predictions=[t["text"].lower() for t in transcriptions], references=[t.lower() for t in prompts]
34
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  return word_error, [t["text"] for t in transcriptions]
 
1
  import torch
2
  import evaluate
3
+ from transformers import AutoModel, AutoProcessor, pipeline, WhisperForConditionalGeneration, WhisperTokenizer, WhisperTokenizerFast
4
 
5
 
6
  def clap_similarity(clap_model_name_or_path, texts, audios, device):
 
24
  def wer(asr_model_name_or_path, prompts, audios, device, per_device_eval_batch_size, sampling_rate):
25
  metric = evaluate.load("wer")
26
  asr_pipeline = pipeline(model=asr_model_name_or_path, device=device)
27
+
28
+ return_language = None
29
+ if isinstance(asr_pipeline.model, WhisperForConditionalGeneration):
30
+ return_language = True
31
+
32
  transcriptions = asr_pipeline(
33
  [{"raw": audio, "sampling_rate": sampling_rate} for audio in audios],
34
  batch_size=int(per_device_eval_batch_size),
35
+ return_language=return_language,
36
  )
37
 
38
+ if isinstance(asr_pipeline.tokenizer, (WhisperTokenizer, WhisperTokenizerFast)):
39
+ tokenizer = asr_pipeline.tokenizer
40
+ else:
41
+ tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-large-v3")
42
+
43
+ english_normalizer = tokenizer.normalize
44
+ basic_normalizer = tokenizer.basic_normalize
45
+
46
+ normalized_predictions = []
47
+ normalized_references = []
48
+
49
+ for pred, ref in zip(transcriptions, prompts):
50
+ normalizer = english_normalizer if pred.get("language") == "english" else basic_normalizer
51
+ norm_ref = normalizer(ref)
52
+ if len(norm_ref) > 0:
53
+ norm_pred = normalizer(pred["text"])
54
+ normalized_predictions.append(norm_pred)
55
+ normalized_references.append(norm_ref)
56
+
57
+ word_error = 100 * metric.compute(predictions=normalized_predictions, references=normalized_references)
58
 
59
  return word_error, [t["text"] for t in transcriptions]