sanchit-gandhi committed
Saving train state of step 50000
- checkpoint-50000-epoch-3/optimizer.bin +3 -0
- checkpoint-50000-epoch-3/pytorch_model.bin +3 -0
- checkpoint-50000-epoch-3/random_states_0.pkl +3 -0
- checkpoint-50000-epoch-3/random_states_1.pkl +3 -0
- checkpoint-50000-epoch-3/random_states_2.pkl +3 -0
- checkpoint-50000-epoch-3/random_states_3.pkl +3 -0
- checkpoint-50000-epoch-3/random_states_4.pkl +3 -0
- checkpoint-50000-epoch-3/random_states_5.pkl +3 -0
- checkpoint-50000-epoch-3/random_states_6.pkl +3 -0
- checkpoint-50000-epoch-3/random_states_7.pkl +3 -0
- checkpoint-50000-epoch-3/scheduler.bin +3 -0
- starting_point_0.01.json +1 -1
- training/__pycache__/eval.cpython-311.pyc +0 -0
- training/eval.py +27 -4
checkpoint-50000-epoch-3/optimizer.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09c35b07606e97495a37616ed994df43889eb4a598bc9c960d376b454e7d394f
+size 3652769047
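Each ADDED file in this commit is a Git LFS pointer, not the payload itself: only the three lines above (spec version, SHA-256 oid, byte size) are stored in git, while the ~3.7 GB optimizer state lives in LFS storage. A sketch of materializing the real file from the Hub; repo_id is a placeholder, not taken from this commit:

from huggingface_hub import hf_hub_download

# Downloads the LFS payload behind the pointer. The oid above is the SHA-256
# of the payload, so the downloaded file can be integrity-checked against it.
path = hf_hub_download(
    repo_id="<user>/<repo>",  # placeholder -- substitute this repository's id
    filename="checkpoint-50000-epoch-3/optimizer.bin",
)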
checkpoint-50000-epoch-3/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc60973ca81c4e6978d54a7a38afe8b605630b7e92e8b6ba71ce2ccd7f647ca3
+size 2605239710
checkpoint-50000-epoch-3/random_states_0.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d3932b085d086fdd7cda745ea5df5da9de938dd02afb55c6d8c4ea852d0e007
+size 16100
checkpoint-50000-epoch-3/random_states_1.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d069517a6ab2bc4e3db00708c5709cf8f82627bfb66e373187266da80434e03d
+size 16100
checkpoint-50000-epoch-3/random_states_2.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4cc8acd2f855e9e41b27844d52e03888930c309dd4a91cec294b211d96da5d9f
+size 16100
checkpoint-50000-epoch-3/random_states_3.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f69188709da17af83e3184e136e61fb2ebe97237ba7fab6e70c2fa41f6d4a223
+size 16100
checkpoint-50000-epoch-3/random_states_4.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f746cbf158872f2299407683e74fb90936fa3a4f250b8a34aa1aeaf6029b7ac
+size 16100
checkpoint-50000-epoch-3/random_states_5.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f9914ec22970a5b5c485e5d27c7e6f9f666fd0e6380fbaf59419527e4259f81
+size 16100
checkpoint-50000-epoch-3/random_states_6.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87e40ce9aab0771cb31fe95c7514c08ef76b2943053aba2836aea1fbbed10e28
+size 16100
checkpoint-50000-epoch-3/random_states_7.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d4562fde2759a668f94d65ab8fd4671e5e6c655f2461ad561a6bff168593b32
+size 16100
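The eight random_states_{0..7}.pkl files, one per process, point to an 8-GPU run: accelerate snapshots every rank's RNG state separately so a resumed run continues the same random stream (data shuffling, dropout, sampling). A small inspection sketch; the exact key names come from accelerate's checkpointing module and should be verified against your installed version:

import pickle

with open("checkpoint-50000-epoch-3/random_states_0.pkl", "rb") as f:
    states = pickle.load(f)

# Typically holds the Python `random` state, the NumPy RNG state, and the
# torch CPU/CUDA RNG states for this rank.
print(sorted(states.keys()))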
checkpoint-50000-epoch-3/scheduler.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:585a68aa005b84644c654a9d79b3a7ee935259f1376345794bbe5821b4f7ac85
+size 1000
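Together these files match what accelerate's Accelerator.save_state() writes for a prepared model/optimizer/scheduler, which is presumably how this "train state of step 50000" was produced. A minimal round-trip sketch with stand-in objects, not the actual Parler-TTS training setup:

import torch
from accelerate import Accelerator

accelerator = Accelerator()

# Stand-ins; in the real run these are the Parler-TTS model, optimizer and LR scheduler.
model = torch.nn.Linear(8, 8)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.LinearLR(optimizer)
model, optimizer, scheduler = accelerator.prepare(model, optimizer, scheduler)

# save_state() writes the model weights, optimizer and scheduler state, and one
# random_states_<rank>.pkl per process -- the layout seen in this checkpoint
# (exact file names vary with the accelerate version).
accelerator.save_state("checkpoint-demo")

# On relaunch, load_state() restores all of them in one call.
accelerator.load_state("checkpoint-demo")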
starting_point_0.01.json
CHANGED
@@ -10,7 +10,7 @@
     "prompt_tokenizer_name":"google/flan-t5-base",

     "report_to": ["wandb"],
-    "overwrite_output_dir":
+    "overwrite_output_dir": false,
     "output_dir": "./",

     "train_dataset_name": "blabble-io/libritts_r+blabble-io/libritts_r+blabble-io/libritts_r+parler-tts/mls_eng_10k",
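With overwrite_output_dir now false, HF-style training scripts typically stop clearing output_dir ("./", i.e. this repo) on launch and instead look for the most recent checkpoint there, so checkpoint-50000-epoch-3 survives a relaunch and training can resume from step 50000. A trivial check of the changed flag:

import json

with open("starting_point_0.01.json") as f:
    config = json.load(f)

# After this commit the flag is false, so existing checkpoints are kept.
assert config["overwrite_output_dir"] is False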
training/__pycache__/eval.cpython-311.pyc
CHANGED
Binary files a/training/__pycache__/eval.cpython-311.pyc and b/training/__pycache__/eval.cpython-311.pyc differ
training/eval.py
CHANGED
@@ -1,6 +1,6 @@
 import torch
 import evaluate
-from transformers import AutoModel, AutoProcessor, pipeline
+from transformers import AutoModel, AutoProcessor, pipeline, WhisperForConditionalGeneration, WhisperTokenizer, WhisperTokenizerFast


 def clap_similarity(clap_model_name_or_path, texts, audios, device):
@@ -24,13 +24,36 @@ def clap_similarity(clap_model_name_or_path, texts, audios, device):
 def wer(asr_model_name_or_path, prompts, audios, device, per_device_eval_batch_size, sampling_rate):
     metric = evaluate.load("wer")
     asr_pipeline = pipeline(model=asr_model_name_or_path, device=device)
+
+    return_language = None
+    if isinstance(asr_pipeline.model, WhisperForConditionalGeneration):
+        return_language = True
+
     transcriptions = asr_pipeline(
         [{"raw": audio, "sampling_rate": sampling_rate} for audio in audios],
         batch_size=int(per_device_eval_batch_size),
+        return_language=return_language,
     )

-
-
-
+    if isinstance(asr_pipeline.tokenizer, (WhisperTokenizer, WhisperTokenizerFast)):
+        tokenizer = asr_pipeline.tokenizer
+    else:
+        tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-large-v3")
+
+    english_normalizer = tokenizer.normalize
+    basic_normalizer = tokenizer.basic_normalize
+
+    normalized_predictions = []
+    normalized_references = []
+
+    for pred, ref in zip(transcriptions, prompts):
+        normalizer = english_normalizer if isinstance(pred.get("chunks", None), list) and pred["chunks"][0].get("language", None) == "english" else basic_normalizer
+        norm_ref = normalizer(ref)
+        if len(norm_ref) > 0:
+            norm_pred = normalizer(pred["text"])
+            normalized_predictions.append(norm_pred)
+            normalized_references.append(norm_ref)
+
+    word_error = 100 * metric.compute(predictions=normalized_predictions, references=normalized_references)

     return word_error, [t["text"] for t in transcriptions]
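The behavior change: when the ASR model is a Whisper checkpoint, the pipeline is asked to return the detected language, and both predictions and reference prompts are normalized before WER is computed, with Whisper's English normalizer for English audio and the basic normalizer otherwise; references that normalize to empty strings are skipped. A quick illustration of the two normalizers (a sketch; exact outputs depend on the transformers version):

from transformers import WhisperTokenizer

tok = WhisperTokenizer.from_pretrained("openai/whisper-large-v3")
text = "Mr. Smith owes $1.50!"
print(tok.normalize(text))        # English normalizer: abbreviations/numbers canonicalized, lowercased, punctuation stripped
print(tok.basic_normalize(text))  # basic normalizer: lowercasing plus punctuation/diacritics removal only

And a hypothetical call to the updated helper; the import path, model name and inputs are placeholders, not from this commit:

import numpy as np
from training.eval import wer  # hypothetical import path

word_error, transcripts = wer(
    "openai/whisper-large-v3",                    # any HF ASR checkpoint; Whisper models trigger the language-aware path
    prompts=["hello world"],
    audios=[np.zeros(16000, dtype=np.float32)],   # one second of silence as stand-in audio
    device="cpu",
    per_device_eval_batch_size=1,
    sampling_rate=16000,
)
print(word_error, transcripts)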