NbAiLab
/

wav2vec2-xls-r-1b-npsc-bokmaal

Automatic Speech Recognition

Inference Endpoints

Model card Files Files and versions Metrics Training metrics Community

versae committed on Feb 6, 2022

Commit

095b715

•

1 Parent(s): f625488

Update eval.py

Files changed (1) hide show

eval.py +2 -8

eval.py CHANGED Viewed

@@ -6,7 +6,7 @@ from typing import Dict
 import torch
 from datasets import Audio, Dataset, load_dataset, load_metric
-from transformers import AutoFeatureExtractor, AutoModel, AutoTokenizer, pipeline
 def log_results(result: Dataset, args: Dict[str, str]):
@@ -81,7 +81,6 @@ def normalize_text(text: str) -> str:
 def main(args):
     # load dataset
     dataset = load_dataset(args.dataset, args.config, split=args.split, use_auth_token=True)
-    #dataset = load_dataset(args.dataset, args.config, split=args.split, use_auth_token=True).filter(lambda entry: re.search("nb-nn", entry["sentence_language_code"], flags=re.IGNORECASE))
     # for testing: only process the first two examples as a test
     # dataset = dataset.select(range(10))
@@ -96,12 +95,7 @@ def main(args):
     # load eval pipeline
     if args.device is None:
         args.device = 0 if torch.cuda.is_available() else -1
-    asr = pipeline("automatic-speech-recognition",
-        model=AutoModel.from_pretrained(args.model_id),
-        tokenizer=AutoTokenizer.from_pretrained(args.model_id),
-        feature_extractor=feature_extractor,
-        device=args.device
-    )
     # map function to decode audio
     def map_to_pred(batch):

 import torch
 from datasets import Audio, Dataset, load_dataset, load_metric
+from transformers import AutoFeatureExtractor, pipeline
 def log_results(result: Dataset, args: Dict[str, str]):
 def main(args):
     # load dataset
     dataset = load_dataset(args.dataset, args.config, split=args.split, use_auth_token=True)
     # for testing: only process the first two examples as a test
     # dataset = dataset.select(range(10))
     # load eval pipeline
     if args.device is None:
         args.device = 0 if torch.cuda.is_available() else -1
+    asr = pipeline("automatic-speech-recognition", model=args.model_id, device=args.device)
     # map function to decode audio
     def map_to_pred(batch):