Amir Zait committed on
Commit
d8ec8f4
1 Parent(s): 077c45d

fixed bugs

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -54,12 +54,12 @@ def parse_transcription(wav_file):
54
  convert(wav_file.name, filename + "16k.wav")
55
  speech, _ = sf.read(filename + "16k.wav")
56
  print(speech.shape)
57
- input_values = trans_processor(speech, sampling_rate=16_000, return_tensors="pt").input_values
58
- logits = trans_model(input_values).logits
59
  predicted_ids = torch.argmax(logits, dim=-1)
60
- transcription = trans_processor.decode(predicted_ids[0], skip_special_tokens=True)
61
  translated = he_en_translator(trasncription)
62
- return transcription
63
 
64
  output = gr.outputs.Textbox(label="TEXT")
65
  input_mic = gr.inputs.Audio(source="microphone", type="file", optional=True)
@@ -70,5 +70,5 @@ gr.Interface(parse_transcription, inputs=[input_mic], outputs=output,
70
  show_tips=False,
71
  theme='huggingface',
72
  layout='horizontal',
73
- title="Draw Me A Ship in Hebrew",
74
  enable_queue=True).launch(inline=False)
 
54
  convert(wav_file.name, filename + "16k.wav")
55
  speech, _ = sf.read(filename + "16k.wav")
56
  print(speech.shape)
57
+ input_values = asr_processor(speech, sampling_rate=16_000, return_tensors="pt").input_values
58
+ logits = asr_model(input_values).logits
59
  predicted_ids = torch.argmax(logits, dim=-1)
60
+ transcription = asr_processor.decode(predicted_ids[0], skip_special_tokens=True)
61
  translated = he_en_translator(trasncription)
62
+ return translated
63
 
64
  output = gr.outputs.Textbox(label="TEXT")
65
  input_mic = gr.inputs.Audio(source="microphone", type="file", optional=True)
 
70
  show_tips=False,
71
  theme='huggingface',
72
  layout='horizontal',
73
+ title="Draw Me A Sheep in Hebrew",
74
  enable_queue=True).launch(inline=False)