Spaces:

ajchri5
/

164-S2-Assignment_2

Runtime error

App Files Files Community

164-S2-Assignment_2 / app.py

ajchri5

Upload 3 files

0b2a06f verified about 2 months ago

raw

history blame contribute delete

6.32 kB

	# -*- coding: utf-8 -*-
	"""Assignment-2-IT164_ajchri5

	Automatically generated by Colab.

	Original file is located at
	https://colab.research.google.com/drive/1RtE7mmtyUWwiuowgyQq4eCuH-ep_D1QQ
	"""

	# mount Google Drive (Colab only)
	# google.colab exists only inside a Colab runtime; importing it unconditionally
	# crashes the app at start-up everywhere else (e.g. on Hugging Face Spaces).
	try:
	    from google.colab import drive
	except ImportError:
	    # not running in Colab: no Drive to mount, keep the name defined
	    drive = None
	else:
	    drive.mount('/content/drive')

	# Commented out IPython magic to ensure Python compatibility.
	# # token
	# %%capture
	# from google.colab import userdata
	# hftoken=userdata.get('hftoken')

	# Commented out IPython magic to ensure Python compatibility.
	# # pi
	# %%capture
	# !pip install gradio
	# !pip install huggingface_hub

	# packages required for colab
	# NOTE: "!pip install ..." is IPython shell syntax and a SyntaxError in a plain
	# .py file. Run these in a notebook cell, or list the packages in
	# requirements.txt when deploying the script.
	# !pip install gradio
	# !pip install transformers
	# !pip install torchaudio
	# !pip install fasttext

	# fastText for language detection
	# Pure-Python replacement for the notebook-only "!wget ...": fetch the
	# pre-trained language-ID model once and reuse the local copy afterwards.
	import os
	import urllib.request

	_LID_MODEL_URL = "https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin"
	_LID_MODEL_PATH = "lid.176.bin"
	if not os.path.isfile(_LID_MODEL_PATH):
	    # download to a temporary name first so an interrupted transfer never
	    # leaves a truncated lid.176.bin behind
	    _lid_partial_path = _LID_MODEL_PATH + ".part"
	    urllib.request.urlretrieve(_LID_MODEL_URL, _lid_partial_path)
	    os.replace(_lid_partial_path, _LID_MODEL_PATH)

	# imports required for colab
	import gradio as gr
	from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline, EncoderDecoderCache
	import torchaudio
	import warnings
	import fasttext
	import pandas as pd
	import csv
	import os

	# silence the UserWarning whose message starts with "PySoundFile failed"
	warnings.filterwarnings(
	    "ignore",
	    message="PySoundFile failed.*",
	    category=UserWarning,
	)

	# model 1 (transcription): Whisper processor + generation model
	whisper_model_name = "openai/whisper-large"
	processor = WhisperProcessor.from_pretrained(whisper_model_name)
	whisper_model = WhisperForConditionalGeneration.from_pretrained(whisper_model_name)

	# model 2 (translation): Romance languages -> English
	translation_model = pipeline(
	    task="translation",
	    model="Helsinki-NLP/opus-mt-ROMANCE-en",
	)

	# model 3 (language detection): pre-trained fastText language identifier
	lang_model = fasttext.load_model('lid.176.bin')

	# in-memory app usage history; one [text, language, translation, score] row per request
	history_data = []

	# save data csv
	def saveData(text, language, translated_text, confidence_score,
	             file_path='/content/drive/MyDrive/IT164/a2prompt.csv'):
	    """Append one usage record to the history CSV.

	    Args:
	        text: Transcribed text.
	        language: Detected language code.
	        translated_text: English translation of ``text``.
	        confidence_score: Confidence of the language detection.
	        file_path: CSV file to append to. Defaults to the Google Drive
	            location used by the Colab notebook.

	    Raises:
	        OSError: If the file cannot be opened for appending (for example
	            when the Drive folder is not mounted).
	    """
	    # A header is needed when the file is missing *or* exists but is empty;
	    # checking only for existence would leave an empty file without headers.
	    needs_header = not os.path.isfile(file_path) or os.path.getsize(file_path) == 0

	    # newline='' lets the csv module control line endings itself
	    with open(file_path, 'a', newline='', encoding='utf-8') as f:
	        w = csv.writer(f)
	        if needs_header:
	            w.writerow(['Text', 'Language', 'Translation', 'Confidence Score'])
	        w.writerow([text, language, translated_text, confidence_score])

	# load audio input and transcribe
	def transcribe_audio(audio_file, sampling_rate=48000): # set to 48 kHz
	    """Transcribe an audio file with the Whisper model.

	    Args:
	        audio_file: Path to the audio file to load with torchaudio.
	        sampling_rate: Kept for backward compatibility; it is not used. The
	            real rate is read from the file and the audio is resampled to
	            16 kHz before it reaches the processor.

	    Returns:
	        The decoded transcription with special tokens removed.
	    """
	    target_sr = 16000

	    # load audio file with torchaudio
	    waveform, sr = torchaudio.load(audio_file, normalize=True)

	    # Downmix to mono. squeeze(0) only drops a channel axis of size 1, so a
	    # stereo recording would otherwise reach the processor as a 2-D array.
	    # NOTE(review): assumes torchaudio's default (channels, time) layout.
	    waveform = waveform.mean(dim=0)

	    # resample to 16 kHz when the file uses a different rate
	    if sr != target_sr:
	        resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_sr)
	        waveform = resampler(waveform)
	        sr = target_sr

	    # whisperprocessor: raw samples -> model input features
	    inputs = processor(waveform.numpy(), return_tensors="pt", sampling_rate=sr)

	    # The former past_key_values=None argument and the EncoderDecoderCache
	    # conversion guarded by "is not None" could never take effect, so both
	    # were removed.
	    generated_ids = whisper_model.generate(inputs["input_features"])

	    return processor.decode(generated_ids[0], skip_special_tokens=True)

	# detect language using fastText
	def detect_language(text):
	    """Detect the language of ``text`` with the fastText model.

	    Args:
	        text: Text to classify; line breaks are replaced by spaces first.

	    Returns:
	        A ``(language, score)`` tuple: the top predicted label with its
	        ``__label__`` prefix removed, and the score reported for it.
	    """
	    # fastText's predict() handles one line at a time and raises ValueError
	    # when the input contains a newline, so flatten the text first.
	    single_line = " ".join(text.splitlines())

	    labels, scores = lang_model.predict(single_line)
	    language = labels[0].replace('__label__', '') # strip fastText's label prefix
	    return language, scores[0]

	# translate text (to english)
	def translate_text_to_english(text, source_lang="fr"):
	    """Translate ``text`` to English with the translation pipeline.

	    Args:
	        text: Text to translate.
	        source_lang: Language code of ``text``, passed to the pipeline as
	            ``src_lang``.

	    Returns:
	        The English translation. Blank input yields ``""`` and text already
	        tagged as English is returned unchanged, both without calling the
	        model.
	    """
	    # nothing to translate: skip the model call
	    if not text or not text.strip():
	        return ""

	    # already English: running it through the translation model adds nothing
	    if source_lang == "en":
	        return text

	    translation = translation_model(text, src_lang=source_lang, tgt_lang="en")
	    return translation[0]['translation_text']

	# function to track history (save results to the list and save to csv)
	def save_to_history(text, language, translation, confidence_score):
	    """Record one result in the in-memory history and in the CSV file.

	    The in-memory list is updated first. Writing the CSV is best-effort: an
	    ``OSError`` from the write is reported as a warning instead of aborting
	    the request whose results are already computed.

	    Args:
	        text: Transcribed text.
	        language: Detected language code.
	        translation: English translation of ``text``.
	        confidence_score: Confidence of the language detection.
	    """
	    history_data.append([text, language, translation, confidence_score])

	    # save csv
	    try:
	        saveData(text, language, translation, confidence_score)
	    except OSError as err:
	        warnings.warn(f"Could not write usage history to CSV: {err}")

	# process audio, transcribe, detect language, and translate
	def process_audio(audio_file):
	    """Transcribe a recording, detect its language, translate it and log it.

	    Args:
	        audio_file: Path of the recorded or uploaded audio file, as supplied
	            by the Gradio audio component.

	    Returns:
	        A ``(transcription, language, score, translated_text)`` tuple, in the
	        order of the output components wired to the button.

	    Raises:
	        gr.Error: If no audio was provided.
	    """
	    # The button can be clicked before anything is recorded; report that to
	    # the user instead of failing inside the audio loader.
	    if not audio_file:
	        raise gr.Error("Please record or upload audio before processing.")

	    transcription = transcribe_audio(audio_file, sampling_rate=48000) # 48 kHz initially (mac rate)
	    language, score = detect_language(transcription)
	    translated_text = translate_text_to_english(transcription, source_lang=language)
	    save_to_history(transcription, language, translated_text, score)
	    return transcription, language, score, translated_text

	# update visibility of the history table in gradio
	def update_vis(radio_value):
	    """Return the history table, visible only when ``radio_value`` is 'show'.

	    The table is rebuilt from ``history_data`` on every call, whichever
	    option is selected.
	    """
	    column_names = ["Text", "Language", "Translation", "Confidence Score"]
	    table = pd.DataFrame(history_data, columns=column_names)
	    return gr.DataFrame(table, visible=(radio_value == 'show'))

	# gradio interface: inputs/outputs in the left column, usage history in the right
	with gr.Blocks() as demo:
	    with gr.Row():
	        with gr.Column():
	            # audio in, then the four result fields and the trigger button
	            audio_input = gr.Audio(label="Record your voice", type="filepath")
	            transcription_output = gr.Textbox(label="Transcription")
	            language_output = gr.Textbox(label="Detected Language")
	            score_output = gr.Textbox(label="Confidence Score")
	            translated_output = gr.Textbox(label="Translated Text to English")
	            process_button = gr.Button(value="Process Audio")

	        with gr.Column():
	            # radio toggles the history table, which starts hidden
	            history = gr.Radio(choices=['show', 'hide'], label="App usage history")
	            dataframe = gr.DataFrame(
	                value=pd.DataFrame(
	                    history_data,
	                    columns=["Text", "Language", "Translation", "Confidence Score"],
	                ),
	                visible=False,
	            )

	    # button click: run the full pipeline and fill the four result fields
	    process_button.click(
	        fn=process_audio,
	        inputs=[audio_input],
	        outputs=[
	            transcription_output,
	            language_output,
	            score_output,
	            translated_output,
	        ],
	    )
	    # radio change: rebuild the history table and show or hide it
	    history.change(fn=update_vis, inputs=[history], outputs=[dataframe])

	demo.launch(debug=True)