"""Speech-to-text grammar checker: a Gradio UI over Whisper and a T5 model.

Audio (microphone recording, uploaded file, or the audio track of a YouTube
video) is transcribed with Whisper, the transcript is passed sentence by
sentence through a T5 grammar-correction model, and the character-level
difference between the two texts is shown as highlighted text.
"""

# Module Imports
import re
import tempfile
import time
from difflib import Differ

import gradio as gr
import whisper
from happytransformer import HappyTextToText, TTSettings
from pytube import YouTube
from pytube.exceptions import PytubeError

# Models are loaded once at import time and shared by every request.
STTmodel = whisper.load_model("base.en")
GCmodel = HappyTextToText("T5", "Ragnov/T5-Base-Grammar-Checker")
args = TTSettings(num_beams=5, min_length=1)

# Sentence boundary: a run of spaces that follows '.', '?' or '!' and precedes
# an uppercase letter.  The look-behinds additionally require that the
# character two positions before the punctuation (or, in the second
# alternative, directly before a '!') is not an uppercase letter.
_SENTENCE_BOUNDARY = re.compile(
    r'(?<=[^A-Z].[.?!]) +(?=[A-Z])|(?<=[^A-Z][!]) +(?=[A-Z])'
)


# Functions
def transcribe(file):
    """Transcribe the audio file at path *file* and return the stripped text.

    Returns an empty string when *file* is empty or ``None`` (for example when
    the button is clicked before anything was recorded) instead of passing
    that value on to Whisper.
    """
    if not file:
        return ""
    options = dict(task="transcribe", best_of=5)
    text = STTmodel.transcribe(file, **options)["text"]
    return text.strip()


def get_filename(file_obj):
    """Return the ``orig_name`` attribute of an uploaded-file object."""
    return file_obj.orig_name


def inference(link):
    """Download the audio of the YouTube video at *link* and transcribe it.

    The audio is written to a per-call temporary directory so that concurrent
    requests cannot overwrite each other's download and no file is left
    behind afterwards.

    Returns the stripped transcript, or an empty string for a blank link.
    Download and extraction errors from pytube are not caught here.
    """
    if not link or not link.strip():
        return ""
    yt = YouTube(link)
    with tempfile.TemporaryDirectory() as workdir:
        path = yt.streams.filter(only_audio=True)[0].download(
            output_path=workdir, filename="audio.mp4"
        )
        # Transcription must finish before the temporary directory is removed.
        results = STTmodel.transcribe(path)
    return results["text"].strip()


def populate_metadata(link):
    """Return ``(thumbnail_url, title)`` for the YouTube video at *link*.

    This runs on every change of the link textbox, so blank or partially
    typed links are expected; for those (and for any other pytube failure)
    ``(None, None)`` is returned instead of raising.
    """
    if not link or not link.strip():
        return None, None
    try:
        yt = YouTube(link)
        return yt.thumbnail_url, yt.title
    except PytubeError:
        return None, None


def transcribe_file(file):
    """Transcribe an uploaded file object and return the stripped text.

    Returns an empty string when nothing has been uploaded.
    """
    if file is None:
        return ""
    options = dict(task="transcribe", best_of=5)
    path = get_filename(file)
    text = STTmodel.transcribe(path, **options)["text"]
    return text.strip()


def real_time_transcribe(audio, state=""):
    """Transcribe *audio* and append the text to the running *state*.

    Returns the updated state twice (one value for display, one to carry
    forward as state).
    """
    # NOTE(review): the reason for the fixed two-second pause is not visible
    # in this file; presumably it lets a streamed chunk accumulate - confirm.
    time.sleep(2)
    text = STTmodel.transcribe(audio)["text"]
    state += text + " "
    return state, state


def paragraph_to_sentences(paragraph):
    """Split a paragraph into a list of whitespace-stripped sentences.

    Args:
        paragraph (str): The paragraph to split.

    Returns:
        list: The sentences found in the paragraph.  A paragraph without any
        sentence boundary (including the empty string) yields a one-element
        list.
    """
    return [
        sentence.strip() for sentence in _SENTENCE_BOUNDARY.split(paragraph)
    ]


def sentences_to_paragraph(sentences):
    """Grammar-correct each sentence and join the results with single spaces."""
    corrected = [
        GCmodel.generate_text("grammar: " + sentence, args=args).text
        for sentence in sentences
    ]
    return " ".join(corrected)


# Function that takes the transcribed result and grammar-corrects it
def gramify(paragraph):
    """Return the grammar-corrected version of *paragraph*.

    Blank input (for example when the transcription box is cleared) returns an
    empty string without invoking the model.
    """
    if not paragraph or not paragraph.strip():
        return ""
    return sentences_to_paragraph(paragraph_to_sentences(paragraph))


# Function that takes transcribed text for its first input
def diff_texts(text1, text2):
    """Return the character-level difference between two texts.

    *text1* is the transcribed text and *text2* the grammatically corrected
    text.  The result is a list of ``(character, tag)`` pairs where the tag is
    the first character of the ``difflib.Differ`` output token, or ``None``
    when that character is a space (unchanged text).
    """
    d = Differ()
    return [
        (token[2:], token[0] if token[0] != " " else None)
        for token in d.compare(text1, text2)
    ]


# Initial (empty) value shared by the three difference views.
res_diff = []

# Gradio Blocks
# NOTE(review): the nesting of the layout containers below was reconstructed
# from a whitespace-collapsed source; confirm against the deployed layout.
demo = gr.Blocks()
with demo:
    gr.Markdown("""

Speech To Text Grammar Checker

""")
    with gr.Tabs():
        with gr.TabItem("Voice Record"):
            with gr.Row():
                audio = gr.Audio(show_label=False,source="microphone",type="filepath")
                text_output1 = gr.Textbox(label="Transcription", placeholder="Text Output")
            with gr.Row():
                transcribe_button1 = gr.Button("Transcribe")
            with gr.Row():
                Grammar_text_output1 = gr.Textbox(label="Grammatically Corrected Text", placeholder="Text Output")
            with gr.Row():
                Diff_text_output1 = gr.HighlightedText(label="Text Difference",combine_adjacent=True,value=res_diff).style(color_map={"+": "green", "-": "red"})
        with gr.TabItem("Upload File"):
            with gr.Row():
                file_upload = gr.File()
                text_output2 = gr.Textbox(label="Transcription", placeholder="Text Output")
            with gr.Row():
                transcribe_button2 = gr.Button("Transcribe")
            with gr.Row():
                Grammar_text_output2 = gr.Textbox(label="Grammatically Corrected Text", placeholder="Text Output")
            with gr.Row():
                Diff_text_output2 = gr.HighlightedText(label="Text Difference",combine_adjacent=True,value=res_diff).style(color_map={"+": "green", "-": "red"})
        with gr.TabItem("Youtube Link"):
            # NOTE(review): assumed the Box wraps only the link textbox.
            with gr.Box():
                link = gr.Textbox(label="YouTube Link")
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                title = gr.Label(label="Video Title", placeholder="Title")
                img = gr.Image(label="Thumbnail")
            text_link_output = gr.Textbox(label="Transcription", placeholder="Text Output",lines=5)
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                transcribe_button3 = gr.Button("Transcribe")
            with gr.Row():
                Grammar_text_output3 = gr.Textbox(label="Grammatically Corrected Text", placeholder="Text Output")
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                Diff_text_output3 = gr.HighlightedText(label="Text Difference",combine_adjacent=True,value=res_diff).style(color_map={"+": "green", "-": "red"})
    # NOTE(review): assumed the feedback and About sections sit below the tabs.
    gr.Markdown("""

Not Satisfied with the result?
Click here to help us make it better.

""")
    with gr.Accordion("About",open=False):
        gr.Markdown("""

Thesis System presented by

Daniel L. Espinola
Jhon Vincent A. Gupo
Ryan M. Ibay

In partial fulfillment of the requirements for the degree
Bachelor of Science in Computer Science Specialized in Intelligent Systems
Laguna State Polytechnic University - Los Baños Campus .

We would also like to thank our fellow adviser and subject specialist for their guidance in making this idea a reality.
Crisanto F. Gulay - Adviser
Gene Marck B. Catedrilla - Subject Specialist

""")

    # Refresh thumbnail and title whenever the link text changes.
    link.change(populate_metadata, inputs=[link], outputs=[img, title])

    # Transcription
    transcribe_button1.click(transcribe, inputs=audio, outputs=text_output1)
    transcribe_button2.click(transcribe_file, inputs=file_upload, outputs=text_output2)
    transcribe_button3.click(inference, inputs=link, outputs=text_link_output)

    # Gramify: a new transcript triggers grammar correction.
    text_output1.change(gramify,inputs=text_output1,outputs=Grammar_text_output1)
    text_output2.change(gramify,inputs=text_output2,outputs=Grammar_text_output2)
    text_link_output.change(gramify, inputs=text_link_output ,outputs=Grammar_text_output3)

    # For Text Difference: a new corrected text triggers the diff view.
    Grammar_text_output1.change(diff_texts,inputs=[text_output1,Grammar_text_output1],outputs=Diff_text_output1)
    Grammar_text_output2.change(diff_texts,inputs=[text_output2,Grammar_text_output2],outputs=Diff_text_output2)
    Grammar_text_output3.change(diff_texts,inputs=[text_link_output,Grammar_text_output3],outputs=Diff_text_output3)

demo.launch(share=True)