# Module Imports
import re
import tempfile
import time
from difflib import Differ

import gradio as gr
import whisper
from happytransformer import HappyTextToText, TTSettings
from pytube import YouTube

# Models are loaded once, at import time, and shared by every request.
STTmodel = whisper.load_model("base.en")
GCmodel = HappyTextToText("T5", "Ragnov/T5-Base-Grammar-Checker")
args = TTSettings(num_beams=5, min_length=1)

# Options shared by the microphone and file-upload transcription paths.
_TRANSCRIBE_OPTIONS = dict(task="transcribe", best_of=5)

# Sentence boundary: one or more spaces that follow sentence-ending
# punctuation and precede an upper-case letter.
_SENTENCE_BOUNDARY = re.compile(
    r'(?<=[^A-Z].[.?!]) +(?=[A-Z])|(?<=[^A-Z][!]) +(?=[A-Z])'
)


# Functions
def transcribe(file):
    """Transcribe an audio file with the speech-to-text model.

    Args:
        file: Path of the audio file (the microphone component below is
            configured with ``type="filepath"``).

    Returns:
        str: The transcription with surrounding whitespace removed, or an
        empty string when no file was supplied.
    """
    # Clicking "Transcribe" before recording passes nothing; return empty
    # text instead of letting the model fail on a missing input.
    if not file:
        return ""
    text = STTmodel.transcribe(file, **_TRANSCRIBE_OPTIONS)["text"]
    return text.strip()


def get_filename(file_obj):
    """Return the ``orig_name`` attribute of an uploaded file object."""
    # NOTE(review): the result is handed to the model as a path; confirm
    # that ``orig_name`` is a readable path for the Gradio version in use.
    return file_obj.orig_name


def inference(link):
    """Download the audio of a YouTube video and transcribe it.

    Args:
        link (str): URL of the YouTube video.

    Returns:
        str: The raw transcription text, or an empty string when ``link``
        is empty.
    """
    if not link or not link.strip():
        return ""
    yt = YouTube(link)
    # Download into a per-call temporary directory so that concurrent
    # requests do not overwrite each other's "audio.mp4" and the file is
    # removed once transcription has finished.
    with tempfile.TemporaryDirectory() as workdir:
        path = yt.streams.filter(only_audio=True)[0].download(
            output_path=workdir, filename="audio.mp4"
        )
        results = STTmodel.transcribe(path)
    return results['text']


def populate_metadata(link):
    """Return the thumbnail URL and title of a YouTube video.

    Args:
        link (str): URL of the YouTube video.

    Returns:
        tuple: ``(thumbnail_url, title)``; ``(None, "")`` when ``link`` is
        empty.
    """
    # This runs on every change of the link box, including clearing it.
    if not link or not link.strip():
        return None, ""
    yt = YouTube(link)
    return yt.thumbnail_url, yt.title


def transcribe_file(file):
    """Transcribe an uploaded file.

    Args:
        file: Uploaded file object from the file component, or ``None``
            when nothing has been uploaded.

    Returns:
        str: The stripped transcription, or an empty string when no file
        was supplied.
    """
    if file is None:
        return ""
    return transcribe(get_filename(file))


def real_time_transcribe(audio, state=""):
    """Transcribe an audio chunk and append it to the running transcript.

    Not connected to any UI event in this file.

    Args:
        audio: Audio input accepted by the speech-to-text model.
        state (str): Transcript accumulated so far.

    Returns:
        tuple: The updated transcript, twice (same value in both slots).
    """
    time.sleep(2)
    text = STTmodel.transcribe(audio)["text"]
    state += text + " "
    return state, state


def paragraph_to_sentences(paragraph):
    """Split a paragraph into a list of sentences.

    Args:
        paragraph (str): The paragraph to split.

    Returns:
        list: The sentences, each with leading and trailing whitespace
        removed.
    """
    return [sentence.strip() for sentence in _SENTENCE_BOUNDARY.split(paragraph)]


def sentences_to_paragraph(sentences):
    """Grammar-correct each sentence and join the results with spaces.

    Args:
        sentences (list): Sentences to correct, one model call per item.

    Returns:
        str: The corrected sentences separated by single spaces.
    """
    return " ".join(
        GCmodel.generate_text("grammar: " + sentence, args=args).text
        for sentence in sentences
    )


def gramify(paragraph):
    """Return a grammatically corrected version of the transcribed text.

    Args:
        paragraph (str): The transcription to correct.

    Returns:
        str: The corrected paragraph, or an empty string when there is
        nothing to correct.
    """
    # The change event also fires when the transcription box is cleared;
    # do not run the grammar model on empty text.
    if not paragraph or not paragraph.strip():
        return ""
    return sentences_to_paragraph(paragraph_to_sentences(paragraph))


def diff_texts(text1, text2):
    """Return the difference between two texts for highlighting.

    Args:
        text1 (str): The transcribed text.
        text2 (str): The grammatically corrected text.

    Returns:
        list: ``(token, tag)`` tuples where ``tag`` is the diff marker of
        the token, or ``None`` for tokens common to both texts.
    """
    d = Differ()
    return [
        (token[2:], token[0] if token[0] != " " else None)
        for token in d.compare(text1, text2)
    ]


res_diff = []

# Gradio Blocks
demo = gr.Blocks()

with demo:
    gr.Markdown("""

Speech To Text Grammar Checker

""")
    with gr.Tabs():
        with gr.TabItem("Voice Record"):
            with gr.Row():
                audio = gr.Audio(show_label=False, source="microphone", type="filepath")
                text_output1 = gr.Textbox(label="Transcription", placeholder="Text Output")
            with gr.Row():
                transcribe_button1 = gr.Button("Transcribe")
            with gr.Row():
                Grammar_text_output1 = gr.Textbox(
                    label="Grammatically Corrected Text", placeholder="Text Output"
                )
            with gr.Row():
                Diff_text_output1 = gr.HighlightedText(
                    label="Text Difference", combine_adjacent=True, value=res_diff
                ).style(color_map={"+": "green", "-": "red"})
        with gr.TabItem("Upload File"):
            with gr.Row():
                file_upload = gr.File()
                text_output2 = gr.Textbox(label="Transcription", placeholder="Text Output")
            with gr.Row():
                transcribe_button2 = gr.Button("Transcribe")
            with gr.Row():
                Grammar_text_output2 = gr.Textbox(
                    label="Grammatically Corrected Text", placeholder="Text Output"
                )
            with gr.Row():
                Diff_text_output2 = gr.HighlightedText(
                    label="Text Difference", combine_adjacent=True, value=res_diff
                ).style(color_map={"+": "green", "-": "red"})
        with gr.TabItem("Youtube Link"):
            with gr.Box():
                link = gr.Textbox(label="YouTube Link")
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                title = gr.Label(label="Video Title", placeholder="Title")
                img = gr.Image(label="Thumbnail")
            text_link_output = gr.Textbox(
                label="Transcription", placeholder="Text Output", lines=5
            )
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                transcribe_button3 = gr.Button("Transcribe")
            with gr.Row():
                Grammar_text_output3 = gr.Textbox(
                    label="Grammatically Corrected Text", placeholder="Text Output"
                )
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                Diff_text_output3 = gr.HighlightedText(
                    label="Text Difference", combine_adjacent=True, value=res_diff
                ).style(color_map={"+": "green", "-": "red"})
    gr.Markdown("""

Not Satisfied with the result?
Click here to help us make it better.

""")
    with gr.Accordion("About", open=False):
        gr.Markdown("""

Thesis System presented by

• Daniel L. Espinola
• Jhon Vincent A. Gupo
• Ryan M. Ibay

In partial fulfillment of the requirements for the degree
Bachelor of Science in Computer Science Specialized in Intelligent Systems
Laguna State Polytechnic University - Los Baños Campus .

We would also like to thank our fellow adviser and subject specialist for their guidance in making this idea a reality.
• Crisanto F. Gulay - Adviser
• Gene Marck B. Catedrilla - Subject Specialist

""")

    link.change(populate_metadata, inputs=[link], outputs=[img, title])

    # Transcription
    transcribe_button1.click(transcribe, inputs=audio, outputs=text_output1)
    transcribe_button2.click(transcribe_file, inputs=file_upload, outputs=text_output2)
    transcribe_button3.click(inference, inputs=link, outputs=text_link_output)

    # Gramify: every change of a transcription box refreshes its corrected text.
    text_output1.change(gramify, inputs=text_output1, outputs=Grammar_text_output1)
    text_output2.change(gramify, inputs=text_output2, outputs=Grammar_text_output2)
    text_link_output.change(gramify, inputs=text_link_output, outputs=Grammar_text_output3)

    # For Text Difference: every change of a corrected box refreshes its diff.
    Grammar_text_output1.change(
        diff_texts, inputs=[text_output1, Grammar_text_output1], outputs=Diff_text_output1
    )
    Grammar_text_output2.change(
        diff_texts, inputs=[text_output2, Grammar_text_output2], outputs=Diff_text_output2
    )
    Grammar_text_output3.change(
        diff_texts, inputs=[text_link_output, Grammar_text_output3], outputs=Diff_text_output3
    )

demo.launch(share=True)