File size: 7,368 Bytes
126f94b
 
 
 
af969af
 
 
 
126f94b
af969af
 
 
126f94b
 
 
 
af969af
126f94b
 
 
 
 
 
 
 
 
af969af
126f94b
 
 
 
 
 
 
 
 
af969af
126f94b
 
af969af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126f94b
 
 
 
 
 
 
af969af
 
 
 
 
 
 
 
126f94b
 
 
 
af969af
 
 
 
 
 
126f94b
 
 
 
 
 
 
 
 
af969af
 
 
 
 
 
 
 
 
 
126f94b
 
 
 
 
 
 
 
 
af969af
126f94b
 
af969af
 
 
 
 
 
 
 
 
 
 
126f94b
af969af
 
 
 
 
813029b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# Module Imports
from pytube import YouTube
import whisper
import gradio as gr
import time
import re
from happytransformer import HappyTextToText, TTSettings
from difflib import Differ

# Whisper speech-to-text model ("base.en" checkpoint), loaded once when the module is imported.
STTmodel = whisper.load_model("base.en")
# T5 grammar-correction model wrapped by HappyTransformer's text-to-text interface.
GCmodel = HappyTextToText("T5", "Ragnov/T5-Base-Grammar-Checker")
# Generation settings handed to GCmodel.generate_text (5 beams, minimum output length of 1).
args = TTSettings(num_beams=5, min_length=1)

# Functions
def transcribe(file):
    """Transcribe an audio file with the module-level Whisper model.

    Args:
        file: Path of the audio file to transcribe. May be ``None`` or an
            empty string when no audio was provided (e.g. nothing recorded).

    Returns:
        str: The transcription with surrounding whitespace removed, or an
        empty string when no audio was provided.
    """
    # Without audio there is nothing to transcribe; return an empty result
    # instead of letting the model fail on a missing path.
    if file is None or (isinstance(file, str) and not file.strip()):
        return ""

    options = {"task": "transcribe", "best_of": 5}
    text = STTmodel.transcribe(file, **options)["text"]
    return text.strip()

def get_filename(file_obj):
    """Return the value stored in the ``orig_name`` attribute of *file_obj*."""
    original_name = file_obj.orig_name
    return original_name

def inference(link):
    """Download the audio track of a YouTube video and transcribe it.

    Args:
        link (str): URL of the YouTube video.

    Returns:
        str: The transcription with surrounding whitespace removed, matching
        what the other transcription helpers in this module return.
    """
    # Local import keeps this function self-contained.
    import tempfile

    audio_stream = YouTube(link).streams.filter(only_audio=True)[0]

    # Download into a private temporary directory instead of a fixed
    # "audio.mp4" in the working directory: concurrent requests no longer
    # overwrite each other's file, a stale download can never be reused,
    # and the file is removed once transcription is done.
    with tempfile.TemporaryDirectory() as workdir:
        path = audio_stream.download(output_path=workdir, filename="audio.mp4")
        results = STTmodel.transcribe(path)

    return results["text"].strip()

def populate_metadata(link):
    """Look up a YouTube video and return its thumbnail URL and title.

    Args:
        link (str): URL of the YouTube video.

    Returns:
        tuple: ``(thumbnail_url, title)`` of the video.
    """
    video = YouTube(link)
    thumbnail, video_title = video.thumbnail_url, video.title
    return thumbnail, video_title

def transcribe_file(file):
    """Transcribe an uploaded audio file with the module-level Whisper model.

    Args:
        file: Uploaded file object whose path is read through
            ``get_filename`` (its ``orig_name`` attribute), or ``None`` when
            nothing has been uploaded.

    Returns:
        str: The transcription with surrounding whitespace removed, or an
        empty string when no file was provided.
    """
    # No upload yet: return an empty result rather than failing with an
    # AttributeError while looking up the file name.
    if file is None:
        return ""

    audio_path = get_filename(file)
    options = {"task": "transcribe", "best_of": 5}
    text = STTmodel.transcribe(audio_path, **options)["text"]
    return text.strip()

def real_time_transcribe(audio, state=""):
    """Transcribe one audio chunk and append it to the running transcript.

    Args:
        audio: Audio input handed to the Whisper model.
        state (str): Transcript accumulated so far.

    Returns:
        tuple: The updated transcript twice, ``(transcript, transcript)``.
    """
    # Wait two seconds before transcribing the incoming audio.
    time.sleep(2)
    chunk_text = STTmodel.transcribe(audio)["text"]
    state = state + (chunk_text + " ")
    return state, state

def paragraph_to_sentences(paragraph):
    """
    Split a paragraph into a list of sentences.

    A split happens on the run of spaces that follows a sentence-ending mark
    and precedes an uppercase letter:

    * after ``.``, ``?`` or ``!`` when the character two positions before
      the mark is not an uppercase letter (so ``"Mr. Smith"`` stays whole);
    * after ``!`` when the character directly before it is not uppercase.

    Args:
        paragraph (str): The paragraph to be converted to a list of sentences.

    Returns:
        list: The non-empty sentences extracted from the paragraph, each with
        leading and trailing whitespace removed. Empty or blank input yields
        an empty list, so no empty sentence is sent on for correction.
    """
    # Nothing to split for empty or missing input.
    if not paragraph:
        return []

    pieces = re.split(r'(?<=[^A-Z].[.?!]) +(?=[A-Z])|(?<=[^A-Z][!]) +(?=[A-Z])', paragraph)

    # Trim each piece and drop the ones that are empty after trimming.
    trimmed = (piece.strip() for piece in pieces)
    return [sentence for sentence in trimmed if sentence]

def sentences_to_paragraph(sentences):
    """Grammar-correct each sentence and join the results into one paragraph.

    Args:
        sentences (list): Sentences to correct, in order.

    Returns:
        str: The corrected sentences separated by single spaces; an empty
        string when ``sentences`` is empty.
    """
    # Each sentence is sent to the grammar model with the "grammar: " prefix.
    corrected = (
        GCmodel.generate_text("grammar: " + sentence, args=args).text
        for sentence in sentences
    )
    return " ".join(corrected)

# Function that takes the transcribed text and returns its grammar-corrected version
def gramify(paragraph):
    """Return the grammar-corrected version of *paragraph*.

    The paragraph is split into sentences, each sentence is corrected, and
    the corrected sentences are joined back into a single string.
    """
    return sentences_to_paragraph(paragraph_to_sentences(paragraph))

# Function that takes the transcribed text as its first input and the corrected text as its second, and returns their differences
def diff_texts(text1, text2):
    """
    Compare the transcribed text with the grammatically corrected text.

    The two strings are compared element by element with ``difflib.Differ``.

    Args:
        text1 (str): The transcribed text.
        text2 (str): The grammatically corrected text.

    Returns:
        list: ``(text, marker)`` tuples in diff order, where ``marker`` is
        the first character of the diff entry, or ``None`` when that
        character is a space (unchanged text).
    """
    highlighted = []
    for entry in Differ().compare(text1, text2):
        marker = entry[0]
        # Each diff entry is a one-character marker, a space, then the text.
        highlighted.append((entry[2:], None if marker == " " else marker))
    return highlighted
# Initial (empty) value shared by the three "Text Difference" highlight components.
res_diff = []
# Gradio Blocks
# Builds the UI: three input tabs (microphone recording, file upload, YouTube link),
# each with a transcription box, a grammar-corrected box and a highlighted diff,
# followed by a feedback link, an "About" accordion and the event wiring.
demo = gr.Blocks()
with demo:
    gr.Markdown(""" <p style="text-align: center;"> Speech To Text Grammar Checker </p>""")
    with gr.Tabs():
        # Tab 1: record from the microphone; the recording is passed on as a file path.
        with gr.TabItem("Voice Record"):
            with gr.Row():
              audio = gr.Audio(show_label=False,source="microphone",type="filepath")
              text_output1 = gr.Textbox(label="Transcription", placeholder="Text Output")
            with gr.Row():
              transcribe_button1 = gr.Button("Transcribe")
            with gr.Row():
              Grammar_text_output1 = gr.Textbox(label="Grammatically Corrected Text", placeholder="Text Output")
            with gr.Row():
              Diff_text_output1 = gr.HighlightedText(label="Text Difference",combine_adjacent=True,value=res_diff).style(color_map={"+": "green", "-": "red"})
        # Tab 2: transcribe an uploaded file.
        with gr.TabItem("Upload File"):
            with gr.Row():
                file_upload = gr.File()
                text_output2 = gr.Textbox(label="Transcription", placeholder="Text Output")
            with gr.Row():
              transcribe_button2 = gr.Button("Transcribe")
            with gr.Row():
              Grammar_text_output2 = gr.Textbox(label="Grammatically Corrected Text", placeholder="Text Output")
            with gr.Row():
              Diff_text_output2 = gr.HighlightedText(label="Text Difference",combine_adjacent=True,value=res_diff).style(color_map={"+": "green", "-": "red"})
        # Tab 3: transcribe the audio of a YouTube video; title and thumbnail are shown for the link.
        with gr.TabItem("Youtube Link"):
            with gr.Box():
                link = gr.Textbox(label="YouTube Link")
                with gr.Row().style(mobile_collapse=False, equal_height=True):
                  title = gr.Label(label="Video Title", placeholder="Title")
                  img = gr.Image(label="Thumbnail")
                text_link_output = gr.Textbox(label="Transcription", placeholder="Text Output",lines=5)
                with gr.Row().style(mobile_collapse=False, equal_height=True): 
                  transcribe_button3 = gr.Button("Transcribe")
                with gr.Row():
                  Grammar_text_output3 = gr.Textbox(label="Grammatically Corrected Text", placeholder="Text Output")
                with gr.Row().style(mobile_collapse=False, equal_height=True):
                  Diff_text_output3 = gr.HighlightedText(label="Text Difference",combine_adjacent=True,value=res_diff).style(color_map={"+": "green", "-": "red"})
        # Feedback link placed inside the Tabs container, after the three tabs.
        gr.Markdown("""<p style="text-align: center;"> Not Satisfied with the result? </br>
                <a href="https://forms.gle/yZA5DBygMUNmLZtv7">Click here to help us make it better.</a> 
                </p>""") 
    
        # Collapsed-by-default credits section.
        with gr.Accordion("About",open=False):
          gr.Markdown("""
            <p style="text-align: center;"> Thesis System presented by  <br/> <br/>
            • <b>Daniel L. Espinola</b> <br/>
            • <b>Jhon Vincent A. Gupo</b> <br/>
            • <b>Ryan M. Ibay</b> <br/> <br/>
            In partial fulfillment of the requirements for the degree <br/>   
            Bachelor of Science in Computer Science Specialized in Intelligent Systems <br/>
            Laguna State Polytechnic University - Los Baños Campus . <br/>  <br/> 
            We would also like to thank our fellow adviser and subject specialist for their guidance in making this idea a reality. <br/>
            • <b>Crisanto F. Gulay</b> - Adviser <br/>
            • <b>Gene Marck B. Catedrilla</b> - Subject Specialist <br/>
            </p>
            """)
          # NOTE(review): the event wiring below sits inside the "About" accordion's
          # `with` block; presumably only an indentation choice -- confirm it is intended.
          # Whenever the link text changes, fetch the video's thumbnail and title.
          link.change(populate_metadata, inputs=[link], outputs=[img, title])

          # Transcription
          # Each "Transcribe" button fills the transcription textbox of its own tab.
          transcribe_button1.click(transcribe, inputs=audio, outputs=text_output1)
          transcribe_button2.click(transcribe_file, inputs=file_upload, outputs=text_output2)
          transcribe_button3.click(inference, inputs=link, outputs=text_link_output)

          # Gramify
          # A change of a transcription textbox triggers grammar correction of its content.
          text_output1.change(gramify,inputs=text_output1,outputs=Grammar_text_output1)
          text_output2.change(gramify,inputs=text_output2,outputs=Grammar_text_output2)
          text_link_output.change(gramify, inputs=text_link_output ,outputs=Grammar_text_output3)

          # For Text Difference
          # A change of a corrected textbox recomputes the diff against the matching transcription.
          Grammar_text_output1.change(diff_texts,inputs=[text_output1,Grammar_text_output1],outputs=Diff_text_output1)
          Grammar_text_output2.change(diff_texts,inputs=[text_output2,Grammar_text_output2],outputs=Diff_text_output2)
          Grammar_text_output3.change(diff_texts,inputs=[text_link_output,Grammar_text_output3],outputs=Diff_text_output3)
     
# Start the Gradio app; this runs at module level, i.e. whenever the file is executed or imported.
demo.launch()