Rouhani2025 committed on
Commit
9a2e756
·
verified ·
1 Parent(s): 40ee935

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -20
app.py CHANGED
@@ -14,26 +14,30 @@ def transcribe_audio(audio_file):
14
  return transcription["text"]
15
 
16
  # Function to correct grammar in text
17
def correct_text(raw_text):
    """Return a grammar- and punctuation-corrected version of ``raw_text``.

    The text is split with ``chunk_text`` (``max_tokens=2000``), each piece is
    sent to ``correction_pipeline`` wrapped in an instruction prompt, and the
    stripped ``generated_text`` of the first returned sequence is kept. The
    corrected pieces are joined with single spaces.

    Any exception raised along the way is caught and reported as an
    ``"Error in correction: ..."`` string instead of propagating.
    """
    try:
        pieces = chunk_text(raw_text, max_tokens=2000)
        fixed = [
            correction_pipeline(
                f"Correct the following text for grammar and punctuation without changing its meaning: {piece}",
                max_length=2048,
                num_return_sequences=1,
            )[0]["generated_text"].strip()
            for piece in pieces
        ]
        # Stitch the corrected pieces back into one string.
        return " ".join(fixed).strip()
    except Exception as e:
        return f"Error in correction: {str(e)}"
 
 
 
 
37
 
38
  # Function to process the pipeline
39
  def process_pipeline(audio_file):
 
14
  return transcription["text"]
15
 
16
  # Function to correct grammar in text
17
def chunk_text(text, max_tokens=2000):
    """Split ``text`` into chunks of at most ``max_tokens`` words.

    Tokens are approximated by whitespace-delimited words: every word counts
    as exactly one token, so this is a word budget rather than a real
    tokenizer budget.

    Args:
        text: The text to split. Runs of whitespace (including newlines) are
            treated as a single separator and are not preserved.
        max_tokens: Maximum number of words per chunk. Values below 1 are
            treated as "one word per chunk", since a chunk always holds at
            least one word.

    Returns:
        A list of chunk strings with words joined by single spaces. Empty or
        whitespace-only ``text`` yields an empty list.
    """
    chunks = []
    current = []

    for word in text.split():
        # Flush only a non-empty accumulator: without the ``current`` guard a
        # budget below 1 would emit a spurious empty-string chunk first.
        if current and len(current) + 1 > max_tokens:
            chunks.append(" ".join(current))
            current = []
        current.append(word)

    # Emit whatever is left after the final flush.
    if current:
        chunks.append(" ".join(current))

    return chunks
40
+
41
 
42
  # Function to process the pipeline
43
  def process_pipeline(audio_file):