Spaces:

Rouhani2025
/

speech-correction-demo

Sleeping

Rouhani2025 committed on Jan 1

Commit

9a2e756

verified ·

1 Parent(s): 40ee935

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -14,26 +14,30 @@ def transcribe_audio(audio_file):
     return transcription["text"]
 # Function to correct grammar in text
-def correct_text(raw_text):
-    try:
-        # Chunk the text into smaller pieces if needed (to handle token limits)
-        text_chunks = chunk_text(raw_text, max_tokens=2000)
-        corrected_chunks = []
-        for chunk in text_chunks:
-            # Provide clear instructions to the model
-            prompt = f"Correct the following text for grammar and punctuation without changing its meaning: {chunk}"
-            corrected = correction_pipeline(prompt, max_length=2048, num_return_sequences=1)[0]["generated_text"]
-            # Add only the corrected part
-            corrected_chunks.append(corrected.strip())
-        # Combine corrected chunks
-        final_corrected_text = " ".join(corrected_chunks).strip()
-        return final_corrected_text
-    except Exception as e:
-        return f"Error in correction: {str(e)}"
 # Function to process the pipeline
 def process_pipeline(audio_file):

     return transcription["text"]
 # Function to correct grammar in text
+def chunk_text(text, max_tokens=2000):
+    """
+    Splits the text into smaller chunks to ensure it doesn't exceed the token limit.
+    """
+    words = text.split()
+    chunks = []
+    chunk = []
+    current_tokens = 0
+    for word in words:
+        word_tokens = len(word.split())  # Approximate token count
+        if current_tokens + word_tokens > max_tokens:
+            chunks.append(" ".join(chunk))
+            chunk = [word]
+            current_tokens = word_tokens
+        else:
+            chunk.append(word)
+            current_tokens += word_tokens
+    if chunk:
+        chunks.append(" ".join(chunk))
+    return chunks
 # Function to process the pipeline
 def process_pipeline(audio_file):