Spaces:

thaboe01
/

Shona-Spell-Checking

Paused

thaboe01 committed on May 20

Commit

1085827

•

1 Parent(s): 0180518

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -26,8 +26,8 @@ def correct_text(text):
         if len(current_chunk) + 1 > MAX_PHRASE_LENGTH:
             input_text = PREFIX + " ".join(current_chunk)
             input_ids = tokenizer(input_text, return_tensors="pt").input_ids
-            outputs = model.generate(input_ids)
-            corrected_phrase = tokenizer.decode(outputs[0], skip_special_tokens=True)[len(PREFIX):]  # Remove the prefix
             corrected_phrases.append(corrected_phrase)
             current_chunk = []  # Reset the chunk
@@ -35,8 +35,8 @@ def correct_text(text):
     if current_chunk:
         input_text = PREFIX + " ".join(current_chunk)
         input_ids = tokenizer(input_text, return_tensors="pt").input_ids
-        outputs = model.generate(input_ids)
-        corrected_phrase = tokenizer.decode(outputs[0], skip_special_tokens=True)[len(PREFIX):]
         corrected_phrases.append(corrected_phrase)
     return " ".join(corrected_phrases)  # Join the corrected chunks

         if len(current_chunk) + 1 > MAX_PHRASE_LENGTH:
             input_text = PREFIX + " ".join(current_chunk)
             input_ids = tokenizer(input_text, return_tensors="pt").input_ids
+            outputs = model.generate(**input_ids)
+            corrected_phrase = tokenizer.decode(outputs[0], skip_special_tokens=True)
             corrected_phrases.append(corrected_phrase)
             current_chunk = []  # Reset the chunk
     if current_chunk:
         input_text = PREFIX + " ".join(current_chunk)
         input_ids = tokenizer(input_text, return_tensors="pt").input_ids
+        outputs = model.generate(**input_ids)
+        corrected_phrase = tokenizer.decode(outputs[0], skip_special_tokens=True)
         corrected_phrases.append(corrected_phrase)
     return " ".join(corrected_phrases)  # Join the corrected chunks