import torch from flask import Flask, render_template, request from difflib import HtmlDiff import pandas as pd import os #os.environ['HF_HOME'] = '/remote/t3dev4/anmolm/sanchit/mlapp/huggingface' from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, T5ForConditionalGeneration app = Flask(__name__) # Load Grammarly Coedit-Large model tokenizer = AutoTokenizer.from_pretrained("grammarly/coedit-large") model = T5ForConditionalGeneration.from_pretrained("grammarly/coedit-large") # Load custom dataset custom_dataset_path = "styleguide_words.csv" custom_dataset = pd.read_csv(custom_dataset_path) # Create a mapping between words to be replaced and their replacements replacement_mapping = dict(zip(custom_dataset["Not Allowed"], custom_dataset["Replacement"])) @app.route('/') def index(): return render_template('index.html') @app.route('/correct', methods=['POST']) def correct(): text = request.form['text'] corrected_text = grammar_correction(text) return render_template('result.html', original_text=text, corrected_text=corrected_text) @app.route('/styleguide', methods=['POST']) def styleguide(): text = request.form['corrected_text'] highlighted_text, suggestions = apply_styleguide(text) return render_template('styleguide.html', corrected_text=text, highlighted_text=highlighted_text, suggestions=suggestions) @app.route('/compare', methods=['POST']) def compare(): original_text = request.form['original_text'] final_text = request.form['final_text'] highlighted_changes = highlight_changes(original_text, final_text) return render_template('compare.html', original_text=original_text, final_text=final_text, highlighted_changes=highlighted_changes) def grammar_correction(text): # Split the text into sentences sentences = text.split(". ") corrected_sentences = [] for sentence in sentences: # Check if the sentence is a command (starts with "-" or contains "_") if sentence.startswith("-") or "_" in sentence: corrected_sentences.append(sentence) # Skip the command line continue # Tokenize input text input_ids = tokenizer(sentence, return_tensors="pt").input_ids outputs = model.generate(input_ids, max_length=256) edited_text = tokenizer.decode(outputs[0], skip_special_tokens=True) corrected_sentences.append(edited_text) # inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True) # # Generate corrected text using the model # with torch.no_grad(): # outputs = model.generate(**inputs) # # Decode and append corrected sentence to list # corrected_sentence = tokenizer.decode(outputs[0], skip_special_tokens=True) # corrected_sentences.append(corrected_sentence) # Join the corrected sentences into a single paragraph corrected_text = ". ".join(corrected_sentences) return corrected_text def apply_styleguide(text): # Highlight words mentioned in the CSV file and suggest replacements highlighted_text = text suggestions = [] for not_allowed_word, replacement_word in replacement_mapping.items(): if not_allowed_word in highlighted_text: highlighted_text = highlighted_text.replace(not_allowed_word, f'{not_allowed_word} ({replacement_word})') suggestions.append((not_allowed_word, replacement_word)) return highlighted_text, suggestions def highlight_changes(original_text, final_text): # Function to highlight changes between original and final text # You can modify this function as needed diff = HtmlDiff() highlighted_changes = diff.make_table(original_text.splitlines(), final_text.splitlines(), context=True, numlines=2) return highlighted_changes if __name__ == '__main__': app.run(debug=True)