import gradio as gr import torch from transformers import AutoTokenizer, T5ForConditionalGeneration, pipeline from sentence_transformers import SentenceTransformer, util import random import re import nltk from nltk.tokenize import sent_tokenize import warnings from transformers import logging import os import tensorflow as tf import requests os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0' warnings.filterwarnings("ignore", category=FutureWarning) warnings.filterwarnings("ignore", category=UserWarning) warnings.filterwarnings("ignore") logging.set_verbosity_error() tf.get_logger().setLevel('ERROR') nltk.download('punkt') GROQ_API_KEY="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" class TextEnhancer: def __init__(self): self.device = "cuda" if torch.cuda.is_available() else "cpu" print(self.device) self.paraphrase_tokenizer = AutoTokenizer.from_pretrained("prithivida/parrot_paraphraser_on_T5") self.paraphrase_model = T5ForConditionalGeneration.from_pretrained("prithivida/parrot_paraphraser_on_T5").to(self.device) print("paraphraser loaded") self.grammar_pipeline = pipeline( "text2text-generation", model="Grammarly/coedit-large", device=0 if self.device == "cuda" else -1 ) print("grammar check loaded") self.similarity_model = SentenceTransformer('paraphrase-MiniLM-L6-v2').to(self.device) print("sementics model loaded") def _evaluate_with_groq(self, passage=""): if not passage: raise ValueError("Input passage cannot be empty.") # Groq API setup headers = { "Authorization": f"Bearer {GROQ_API_KEY}", # Replace GROQ_API_KEY with your actual API key. "Content-Type": "application/json" } payload = { "model": "llama3-70b-8192", "messages": [ { "role": "system", "content": "Paraphrase this sentence to better suit it as an introductory sentence to a student's Statement of purpose. Ensure that the vocabulary and grammar is upto par. ONLY return the raw paraphrased sentence and nothing else.IF IT IS a empty string, return empty string " }, { "role": "user", "content": f"Here is the passage: {passage}" } ], "temperature": 1.0, "max_tokens": 8192 } # Sending request to Groq API print("Sending request to Groq API...") response = requests.post("https://api.groq.com/openai/v1/chat/completions", json=payload, headers=headers) print("Response received.") # Handling the response if response.status_code == 200: data = response.json() try: segmented_text = data.get("choices", [{}])[0].get("message", {}).get("content", "") print("sentence paraphrase processed successfully.") print(segmented_text) return segmented_text except (IndexError, KeyError) as e: raise ValueError(f"Unexpected response structure from Groq API. Error: {str(e)}") else: raise ValueError(f"Groq API error: {response.status_code}, {response.text}") def _correct_formatting(self, sentence): cleaned_sentence = re.sub(r'([.,!?])\1+', r'\1', sentence) cleaned_sentence = cleaned_sentence.strip() return cleaned_sentence def enhance_text(self, text, min_similarity=0.8, max_variations=3): sent=0 enhanced_sentences = [] sentences = sent_tokenize(text) total_words = sum(len(sentence.split()) for sentence in sentences) print(f"generated: {total_words}") for sentence in sentences: if not sentence.strip(): continue sent+=1 inputs = self.paraphrase_tokenizer( f"paraphrase: {sentence}", return_tensors="pt", padding=True, max_length=150, truncation=True ).to(self.device) outputs = self.paraphrase_model.generate( **inputs, max_length=len(sentence.split()) + 20, num_return_sequences=max_variations, num_beams=max_variations, temperature=0.7 ) paraphrases = [ self.paraphrase_tokenizer.decode(output, skip_special_tokens=True) for output in outputs ] sentence_embedding = self.similarity_model.encode(sentence) paraphrase_embeddings = self.similarity_model.encode(paraphrases) similarities = util.cos_sim(sentence_embedding, paraphrase_embeddings) valid_paraphrases = [ para for para, sim in zip(paraphrases, similarities[0]) if sim >= min_similarity ] if sent in {1, len(sentences)} and valid_paraphrases: gemini_feedback = self._evaluate_with_groq(valid_paraphrases[0]) if gemini_feedback.strip(): valid_paraphrases[0] = gemini_feedback.strip() if valid_paraphrases: corrected = self.grammar_pipeline( valid_paraphrases[0], max_length=150, num_return_sequences=1 )[0]["generated_text"] corrected = self._humanize_text(corrected) corrected=self._correct_formatting(corrected) enhanced_sentences.append(corrected) else: sentence=self._correct_formatting(sentence) enhanced_sentences.append(sentence) enhanced_text = ". ".join(sentence.rstrip(".") for sentence in enhanced_sentences) + "." return enhanced_text def _humanize_text(self, text): contractions = {"can't": "cannot", "won't": "will not", "I'm": "I am", "it's": "it is"} words = text.split() text = " ".join([contractions.get(word, word) if random.random() > 0.9 else word for word in words]) if random.random() > 0.7: text = text.replace(" and ", ", and ") # Minor variations in sentence structure if random.random() > 0.5: text = text.replace(" is ", " happens to be ") return text def create_interface(): enhancer = TextEnhancer() def process_text(text, similarity_threshold=0.75): try: enhanced = enhancer.enhance_text( text, min_similarity=similarity_threshold / 100, max_variations=10 ) print("grammar enhanced") return enhanced except Exception as e: return f"Error: {str(e)}" interface = gr.Blocks() with interface: with gr.Row(elem_id="header", variant="panel"): gr.HTML("""