Spaces:

ARI-HIPA-AI-Team
/

HIPA-AI

Sleeping

Zmorell committed on Dec 5, 2024

Commit

b094da2

verified ·

1 Parent(s): 8f1c4bc

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,7 +9,6 @@ from nltk.tokenize import word_tokenize
 from tensorflow.keras.preprocessing.text import Tokenizer
 from tensorflow.keras.preprocessing.sequence import pad_sequences
-# Download and load necessary resources
 import spacy.cli
 spacy.cli.download("en_core_web_sm")
 nltk.download('punkt_tab')
@@ -34,24 +33,35 @@ print(f"Model loaded from {local_model_path}")
 def preprocess_text(text):
     """Normalize raw text into a single space-joined string of lemmas.

     Steps, in order: strip every character that is not an ASCII letter,
     digit, or whitespace; lowercase and tokenize with ``word_tokenize``;
     drop tokens that appear in ``stop_words``; run the remaining tokens
     through ``nlp`` and collect each token's ``lemma_``.

     ``stop_words`` and ``nlp`` are defined outside this excerpt
     (presumably an NLTK stopword collection and the spaCy pipeline
     downloaded at import time -- confirm against the full file).

     Args:
         text: The input string to clean.

     Returns:
         The lemmas joined by single spaces.
     """
     text = re.sub(r'[^a-zA-Z0-9\s]', '', text)  # Strip everything except ASCII letters, digits, and whitespace
     # Tokenize and remove stopwords
     tokens = word_tokenize(text.lower())
     tokens = [word for word in tokens if word not in stop_words]
     # Lemmatize
     doc = nlp(' '.join(tokens))
     lemmas = [token.lemma_ for token in doc]
     return ' '.join(lemmas)
 def predict(text):
-    # Clean the raw text before it is converted to integer sequences
-    inputs = preprocess_text(text)
-    # Ensure the input shape matches what the model expects
-    inputs = tokenizer.texts_to_sequences([inputs])
-    # Single-element batch, post-padded to length 1000
-    inputs = pad_sequences(inputs, maxlen=1000, padding='post')
-    outputs = model.predict(inputs)
-    # Only the first value of the first output row is shown, to two decimals
-    return f"This text is a violation = {outputs[0][0]:.2f}"
 # Set up the Gradio interface: predict() is the handler, taking a text
 # input and producing a text output
 demo = gr.Interface(fn=predict, inputs="text", outputs="text")
-demo.launch()

 from tensorflow.keras.preprocessing.text import Tokenizer
 from tensorflow.keras.preprocessing.sequence import pad_sequences
 import spacy.cli
 spacy.cli.download("en_core_web_sm")
 nltk.download('punkt_tab')
 def preprocess_text(text):
     """Normalize raw text into a single space-joined string of lemmas.

     Steps, in order: strip every character that is not an ASCII letter,
     digit, or whitespace; lowercase and tokenize with ``word_tokenize``;
     drop tokens that appear in ``stop_words``; run the remaining tokens
     through ``nlp`` and collect each token's ``lemma_``.

     ``stop_words`` and ``nlp`` are defined outside this excerpt
     (presumably an NLTK stopword collection and the spaCy pipeline
     downloaded at import time -- confirm against the full file).

     Args:
         text: The input string to clean.

     Returns:
         The lemmas joined by single spaces.
     """
     text = re.sub(r'[^a-zA-Z0-9\s]', '', text)  # Strip everything except ASCII letters, digits, and whitespace
     # Tokenize and remove stopwords
     tokens = word_tokenize(text.lower())
     tokens = [word for word in tokens if word not in stop_words]
     # Lemmatize
     doc = nlp(' '.join(tokens))
     lemmas = [token.lemma_ for token in doc]
     return ' '.join(lemmas)
 def predict(text):
+    # Score one input string and return a display message; any exception is
+    # reported as text instead of propagating to the caller.
+    try:
+        # NOTE(review): the prints below write the raw input and every
+        # intermediate value to stdout -- confirm that is acceptable if
+        # inputs can contain sensitive text.
+        print(f"Input text: {text}")
+        inputs = preprocess_text(text)
+        print(f"Preprocessed text: {inputs}")
+        # Ensure the input shape matches what the model expects
+        inputs = tokenizer.texts_to_sequences([inputs])
+        print(f"Tokenized text: {inputs}")
+        # Single-element batch, post-padded to length 1000
+        inputs = pad_sequences(inputs, maxlen=1000, padding='post')
+        print(f"Padded text: {inputs}")
+        outputs = model.predict(inputs)
+        print(f"Model outputs: {outputs}")
+        # Only the first value of the first output row is shown, to two decimals
+        return f"This text is a violation = {outputs[0][0]:.2f}"
+    except Exception as e:
+        # Broad catch: the error is printed and also returned as the result string
+        print(f"Error during prediction: {e}")
+        return f"Error during prediction: {e}"
 # Set up the Gradio interface: predict() is the handler, taking a text
 # input and producing a text output
 demo = gr.Interface(fn=predict, inputs="text", outputs="text")
+demo.launch()