Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -9,7 +9,6 @@ from nltk.tokenize import word_tokenize
|
|
9 |
from tensorflow.keras.preprocessing.text import Tokenizer
|
10 |
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
11 |
|
12 |
-
# Download and load necessary resources
|
13 |
import spacy.cli
|
14 |
# Fetch the spaCy English model only when it is not already installed, so a
# restart does not re-run the package install every time.
if not spacy.util.is_package("en_core_web_sm"):
    spacy.cli.download("en_core_web_sm")
# Tokenizer data needed by nltk's word_tokenize.
nltk.download('punkt_tab')
|
@@ -34,24 +33,35 @@ print(f"Model loaded from {local_model_path}")
|
|
34 |
|
35 |
def preprocess_text(text):
    """Normalize raw text into a single space-joined string of lemmas.

    Every character that is not an ASCII letter, a digit, or whitespace is
    removed. The remainder is lowercased and tokenized with ``word_tokenize``,
    tokens present in ``stop_words`` are dropped, and the surviving tokens are
    re-joined and run through ``nlp`` to read each token's ``lemma_``.
    """
    cleaned = re.sub(r'[^a-zA-Z0-9\s]', '', text)

    # Keep only the tokens that are not stopwords.
    kept = []
    for candidate in word_tokenize(cleaned.lower()):
        if candidate not in stop_words:
            kept.append(candidate)

    # Lemmatize the filtered text in one pass.
    parsed = nlp(' '.join(kept))
    return ' '.join(item.lemma_ for item in parsed)
|
46 |
|
47 |
def predict(text):
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
# Expose predict() through a text-in / text-out Gradio interface and start it.
demo = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
)
demo.launch()
|
|
|
9 |
from tensorflow.keras.preprocessing.text import Tokenizer
|
10 |
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
11 |
|
|
|
12 |
import spacy.cli
|
13 |
# Fetch the spaCy English model only when it is not already installed, so a
# restart does not re-run the package install every time.
if not spacy.util.is_package("en_core_web_sm"):
    spacy.cli.download("en_core_web_sm")
# Tokenizer data needed by nltk's word_tokenize.
nltk.download('punkt_tab')
|
|
|
33 |
|
34 |
def preprocess_text(text):
    """Normalize raw text into a single space-joined string of lemmas.

    Every character that is not an ASCII letter, a digit, or whitespace is
    removed. The remainder is lowercased and tokenized with ``word_tokenize``,
    tokens present in ``stop_words`` are dropped, and the surviving tokens are
    re-joined and run through ``nlp`` to read each token's ``lemma_``.
    """
    cleaned = re.sub(r'[^a-zA-Z0-9\s]', '', text)

    # Keep only the tokens that are not stopwords.
    kept = []
    for candidate in word_tokenize(cleaned.lower()):
        if candidate not in stop_words:
            kept.append(candidate)

    # Lemmatize the filtered text in one pass.
    parsed = nlp(' '.join(kept))
    return ' '.join(item.lemma_ for item in parsed)
|
43 |
|
44 |
def predict(text):
    """Score a piece of text with the loaded model and describe the result.

    The text is cleaned with ``preprocess_text``, converted to an integer
    sequence by the module-level ``tokenizer``, padded at the end to length
    1000, and passed to ``model.predict``. Each intermediate value is printed
    to help with debugging.

    Args:
        text: Raw text submitted through the Gradio text input.

    Returns:
        A message containing the first model output formatted to two
        decimals; a prompt to enter text when the input is empty or only
        whitespace; or an error message if any step raises.
    """
    try:
        print(f"Input text: {text}")

        # An empty submission would be scored as a sequence of pure padding,
        # which produces a meaningless number; ask for real input instead.
        if not text or not text.strip():
            return "Please enter some text to analyze."

        inputs = preprocess_text(text)
        print(f"Preprocessed text: {inputs}")

        # Ensure the input shape matches what the model expects
        inputs = tokenizer.texts_to_sequences([inputs])
        print(f"Tokenized text: {inputs}")

        inputs = pad_sequences(inputs, maxlen=1000, padding='post')
        print(f"Padded text: {inputs}")

        outputs = model.predict(inputs)
        print(f"Model outputs: {outputs}")

        return f"This text is a violation = {outputs[0][0]:.2f}"
    except Exception as e:
        # UI boundary: report the failure to the user rather than letting the
        # request crash.
        print(f"Error during prediction: {e}")
        return f"Error during prediction: {e}"
|
64 |
|
65 |
# Expose predict() through a text-in / text-out Gradio interface and start it.
demo = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
)
demo.launch()
|