Zmorell committed on
Commit
b094da2
·
verified ·
1 Parent(s): 8f1c4bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -10
app.py CHANGED
@@ -9,7 +9,6 @@ from nltk.tokenize import word_tokenize
9
  from tensorflow.keras.preprocessing.text import Tokenizer
10
  from tensorflow.keras.preprocessing.sequence import pad_sequences
11
 
12
- # Download and load necessary resources
13
  import spacy.cli
14
  spacy.cli.download("en_core_web_sm")
15
  nltk.download('punkt_tab')
@@ -34,24 +33,35 @@ print(f"Model loaded from {local_model_path}")
34
 
35
  def preprocess_text(text):
36
  text = re.sub(r'[^a-zA-Z0-9\s]', '', text) # Only remove non-alphanumeric characters except spaces
37
-
38
  # Tokenize and remove stopwords
39
  tokens = word_tokenize(text.lower())
40
  tokens = [word for word in tokens if word not in stop_words]
41
-
42
  # Lemmatize
43
  doc = nlp(' '.join(tokens))
44
  lemmas = [token.lemma_ for token in doc]
45
  return ' '.join(lemmas)
46
 
47
  def predict(text):
48
- inputs = preprocess_text(text)
49
- # Ensure the input shape matches what the model expects
50
- inputs = tokenizer.texts_to_sequences([inputs])
51
- inputs = pad_sequences(inputs, maxlen=1000, padding='post')
52
- outputs = model.predict(inputs)
53
- return f"This text is a violation = {outputs[0][0]:.2f}"
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
  # Set up the Gradio interface
56
  demo = gr.Interface(fn=predict, inputs="text", outputs="text")
57
- demo.launch()
 
9
  from tensorflow.keras.preprocessing.text import Tokenizer
10
  from tensorflow.keras.preprocessing.sequence import pad_sequences
11
 
 
12
  import spacy.cli
13
  spacy.cli.download("en_core_web_sm")
14
  nltk.download('punkt_tab')
 
33
 
34
def preprocess_text(text):
    """Normalize raw text for the classifier.

    Strips punctuation, lowercases and tokenizes, drops stopwords, then
    reduces each remaining word to its lemma via the spaCy pipeline.
    Returns the lemmas joined back into a single space-separated string.
    """
    # Keep only alphanumeric characters and whitespace.
    cleaned = re.sub(r'[^a-zA-Z0-9\s]', '', text)

    # Lowercase, tokenize, and filter out stopwords.
    kept = [tok for tok in word_tokenize(cleaned.lower()) if tok not in stop_words]

    # Run spaCy over the surviving tokens and collect their lemmas.
    doc = nlp(' '.join(kept))
    return ' '.join(token.lemma_ for token in doc)
43
 
44
def predict(text):
    """Score *text* with the violation classifier and format the result.

    Each stage is echoed to stdout for debugging. Any exception is caught
    and returned as a string so the Gradio UI reports the failure instead
    of crashing.
    """
    try:
        print(f"Input text: {text}")

        cleaned = preprocess_text(text)
        print(f"Preprocessed text: {cleaned}")

        # Ensure the input shape matches what the model expects
        seqs = tokenizer.texts_to_sequences([cleaned])
        print(f"Tokenized text: {seqs}")

        padded = pad_sequences(seqs, maxlen=1000, padding='post')
        print(f"Padded text: {padded}")

        outputs = model.predict(padded)
        print(f"Model outputs: {outputs}")

        return f"This text is a violation = {outputs[0][0]:.2f}"
    except Exception as e:
        print(f"Error during prediction: {e}")
        return f"Error during prediction: {e}"
64
 
65
# Set up the Gradio interface: free-text input in, verdict string out.
# NOTE: the module-level name `demo` is kept — Hugging Face Spaces looks
# for it by convention.
demo = gr.Interface(fn=predict, inputs="text", outputs="text")
demo.launch()