# HIPA-AI — app.py (Hugging Face Space, author: BraydenAC)
import gradio as gr
import pandas as pd
import tensorflow as tf
import nltk
import spacy
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from tensorflow.keras.preprocessing.sequence import pad_sequences
import requests
import pickle
# --- One-time resource setup (runs at import time) ---

# Download the NLP resources used by preprocess_text().
import spacy.cli
spacy.cli.download("en_core_web_sm")
nltk.download('punkt_tab')
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))
nlp = spacy.load('en_core_web_sm')

# Download the classifier weights from the Hugging Face Hub.
model_url = "https://huggingface.co./Zmorell/HIPA_2/resolve/main/saved_keras_model.keras"
local_model_path = "saved_keras_model.keras"
response = requests.get(model_url)
# Fail fast on a bad download instead of silently writing an HTML error
# page to disk and failing later inside load_model with a cryptic error.
response.raise_for_status()
with open(local_model_path, 'wb') as f:
    f.write(response.content)
print(f"Model downloaded to {local_model_path}")

# Load the downloaded model
model = tf.keras.models.load_model(local_model_path)
print(f"Model loaded from {local_model_path}")

# Load the fitted Keras tokenizer; it must be the same one used at training
# time or texts_to_sequences() will produce mismatched indices.
# NOTE(review): pickle.load on a bundled artifact is standard for Keras
# tokenizers, but only safe because the file ships with this Space.
tokenizer_file_path = "tokenizer.pickle"
with open(tokenizer_file_path, 'rb') as handle:
    tokenizer = pickle.load(handle)
print("Tokenizer loaded from tokenizer.pickle")
def preprocess_text(text):
    """Normalize raw input text: strip punctuation, lowercase, tokenize,
    drop English stopwords, and lemmatize with spaCy.

    Returns a single space-joined string of lemmas.
    """
    cleaned = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    words = word_tokenize(cleaned.lower())
    kept = [w for w in words if w not in stop_words]
    # Re-join before the spaCy pass so lemmatization sees the filtered text.
    return ' '.join(tok.lemma_ for tok in nlp(' '.join(kept)))
def predict(text):
    """Classify *text* as a HIPAA violation.

    Returns "True = <score>" when the model's sigmoid output is >= 0.5,
    "False = <score>" otherwise, or an error message string on failure
    (so the Gradio UI always has something to display).
    """
    try:
        print(f"Input text: {text}")
        cleaned = preprocess_text(text)
        print(f"Preprocessed text: {cleaned}")
        sequences = tokenizer.texts_to_sequences([cleaned])
        print(f"Tokenized text: {sequences}")
        # maxlen must match the sequence length the model was trained with.
        padded = pad_sequences(sequences, maxlen=750, padding='post')
        print(f"Padded text: {padded}")
        scores = model.predict(padded)
        print(f"Model outputs: {scores}")
        # Single-sample, single-output sigmoid score.
        score = scores[0][0]
        label = "True" if score >= 0.5 else "False"
        return f"{label} = {score:.2f}"
    except Exception as exc:
        # Surface the failure in the UI rather than crashing the Space.
        print(f"Error during prediction: {exc}")
        return f"Error during prediction: {exc}"
# Custom CSS for the Gradio layout (element ids are referenced below).
ui_css = """
#body {
    height: 700px;
    width: 500px;
    background-color: rgb(108, 207, 239);
    border-radius: 15px;
}
#hipaa-image {
    width: 75px;
}
#input-box {
    width: 480px;
    border: 2px solid black;
    margin-left: 8px;
    margin-right: 8px;
    overflow-y: scroll;
    height: 150px;
    max-height: 150px;
}
#output-elems {
    width: 480px;
    border: 2px solid black;
    margin-left: 8px;
    margin-right: 8px;
    padding: 1em;
}
#submit-button, #clear-button {
    color: white;
    height: 45px;
    width: 60px;
    margin: 10px;
    border-radius: 5px;
    border: 5px solid black;
}
#submit-button {
    background-color: red;
}
#clear-button {
    background-color: grey;
}
#addinfo {
    /* was "font-size: 16;" — a unitless font-size is invalid CSS and
       ignored by browsers; px unit added so the rule actually applies. */
    font-size: 16px;
    justify-self: center;
}
"""
# Build the Gradio interface and wire up the event handlers.
with gr.Blocks(css=ui_css) as demo:
    with gr.Column(elem_id="body"):
        # Header row: HIPAA logo plus a short usage hint.
        with gr.Row(elem_id="header"):
            with gr.Row(elem_id="hipaa-image"):
                gr.Image(value="hipaa-e1638383751916.png")
            with gr.Row():
                gr.Markdown("Enter text below to determine if it is a HIPAA violation. Smaller inputs may be less accurate.", elem_id="addinfo")

        # Free-text input for the passage to classify.
        with gr.Row(elem_id="interactives"):
            text_input = gr.Textbox(label="Enter Input Text Here", elem_id="input-box", lines=5)

        # Read-only result display.
        with gr.Row(elem_id="output-elems"):
            gr.Markdown("This text is a violation: ")
            text_output = gr.Textbox(label="", elem_id="output-box", interactive=False)

        # Action buttons: run the classifier / reset both boxes.
        with gr.Row():
            submit_btn = gr.Button("Submit", elem_id="submit-button")
            clear_btn = gr.Button("Clear", elem_id="clear-button")

    submit_btn.click(predict, inputs=text_input, outputs=text_output)
    clear_btn.click(lambda: ("", ""), inputs=None, outputs=[text_input, text_output])

demo.launch()