File size: 4,158 Bytes
331f4df
 
 
 
 
 
 
 
 
3e3fdaa
 
331f4df
3e3fdaa
331f4df
 
 
 
 
 
 
8f1c4bc
 
 
83f48db
8f1c4bc
 
 
331f4df
8f1c4bc
 
 
 
 
331f4df
3e3fdaa
 
 
 
 
 
 
331f4df
3e3fdaa
331f4df
 
 
 
 
 
 
b094da2
 
 
 
 
 
 
 
ada30fa
b094da2
 
 
 
 
4ccd3e0
 
 
ada30fa
4ccd3e0
ada30fa
4ccd3e0
 
b094da2
 
 
ada30fa
 
cf3f051
ada30fa
ec5b0bd
5171c7d
084f7f5
 
9055266
ada30fa
 
 
 
5171c7d
 
feda7cf
cf3f051
 
ada30fa
 
 
 
 
 
 
 
 
 
cf3f051
ada30fa
 
 
 
 
 
 
 
 
 
 
cf3f051
 
 
 
ada30fa
8f1c4bc
f3f1594
e0cd747
f3f1594
9055266
cf3f051
9055266
cf3f051
f3f1594
c580e55
 
ada30fa
c580e55
 
a11acd8
f3f1594
 
 
 
ada30fa
 
 
4ccd3e0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import gradio as gr
import pandas as pd
import tensorflow as tf
import nltk
import spacy
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from tensorflow.keras.preprocessing.sequence import pad_sequences
import requests
import pickle

# Download necessary resources
import spacy.cli
spacy.cli.download("en_core_web_sm")
nltk.download('punkt_tab')
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))
nlp = spacy.load('en_core_web_sm')

# Download the model file from Hugging Face
model_url = "https://huggingface.co./Zmorell/HIPA_2/resolve/main/saved_keras_model.keras"
local_model_path = "saved_keras_model.keras"

# Stream the download and fail fast on HTTP errors; without raise_for_status()
# a 404/403 response body (an HTML error page) would be silently written out
# as the .keras file and only blow up later in load_model().
response = requests.get(model_url, stream=True, timeout=60)
response.raise_for_status()
with open(local_model_path, 'wb') as f:
    # Chunked copy avoids holding the whole model file in memory at once.
    for chunk in response.iter_content(chunk_size=1 << 16):
        f.write(chunk)

print(f"Model downloaded to {local_model_path}")

# Load the downloaded model
model = tf.keras.models.load_model(local_model_path)
print(f"Model loaded from {local_model_path}")

# Load the tokenizer (must be the same tokenizer the model was trained with)
tokenizer_file_path = "tokenizer.pickle"
with open(tokenizer_file_path, 'rb') as handle:
    tokenizer = pickle.load(handle)

print("Tokenizer loaded from tokenizer.pickle")

def preprocess_text(text):
    """Normalize raw input text: strip punctuation, lowercase, remove
    English stopwords, then lemmatize with spaCy. Returns a single
    space-joined string of lemmas."""
    # Keep only letters, digits and whitespace before tokenizing.
    cleaned = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    kept = [w for w in word_tokenize(cleaned.lower()) if w not in stop_words]
    # Run spaCy over the stopword-filtered text to obtain lemmas.
    doc = nlp(' '.join(kept))
    return ' '.join(tok.lemma_ for tok in doc)

def predict(text):
    """Classify *text* with the loaded Keras model.

    Returns "True = <score>" when the sigmoid output is >= 0.5 (flagged as a
    HIPAA violation), "False = <score>" otherwise, or an error string if any
    stage fails. Intermediate stages are printed for debugging.
    """
    try:
        print(f"Input text: {text}")
        cleaned = preprocess_text(text)
        print(f"Preprocessed text: {cleaned}")

        sequences = tokenizer.texts_to_sequences([cleaned])
        print(f"Tokenized text: {sequences}")

        # Pad/truncate to the fixed input length the model was trained on.
        padded = pad_sequences(sequences, maxlen=750, padding='post')
        print(f"Padded text: {padded}")

        outputs = model.predict(padded)
        print(f"Model outputs: {outputs}")

        # Single-sample, single-unit sigmoid output.
        score = outputs[0][0]
        if score >= 0.5:
            return f"True = {score:.2f}"
        return f"False = {score:.2f}"
    except Exception as e:
        # Best-effort by design: surface the failure in the UI textbox
        # instead of crashing the Gradio app.
        print(f"Error during prediction: {e}")
        return f"Error during prediction: {e}"
# Custom CSS for the Gradio Blocks layout; selectors match the elem_id
# values used when building the interface below.
ui_css = """
#body {
    height: 700px;
    width: 500px;
    background-color: rgb(108, 207, 239);
    border-radius: 15px;
}
#hipaa-image {
    width: 75px;
}
#input-box {
    width: 480px;
    border: 2px solid black;
    margin-left: 8px;
    margin-right: 8px;
    overflow-y: scroll;
    height: 150px;
    max-height: 150px;
}
#output-elems {
    width: 480px;
    border: 2px solid black;
    margin-left: 8px;
    margin-right: 8px;
    padding: 1em;
}
#submit-button, #clear-button {
    color: white;
    height: 45px;
    width: 60px;
    margin: 10px;
    border-radius: 5px;
    border: 5px solid black;
}
#submit-button {
    background-color: red;
}
#clear-button {
    background-color: grey;
}
#addinfo {
    /* font-size requires a unit; a bare "16" is invalid CSS and ignored. */
    font-size: 16px;
    justify-self: center;
}
"""
# Set up the Gradio interface: a single styled column holding a header
# (logo + instructions), an input textbox, an output row, and
# Submit/Clear buttons. elem_id values tie each widget to ui_css rules.
with gr.Blocks(css=ui_css) as demo:
    with gr.Column(elem_id="body"):
        with gr.Row(elem_id="header"):
            with gr.Row(elem_id="hipaa-image"):
                # Logo image; file is expected next to this script.
                gr.Image(value="hipaa-e1638383751916.png")
            with gr.Row():
                gr.Markdown("Enter text below to determine if it is a HIPAA violation. Smaller inputs may be less accurate.", elem_id="addinfo")
            
        with gr.Row(elem_id="interactives"):
            inputs=gr.Textbox(label="Enter Input Text Here", elem_id="input-box", lines=5)
            
        with gr.Row(elem_id="output-elems"):
            gr.Markdown("This text is a violation: ")
            # Read-only textbox that receives predict()'s result string.
            outputs=gr.Textbox(label="", elem_id="output-box", interactive=False)
                
        with gr.Row():
            submit_button = gr.Button("Submit", elem_id="submit-button")
            clear_button = gr.Button("Clear", elem_id="clear-button")
        
    # Submit runs the classifier; Clear resets both textboxes to empty.
    submit_button.click(predict, inputs=inputs, outputs=outputs)
    clear_button.click(lambda: ("", ""), inputs=None, outputs=[inputs, outputs])
demo.launch()