Spaces:
Sleeping
Sleeping
File size: 4,158 Bytes
331f4df 3e3fdaa 331f4df 3e3fdaa 331f4df 8f1c4bc 83f48db 8f1c4bc 331f4df 8f1c4bc 331f4df 3e3fdaa 331f4df 3e3fdaa 331f4df b094da2 ada30fa b094da2 4ccd3e0 ada30fa 4ccd3e0 ada30fa 4ccd3e0 b094da2 ada30fa cf3f051 ada30fa ec5b0bd 5171c7d 084f7f5 9055266 ada30fa 5171c7d feda7cf cf3f051 ada30fa cf3f051 ada30fa cf3f051 ada30fa 8f1c4bc f3f1594 e0cd747 f3f1594 9055266 cf3f051 9055266 cf3f051 f3f1594 c580e55 ada30fa c580e55 a11acd8 f3f1594 ada30fa 4ccd3e0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
import gradio as gr
import pandas as pd
import tensorflow as tf
import nltk
import spacy
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from tensorflow.keras.preprocessing.sequence import pad_sequences
import requests
import pickle
# ---------------------------------------------------------------------------
# One-time setup: download NLP resources, the classifier model, and tokenizer.
# Runs at import time (module level), so the Space is ready before serving.
# ---------------------------------------------------------------------------
import spacy.cli
spacy.cli.download("en_core_web_sm")
nltk.download('punkt_tab')
nltk.download('stopwords')

# Globals shared by preprocess_text(): English stopword set and spaCy pipeline.
stop_words = set(stopwords.words('english'))
nlp = spacy.load('en_core_web_sm')

# Download the Keras model file from Hugging Face and cache it locally.
model_url = "https://huggingface.co./Zmorell/HIPA_2/resolve/main/saved_keras_model.keras"
local_model_path = "saved_keras_model.keras"
response = requests.get(model_url, timeout=120)
# Fail fast on HTTP errors — otherwise a 404/500 HTML page would be written
# to disk and load_model() below would crash with a confusing error.
response.raise_for_status()
with open(local_model_path, 'wb') as f:
    f.write(response.content)
print(f"Model downloaded to {local_model_path}")

# Load the downloaded model
model = tf.keras.models.load_model(local_model_path)
print(f"Model loaded from {local_model_path}")

# Load the tokenizer fitted at training time (its vocab must match the model).
# NOTE(review): pickle.load is acceptable only because tokenizer.pickle ships
# with this Space; never unpickle data fetched from an untrusted source.
tokenizer_file_path = "tokenizer.pickle"
with open(tokenizer_file_path, 'rb') as handle:
    tokenizer = pickle.load(handle)
print("Tokenizer loaded from tokenizer.pickle")
def preprocess_text(text):
    """Normalize raw input text for the classifier.

    Strips non-alphanumeric characters, lowercases, tokenizes, removes
    English stopwords, then lemmatizes with spaCy. Returns a single
    space-joined string of lemmas.
    """
    cleaned = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    kept = [w for w in word_tokenize(cleaned.lower()) if w not in stop_words]
    # Re-join before lemmatizing so spaCy sees one document, as before.
    return ' '.join(tok.lemma_ for tok in nlp(' '.join(kept)))
def predict(text):
    """Run the HIPAA-violation classifier on *text*.

    Returns "True = <score>" when the sigmoid output is >= 0.5,
    "False = <score>" otherwise, or an error message string if any
    step fails (this is the Gradio-facing boundary, so it reports
    rather than raises).
    """
    try:
        print(f"Input text: {text}")
        cleaned = preprocess_text(text)
        print(f"Preprocessed text: {cleaned}")
        sequences = tokenizer.texts_to_sequences([cleaned])
        print(f"Tokenized text: {sequences}")
        padded = pad_sequences(sequences, maxlen=750, padding='post')
        print(f"Padded text: {padded}")
        outputs = model.predict(padded)
        print(f"Model outputs: {outputs}")
        # Single sigmoid output; threshold at 0.5 for the boolean label.
        score = outputs[0][0]
        label = "True" if score >= 0.5 else "False"
        return f"{label} = {score:.2f}"
    except Exception as e:
        print(f"Error during prediction: {e}")
        return f"Error during prediction: {e}"
# CSS injected into the Gradio Blocks app; each selector targets an elem_id
# assigned in the layout below.
ui_css = """
#body {
    height: 700px;
    width: 500px;
    background-color: rgb(108, 207, 239);
    border-radius: 15px;
}
#hipaa-image {
    width: 75px;
}
#input-box {
    width: 480px;
    border: 2px solid black;
    margin-left: 8px;
    margin-right: 8px;
    overflow-y: scroll;
    height: 150px;
    max-height: 150px;
}
#output-elems {
    width: 480px;
    border: 2px solid black;
    margin-left: 8px;
    margin-right: 8px;
    padding: 1em;
}
#submit-button, #clear-button {
    color: white;
    height: 45px;
    width: 60px;
    margin: 10px;
    border-radius: 5px;
    border: 5px solid black;
}
#submit-button {
    background-color: red;
}
#clear-button {
    background-color: grey;
}
#addinfo {
    font-size: 16px; /* was unitless "16", which is invalid CSS and ignored */
    justify-self: center;
}
"""
# Set up the Gradio interface: a single column with a header image,
# instructions, an input textbox, an output textbox, and submit/clear buttons.
with gr.Blocks(css=ui_css) as demo:
    with gr.Column(elem_id="body"):
        with gr.Row(elem_id="header"):
            with gr.Row(elem_id="hipaa-image"):
                gr.Image(value="hipaa-e1638383751916.png")
            with gr.Row():
                gr.Markdown("Enter text below to determine if it is a HIPAA violation. Smaller inputs may be less accurate.", elem_id="addinfo")
        with gr.Row(elem_id="interactives"):
            inputs = gr.Textbox(label="Enter Input Text Here", elem_id="input-box", lines=5)
        with gr.Row(elem_id="output-elems"):
            gr.Markdown("This text is a violation: ")
            outputs = gr.Textbox(label="", elem_id="output-box", interactive=False)
        with gr.Row():
            submit_button = gr.Button("Submit", elem_id="submit-button")
            clear_button = gr.Button("Clear", elem_id="clear-button")

    # Wire up events: Submit runs the classifier; Clear resets both boxes.
    submit_button.click(predict, inputs=inputs, outputs=outputs)
    clear_button.click(lambda: ("", ""), inputs=None, outputs=[inputs, outputs])

# Stray trailing "|" removed from this line — it was a scrape artifact that
# made the file a SyntaxError.
demo.launch()