File size: 4,158 Bytes
331f4df
 
 
 
 
 
 
 
 
3e3fdaa
 
331f4df
3e3fdaa
331f4df
 
 
 
 
 
 
8f1c4bc
 
 
83f48db
8f1c4bc
 
 
331f4df
8f1c4bc
 
 
 
 
331f4df
3e3fdaa
 
 
 
 
 
 
331f4df
3e3fdaa
331f4df
 
 
 
 
 
 
b094da2
 
 
 
 
 
 
 
ada30fa
b094da2
 
 
 
 
4ccd3e0
 
 
ada30fa
4ccd3e0
ada30fa
4ccd3e0
 
b094da2
 
 
ada30fa
 
cf3f051
ada30fa
ec5b0bd
5171c7d
084f7f5
 
9055266
ada30fa
 
 
 
5171c7d
 
feda7cf
cf3f051
 
ada30fa
 
 
 
 
 
 
 
 
 
cf3f051
ada30fa
 
 
 
 
 
 
 
 
 
 
cf3f051
 
 
 
ada30fa
8f1c4bc
f3f1594
e0cd747
f3f1594
9055266
cf3f051
9055266
cf3f051
f3f1594
c580e55
 
ada30fa
c580e55
 
a11acd8
f3f1594
 
 
 
ada30fa
 
 
4ccd3e0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import gradio as gr
import pandas as pd
import tensorflow as tf
import nltk
import spacy
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from tensorflow.keras.preprocessing.sequence import pad_sequences
import requests
import pickle

# Download necessary resources
import spacy.cli
spacy.cli.download("en_core_web_sm")
nltk.download('punkt_tab')
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))
nlp = spacy.load('en_core_web_sm')

# Download the model file from Hugging Face
model_url = "https://huggingface.co./Zmorell/HIPA_2/resolve/main/saved_keras_model.keras"
local_model_path = "saved_keras_model.keras"

# Stream the download and fail fast on HTTP errors; without raise_for_status()
# a 404/403 response body (an HTML error page) would be silently written out
# as the .keras file and only blow up later in load_model().
response = requests.get(model_url, stream=True, timeout=60)
response.raise_for_status()
with open(local_model_path, 'wb') as f:
    # Chunked copy avoids holding the whole model file in memory at once.
    for chunk in response.iter_content(chunk_size=1 << 16):
        f.write(chunk)

print(f"Model downloaded to {local_model_path}")

# Load the downloaded model
model = tf.keras.models.load_model(local_model_path)
print(f"Model loaded from {local_model_path}")

# Load the tokenizer (must be the same tokenizer the model was trained with)
tokenizer_file_path = "tokenizer.pickle"
with open(tokenizer_file_path, 'rb') as handle:
    tokenizer = pickle.load(handle)

print("Tokenizer loaded from tokenizer.pickle")

def preprocess_text(text):
    """Normalize raw input text: strip punctuation, lowercase, remove
    English stopwords, then lemmatize with spaCy. Returns a single
    space-joined string of lemmas."""
    # Keep only letters, digits and whitespace before tokenizing.
    cleaned = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    kept = [w for w in word_tokenize(cleaned.lower()) if w not in stop_words]
    # Run spaCy over the stopword-filtered text to obtain lemmas.
    doc = nlp(' '.join(kept))
    return ' '.join(tok.lemma_ for tok in doc)

def predict(text):
    """Classify *text* with the loaded Keras model.

    Returns "True = <score>" when the sigmoid output is >= 0.5 (flagged as a
    HIPAA violation), "False = <score>" otherwise, or an error string if any
    stage fails. Intermediate stages are printed for debugging.
    """
    try:
        print(f"Input text: {text}")
        cleaned = preprocess_text(text)
        print(f"Preprocessed text: {cleaned}")

        sequences = tokenizer.texts_to_sequences([cleaned])
        print(f"Tokenized text: {sequences}")

        # Pad/truncate to the fixed input length the model was trained on.
        padded = pad_sequences(sequences, maxlen=750, padding='post')
        print(f"Padded text: {padded}")

        outputs = model.predict(padded)
        print(f"Model outputs: {outputs}")

        # Single-sample, single-unit sigmoid output.
        score = outputs[0][0]
        if score >= 0.5:
            return f"True = {score:.2f}"
        return f"False = {score:.2f}"
    except Exception as e:
        # Best-effort by design: surface the failure in the UI textbox
        # instead of crashing the Gradio app.
        print(f"Error during prediction: {e}")
        return f"Error during prediction: {e}"
# Custom CSS for the Gradio Blocks layout; selectors match the elem_id
# values used when building the interface below.
ui_css = """
#body {
    height: 700px;
    width: 500px;
    background-color: rgb(108, 207, 239);
    border-radius: 15px;
}
#hipaa-image {
    width: 75px;
}
#input-box {
    width: 480px;
    border: 2px solid black;
    margin-left: 8px;
    margin-right: 8px;
    overflow-y: scroll;
    height: 150px;
    max-height: 150px;
}
#output-elems {
    width: 480px;
    border: 2px solid black;
    margin-left: 8px;
    margin-right: 8px;
    padding: 1em;
}
#submit-button, #clear-button {
    color: white;
    height: 45px;
    width: 60px;
    margin: 10px;
    border-radius: 5px;
    border: 5px solid black;
}
#submit-button {
    background-color: red;
}
#clear-button {
    background-color: grey;
}
#addinfo {
    /* font-size requires a unit; a bare "16" is invalid CSS and ignored. */
    font-size: 16px;
    justify-self: center;
}
"""
# Set up the Gradio interface: a single styled column holding a header
# (logo + instructions), an input textbox, an output row, and
# Submit/Clear buttons. elem_id values tie each widget to ui_css rules.
with gr.Blocks(css=ui_css) as demo:
    with gr.Column(elem_id="body"):
        with gr.Row(elem_id="header"):
            with gr.Row(elem_id="hipaa-image"):
                # Logo image; file is expected next to this script.
                gr.Image(value="hipaa-e1638383751916.png")
            with gr.Row():
                gr.Markdown("Enter text below to determine if it is a HIPAA violation. Smaller inputs may be less accurate.", elem_id="addinfo")
            
        with gr.Row(elem_id="interactives"):
            inputs=gr.Textbox(label="Enter Input Text Here", elem_id="input-box", lines=5)
            
        with gr.Row(elem_id="output-elems"):
            gr.Markdown("This text is a violation: ")
            # Read-only textbox that receives predict()'s result string.
            outputs=gr.Textbox(label="", elem_id="output-box", interactive=False)
                
        with gr.Row():
            submit_button = gr.Button("Submit", elem_id="submit-button")
            clear_button = gr.Button("Clear", elem_id="clear-button")
        
    # Submit runs the classifier; Clear resets both textboxes to empty.
    submit_button.click(predict, inputs=inputs, outputs=outputs)
    clear_button.click(lambda: ("", ""), inputs=None, outputs=[inputs, outputs])
demo.launch()