# authscan-demo / app.py
# ngocminhta's picture
# Update app.py
# 4d4d0e1 verified
import gradio as gr
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
def load_model(model_name):
    """Build a text-classification pipeline for the checkpoint ``model_name``.

    The tokenizer and the sequence-classification model are both loaded from
    ``model_name`` and handed to a ``transformers`` pipeline configured with
    ``truncation=True``, ``max_length=512`` and ``top_k=4``.

    Args:
        model_name: Identifier passed unchanged to both ``from_pretrained``
            calls.

    Returns:
        The pipeline object returned by ``transformers.pipeline``.
    """
    # Dict entries are evaluated top to bottom, so the tokenizer is still
    # loaded before the model.
    pipeline_options = {
        'tokenizer': AutoTokenizer.from_pretrained(model_name),
        'model': AutoModelForSequenceClassification.from_pretrained(model_name),
        'truncation': True,
        'max_length': 512,
        'top_k': 4,
    }
    return pipeline('text-classification', **pipeline_options)
# Both pipelines are created once, when the module is imported.
# `classifier` output is decoded with TEXT_CLASS_MAPPING (human vs. machine).
classifier = load_model("ngocminhta/authscan-baseline")
# `classifier2` output is decoded with TEXT_CLASS_MAPPING_MACHINE (model family).
classifier2 = load_model("ngocminhta/authscan-baseline-machine")
# Display names for the raw label ids produced by `classifier2`
# (the model-family classifier).
TEXT_CLASS_MAPPING_MACHINE = {
    'LABEL_0': 'Gemini 1.5 Family',
    'LABEL_1': 'Gemini 2.0 Family',
    'LABEL_2': 'GPT-4o Family',
    'LABEL_3': 'Llama 3.1 Family',
}
# Display names for the raw label ids produced by `classifier`
# (the human-vs-machine classifier).
TEXT_CLASS_MAPPING = {
    'LABEL_0': 'Human-Written',
    'LABEL_1': 'Machine-Generated',
}
def update_language(language):
    """Return a component update selecting the editor's highlighting mode.

    Args:
        language: The programming language chosen in the dropdown.

    Returns:
        ``gr.update(language=...)`` with ``'c'`` for ``'C'``, ``'cpp'`` for
        ``'C++'`` and ``'python'`` for everything else (including ``'Java'``
        and ``'Python'``).
    """
    if language == 'C':
        highlight_mode = 'c'
    elif language == 'C++':
        highlight_mode = 'cpp'
    else:
        # 'Java', 'Python' and any unrecognised value share the python mode.
        highlight_mode = 'python'
    return gr.update(language=highlight_mode)
def process_result_detection_tab(text, language):
    """Classify a code snippet and, if machine-generated, attribute a model family.

    The snippet is wrapped in the prompt ``"Language: <language>\\n\\n<text>"``
    and scored by the module-level ``classifier``. Only when the top-scoring
    class is ``'Machine-Generated'`` is the same prompt sent to ``classifier2``.

    Args:
        text: Source code from the editor; may be ``None`` or blank.
        language: Programming language name inserted into the prompt.

    Returns:
        A ``(final_results, final_results_machine)`` tuple.
        ``final_results`` maps each display name from ``TEXT_CLASS_MAPPING``
        to its score. ``final_results_machine`` maps display names from
        ``TEXT_CLASS_MAPPING_MACHINE`` to scores, or is ``None`` when the
        snippet is not judged machine-generated. Both elements are ``None``
        when ``text`` is ``None`` or contains only whitespace.
    """
    # Nothing to classify. Without this guard an empty editor would be
    # scored as the literal text "None" or as a prompt with no code at all.
    if text is None or not text.strip():
        return None, None

    prompt = f"Language: {language}\n\n{text}"

    # The first element of the pipeline output is the list of
    # {'label', 'score'} predictions for this single input.
    origin_predictions = classifier(prompt)[0]
    final_results = {
        TEXT_CLASS_MAPPING[prediction['label']]: float(prediction['score'])
        for prediction in origin_predictions
    }

    top_label = max(final_results, key=final_results.get)
    if top_label != 'Machine-Generated':
        # Skip the second model entirely: its output would be discarded.
        return final_results, None

    family_predictions = classifier2(prompt)[0]
    final_results_machine = {
        TEXT_CLASS_MAPPING_MACHINE[prediction['label']]: float(prediction['score'])
        for prediction in family_predictions
    }
    return final_results, final_results_machine
def clear_detection_tab():
    """Reset the code editor without locking the check button.

    Returns:
        A two-element tuple matching the ``[input_text, check_button]``
        outputs of the Clear button: an empty string for the editor and a
        component update that keeps the button interactive.
    """
    # Previously this returned interactive=False, but nothing in this file
    # ever re-enables the button, so a single click on "Clear" blocked all
    # further checks until the page was reloaded.
    return "", gr.update(interactive=True)
# Font stacks for the UI theme: a Google-hosted face first, followed by
# generic family names.
_sans_stack = [gr.themes.GoogleFont('Open Sans'), 'ui-sans-serif', 'system-ui', 'sans-serif']
_mono_stack = [gr.themes.GoogleFont('Roboto Mono'), 'ui-monospace', 'Consolas', 'monospace']

# Soft theme with a teal primary hue, passed to gr.Blocks below.
theme = gr.themes.Soft(
    primary_hue="teal",
    font=_sans_stack,
    font_mono=_mono_stack,
)
# UI layout: one working tab for code detection and a placeholder tab for
# text detection.
with gr.Blocks(theme=theme) as demo:
    gr.Markdown("""<h1><center>AuthScan</center></h1>""")

    with gr.Tab("Code Detection"):
        with gr.Row():
            language = gr.Dropdown(
                choices=["C", "C++", "Java", "Python"],
                label="Select Programming Language",
                value="C"
            )
        with gr.Row():
            input_text = gr.Code(
                label="Enter code here",
                # Must match the dropdown's default ("C"): `language.change`
                # only fires after the user picks a different entry, so the
                # initial highlighting mode has to be set here.
                language="c",
                elem_id="code_input",
            )
        with gr.Row():
            check_button = gr.Button("Check Origin", variant="primary")
            clear_button = gr.Button("Clear", variant="stop")

        # Overall verdict and, for machine-generated code, the model family.
        out = gr.Label(label='Result')
        out_machine = gr.Label(label='Detailed Information')

        # When language is changed, update the code component's language
        language.change(update_language, inputs=language, outputs=input_text)
        # Run detection; the handler returns one value per output label.
        check_button.click(
            process_result_detection_tab,
            inputs=[input_text, language],
            outputs=[out, out_machine],
        )
        # Reset the editor; the handler also returns an update for the
        # check button.
        clear_button.click(clear_detection_tab, inputs=[], outputs=[input_text, check_button])

    with gr.Tab("Text Detection"):
        gr.Markdown("""Under development!""")
# Start the app when the module runs; share=True asks Gradio for a public share link.
demo.launch(share=True)