import gradio as gr
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline


def load_model(model_name):
    """Build a text-classification pipeline for a pretrained checkpoint.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the sequence-classification model.

    Returns:
        A ``transformers`` pipeline that truncates inputs to 512 tokens and
        is configured with ``top_k=4``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return pipeline(
        'text-classification',
        model=model,
        tokenizer=tokenizer,
        truncation=True,
        max_length=512,
        top_k=4,
    )


# `classifier` output is decoded with TEXT_CLASS_MAPPING (human vs. machine);
# `classifier2` output is decoded with TEXT_CLASS_MAPPING_MACHINE (model family).
classifier = load_model("ngocminhta/authscan-baseline")
classifier2 = load_model("ngocminhta/authscan-baseline-machine")

TEXT_CLASS_MAPPING_MACHINE = {
    'LABEL_0': 'Gemini 1.5 Family',
    'LABEL_1': 'Gemini 2.0 Family',
    'LABEL_2': 'GPT-4o Family',
    'LABEL_3': 'Llama 3.1 Family',
}

TEXT_CLASS_MAPPING = {
    'LABEL_0': 'Human-Written',
    'LABEL_1': 'Machine-Generated',
}

# Dropdown choice -> syntax-highlighting mode of the code editor.
# NOTE(review): Java is highlighted as Python, presumably because the editor
# has no Java mode -- confirm against the installed Gradio version.
_EDITOR_LANGUAGE = {
    'Java': 'python',
    'Python': 'python',
    'C': 'c',
    'C++': 'cpp',
}


def update_language(language):
    """Return a component update switching the editor's highlighting mode.

    Args:
        language: Value selected in the language dropdown.

    Returns:
        ``gr.update(language=...)``; unknown choices fall back to ``'python'``.
    """
    return gr.update(language=_EDITOR_LANGUAGE.get(language, 'python'))


def _scores_by_name(predictions, label_names):
    """Map raw pipeline predictions to ``{display name: score}``.

    Iteration order of ``predictions`` is preserved, and scores are converted
    to plain ``float`` values.
    """
    return {label_names[p['label']]: float(p['score']) for p in predictions}


def process_result_detection_tab(text, language):
    """Classify a code snippet as human- or machine-written.

    Args:
        text: Source code taken from the editor (may be empty or ``None``).
        language: Programming language name, prepended to the model prompt.

    Returns:
        A ``(origin_scores, family_scores)`` tuple. ``origin_scores`` maps
        'Human-Written' / 'Machine-Generated' to their scores.
        ``family_scores`` maps model-family names to scores when the top
        verdict is 'Machine-Generated', otherwise it is ``None``. Both are
        ``None`` when there is no code to classify.
    """
    # Nothing to classify: avoid feeding an empty prompt (or the literal
    # string "None") to the models.
    if not text or not text.strip():
        return None, None

    prompt = f"Language: {language}\n\n{text}"

    origin_scores = _scores_by_name(classifier(prompt)[0], TEXT_CLASS_MAPPING)
    verdict = max(origin_scores, key=origin_scores.get)
    if verdict != 'Machine-Generated':
        return origin_scores, None

    # The family classifier is only needed for machine-generated code, so it
    # is not run at all for human-written verdicts.
    family_scores = _scores_by_name(
        classifier2(prompt)[0], TEXT_CLASS_MAPPING_MACHINE
    )
    return origin_scores, family_scores


def clear_detection_tab():
    """Return the values that reset the detection tab.

    Returns:
        An empty string for the code editor and an update that makes the
        check button non-interactive.
    """
    return "", gr.update(interactive=False)


theme = gr.themes.Soft(
    primary_hue="teal",
    font=[
        gr.themes.GoogleFont('Open Sans'),
        'ui-sans-serif',
        'system-ui',
        'sans-serif',
    ],
    font_mono=[
        gr.themes.GoogleFont('Roboto Mono'),
        'ui-monospace',
        'Consolas',
        'monospace',
    ],
)
def _check_button_state(code):
    """Enable the check button only while the editor holds non-blank code."""
    return gr.update(interactive=bool(code and code.strip()))


# Two-tab UI: a working code-detection tab and a placeholder text tab.
with gr.Blocks(theme=theme) as demo:
    gr.Markdown("""

AuthScan

""")
    with gr.Tab("Code Detection"):
        with gr.Row():
            language = gr.Dropdown(
                choices=["C", "C++", "Java", "Python"],
                label="Select Programming Language",
                value="C",
            )
        with gr.Row():
            input_text = gr.Code(
                label="Enter code here",
                # Start in the mode that matches the dropdown's default ("C").
                language="c",
                elem_id="code_input",
            )
        with gr.Row():
            check_button = gr.Button("Check Origin", variant="primary")
            clear_button = gr.Button("Clear", variant="stop")
        # NOTE(review): the original indentation was lost; the result labels
        # are assumed to sit below the button row -- confirm the layout.
        out = gr.Label(label='Result')
        out_machine = gr.Label(label='Detailed Information')

        # When language is changed, update the code component's language
        language.change(update_language, inputs=language, outputs=input_text)
        check_button.click(
            process_result_detection_tab,
            inputs=[input_text, language],
            outputs=[out, out_machine],
        )
        clear_button.click(
            clear_detection_tab,
            inputs=[],
            outputs=[input_text, check_button],
        )
        # Clearing disables the check button; re-enable it as soon as the
        # editor contains code again, otherwise it would stay disabled until
        # the page is reloaded.
        input_text.change(
            _check_button_state,
            inputs=input_text,
            outputs=check_button,
        )
    with gr.Tab("Text Detection"):
        gr.Markdown("""Under development!""")

demo.launch(share=True)