import time
from typing import Dict, List

import gradio as gr
import pandas as pd
from transformers import pipeline


class NERDemo:
    """Gradio demo around a multilingual XLM-RoBERTa NER pipeline."""

    def __init__(self):
        """Load the NER pipeline and the table of supported languages."""
        # With aggregation_strategy="simple" the pipeline returns grouped
        # entities; process_ner relies on their "entity_group" key.
        self.ner_pipeline = pipeline(
            "ner",
            model="enesmanan/multilingual-xlm-roberta-ner",
            aggregation_strategy="simple",
        )
        # Language code -> display name; the display names feed the dropdown.
        self.supported_languages = {
            "en": "English",
            "de": "German",
            "tr": "Turkish",
            "es": "Spanish",
            "fr": "French",
        }

    def process_ner(self, text: str, language: str) -> Dict:
        """Run the NER pipeline on *text* and return entities with metadata.

        Args:
            text: Input text to analyse.
            language: Language name selected in the UI. It is currently not
                used: the same pipeline is applied whatever is selected.

        Returns:
            A dict with the keys ``"text"``, ``"entities"``, ``"stats"``
            (entity group -> number of occurrences) and
            ``"processing_time"`` (milliseconds, rounded to 2 decimals).
            Empty input skips the model but still returns all four keys so
            callers can index them unconditionally.
        """
        if not text:
            # Same shape as the normal result; previously "stats" and
            # "processing_time" were missing here, which made the click
            # handler raise KeyError on an empty textbox.
            return {
                "text": "",
                "entities": [],
                "stats": {},
                "processing_time": 0.0,
            }

        # perf_counter is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments.
        start_time = time.perf_counter()
        entities = self.ner_pipeline(text)
        processing_time = round((time.perf_counter() - start_time) * 1000, 2)  # ms

        # Count how many times each entity group was detected.
        if entities:
            df = pd.DataFrame(entities)
            entity_stats = df["entity_group"].value_counts().to_dict()
        else:
            entity_stats = {}

        return {
            "text": text,
            "entities": entities,
            "stats": entity_stats,
            "processing_time": processing_time,
        }

    def create_demo(self) -> gr.Blocks:
        """Build the Gradio Blocks UI and wire up its event handlers.

        Returns:
            The configured (not yet launched) ``gr.Blocks`` application.
        """
        theme = gr.themes.Base(
            primary_hue="blue",
            secondary_hue="slate",
            font=gr.themes.GoogleFont("Source Sans Pro"),
            neutral_hue="slate",
        ).set(
            body_text_color="*neutral_950",
            block_background_fill="*neutral_50",
            block_border_width="0px",
            button_primary_background_fill="*primary_500",
            button_primary_background_fill_hover="*primary_600",
            button_primary_text_color="white",
            input_background_fill="white",
            block_radius="lg",
        )

        with gr.Blocks(theme=theme) as demo:
            # Page header.
            with gr.Row():
                gr.HTML(
                    """

Multilingual Named Entity Recognition

This demo uses XLM-RoBERTa model fine-tuned for NER tasks in multiple languages. Automatically detects and highlights named entities such as persons, organizations, locations, and more.

"""
                )

            with gr.Row():
                # Left column: inputs and action buttons.
                with gr.Column(scale=3):
                    text_input = gr.Textbox(
                        label="Input Text",
                        placeholder="Enter text in any supported language...",
                        lines=3,
                    )
                    language = gr.Dropdown(
                        choices=list(self.supported_languages.values()),
                        label="Language (Optional)",
                        value="English",
                    )
                    with gr.Row():
                        submit_btn = gr.Button("Analyze", variant="primary")
                        clear_btn = gr.Button("Clear")

                # Right column: per-entity-group counts and timing.
                with gr.Column(scale=2):
                    with gr.Group():
                        gr.HTML(
                            """

Entity Statistics

"""
                        )
                        stats_output = gr.Json(label="Detected Entities")
                        time_output = gr.Markdown(elem_classes="text-sm text-gray-600")

            highlighted_output = gr.HighlightedText(
                label="Detected Entities", show_legend=True
            )

            # Example inputs
            examples = [
                [
                    "Emma Watson starred in Harry Potter and studied at Oxford University while working with United Nations.",
                    "English",
                ],
                [
                    "Die Deutsche Bank hat ihren Hauptsitz in Frankfurt, während BMW in München produziert.",
                    "German",
                ],
                [
                    "Enes Fehmi Manan, İzmir'de yaşıyor ve Fibababanka'da çalışıyor.",
                    "Turkish",
                ],
                [
                    "Le Louvre à Paris expose la Joconde de Leonardo da Vinci depuis le XIXe siècle.",
                    "French",
                ],
                [
                    "El Real Madrid jugará contra el Barcelona en el Santiago Bernabéu el próximo mes.",
                    "Spanish",
                ],
            ]
            gr.Examples(examples, inputs=[text_input, language])

            # Event handlers
            def process_and_format(text: str, lang: str) -> tuple:
                """Adapt the process_ner result to the three output components."""
                result = self.process_ner(text, lang)
                stats = result["stats"]
                time_msg = f"Processing time: {result['processing_time']} ms"
                # The full result dict (with "text" and "entities") goes to
                # the HighlightedText component.
                return (result, stats, time_msg)

            submit_btn.click(
                process_and_format,
                inputs=[text_input, language],
                outputs=[highlighted_output, stats_output, time_output],
            )
            # Clear resets the three outputs only; the input textbox is kept.
            clear_btn.click(
                lambda: (None, None, ""),
                outputs=[highlighted_output, stats_output, time_output],
            )

        return demo


if __name__ == "__main__":
    ner_demo = NERDemo()
    demo = ner_demo.create_demo()
    demo.launch(share=True)