import time
from typing import Dict, List
import gradio as gr
import pandas as pd
from transformers import pipeline
class NERDemo:
    """Gradio demo around a multilingual XLM-RoBERTa NER pipeline.

    Instance attributes (set in ``__init__``):
        ner_pipeline: the ``transformers`` "ner" pipeline used for inference.
        supported_languages: language code -> display name; the display names
            populate the language dropdown.
    """

    # Static copy rendered at the top of the page.
    _HEADER_HTML = (
        "\n"
        "Multilingual Named Entity Recognition\n"
        "This demo uses XLM-RoBERTa model fine-tuned for NER tasks in multiple languages.\n"
        "Automatically detects and highlights named entities such as persons, organizations, locations, and more.\n"
    )

    # Heading rendered above the statistics panel.
    _STATS_HEADING_HTML = "\nEntity Statistics\n"

    # Example rows as [text, language display name], matching the order of
    # the inputs passed to gr.Examples.
    _EXAMPLES = [
        [
            "Emma Watson starred in Harry Potter and studied at Oxford University while working with United Nations.",
            "English",
        ],
        [
            "Die Deutsche Bank hat ihren Hauptsitz in Frankfurt, während BMW in München produziert.",
            "German",
        ],
        [
            "Enes Fehmi Manan, İzmir'de yaşıyor ve Fibababanka'da çalışıyor.",
            "Turkish",
        ],
        [
            "Le Louvre à Paris expose la Joconde de Leonardo da Vinci depuis le XIXe siècle.",
            "French",
        ],
        [
            "El Real Madrid jugará contra el Barcelona en el Santiago Bernabéu el próximo mes.",
            "Spanish",
        ],
    ]

    def __init__(self):
        """Load the NER pipeline and register the supported languages."""
        self.ner_pipeline = pipeline(
            "ner",
            model="enesmanan/multilingual-xlm-roberta-ner",
            # process_ner() reads the "entity_group" key of each result, so
            # the aggregation strategy must stay one that produces it.
            aggregation_strategy="simple",
        )
        self.supported_languages = {
            "en": "English",
            "de": "German",
            "tr": "Turkish",
            "es": "Spanish",
            "fr": "French",
        }

    def process_ner(self, text: str, language: str) -> Dict:
        """Run the NER pipeline on *text* and return entities with metadata.

        Args:
            text: Input text to analyse.
            language: Language selected in the UI. It is not consulted here:
                the same pipeline call is made for every input. The parameter
                is kept so existing callers keep working.

        Returns:
            A dict with the keys ``"text"``, ``"entities"`` (the raw pipeline
            output), ``"stats"`` (entity group -> occurrence count) and
            ``"processing_time"`` (milliseconds, rounded to two decimals).
            Falsy input yields the same keys with empty/zero values, so
            callers can index the result without checking for missing keys.
        """
        if not text:
            # Same schema as the normal path; the UI handler reads "stats"
            # and "processing_time" unconditionally.
            return {
                "text": "",
                "entities": [],
                "stats": {},
                "processing_time": 0.0,
            }

        # perf_counter() is the clock intended for measuring short intervals.
        started = time.perf_counter()
        entities = self.ner_pipeline(text)
        processing_time = round((time.perf_counter() - started) * 1000, 2)  # ms

        # Count how often each entity group occurs.
        if entities:
            frame = pd.DataFrame(entities)
            entity_stats = frame["entity_group"].value_counts().to_dict()
        else:
            entity_stats = {}

        return {
            "text": text,
            "entities": entities,
            "stats": entity_stats,
            "processing_time": processing_time,
        }

    def _build_theme(self):
        """Return the Gradio theme (colours, font, radii) used by the demo."""
        return gr.themes.Base(
            primary_hue="blue",
            secondary_hue="slate",
            font=gr.themes.GoogleFont("Source Sans Pro"),
            neutral_hue="slate",
        ).set(
            body_text_color="*neutral_950",
            block_background_fill="*neutral_50",
            block_border_width="0px",
            button_primary_background_fill="*primary_500",
            button_primary_background_fill_hover="*primary_600",
            button_primary_text_color="white",
            input_background_fill="white",
            block_radius="lg",
        )

    def create_demo(self) -> "gr.Blocks":
        """Create and configure the Gradio interface.

        Returns:
            The ``gr.Blocks`` app, with the Analyze and Clear buttons wired to
            their handlers. The caller is responsible for launching it.
        """
        # NOTE(review): the nesting of the layout containers below was
        # reconstructed from a source whose indentation had been lost;
        # confirm it matches the intended layout.
        with gr.Blocks(theme=self._build_theme()) as demo:
            with gr.Row():
                gr.HTML(self._HEADER_HTML)

            with gr.Row():
                with gr.Column(scale=3):
                    text_input = gr.Textbox(
                        label="Input Text",
                        placeholder="Enter text in any supported language...",
                        lines=3,
                    )
                    language = gr.Dropdown(
                        choices=list(self.supported_languages.values()),
                        label="Language (Optional)",
                        value="English",
                    )
                    with gr.Row():
                        submit_btn = gr.Button("Analyze", variant="primary")
                        clear_btn = gr.Button("Clear")

                with gr.Column(scale=2):
                    with gr.Group():
                        gr.HTML(self._STATS_HEADING_HTML)
                        stats_output = gr.Json(label="Detected Entities")
                        time_output = gr.Markdown(
                            elem_classes="text-sm text-gray-600"
                        )

            highlighted_output = gr.HighlightedText(
                label="Detected Entities", show_legend=True
            )

            # Example inputs
            gr.Examples(self._EXAMPLES, inputs=[text_input, language])

            # Event handlers
            def process_and_format(text: str, lang: str) -> tuple:
                """Run NER and shape the result for the three output widgets."""
                result = self.process_ner(text, lang)
                # Hand the highlighter only the two keys it needs rather than
                # the whole result dict.
                highlighted = {
                    "text": result["text"],
                    "entities": result["entities"],
                }
                time_msg = f"Processing time: {result['processing_time']} ms"
                return (highlighted, result["stats"], time_msg)

            outputs = [highlighted_output, stats_output, time_output]
            submit_btn.click(
                process_and_format,
                inputs=[text_input, language],
                outputs=outputs,
            )
            # Clear resets the three outputs; the input textbox is left as is.
            clear_btn.click(
                lambda: (None, None, ""),
                outputs=outputs,
            )

        return demo
if __name__ == "__main__":
    # Script entry point: build the demo app, then launch it with share=True.
    app = NERDemo().create_demo()
    app.launch(share=True)