import gradio as gr
from lettucedetect.models.inference import HallucinationDetector
title = """# 🙋🏻♂️Welcome to 🌟Tonic's 🥬 LettuceDetect - 🤯🧠 Hallucination Tester 🟢🔴 | |
Powered by `lettucedect-large-modernbert-en-v1` from KRLabsOrg. Detect hallucinations in answers based on context and questions using ModernBERT with 8192-token context support! | |
""" | |
description2 = """
### Model Details
- **Model Name**: [lettucedect-large-modernbert-en-v1](https://huggingface.co./KRLabsOrg/lettucedect-large-modernbert-en-v1)
- **Organization**: [KRLabsOrg](https://huggingface.co./KRLabsOrg)
- **GitHub**: [https://github.com/KRLabsOrg/LettuceDetect](https://github.com/KRLabsOrg/LettuceDetect)
- **Architecture**: ModernBERT (Large) with extended context support up to 8192 tokens
- **Task**: Token Classification / Hallucination Detection
- **Training Dataset**: [RAGTruth](https://huggingface.co./datasets/wandb/RAGTruth-processed)
- **Language**: English
- **Capabilities**: Detects hallucinated spans in answers, provides per-span confidence scores, and reports the average confidence across detected spans.
LettuceDetect excels at processing long documents to determine whether an answer aligns with the provided context, making it a powerful tool for ensuring factual accuracy.
"""
howto1 = """
### How to Use the LettuceDetect Tester
1. **Enter a Context**: Provide the source text or document (e.g., "France is a country in Europe..."). This is the factual basis for evaluation.
2. **Enter a Question**: Ask something related to the context (e.g., "What is the capital of France?").
3. **Enter an Answer**: Input the response you want to check (e.g., "The capital of France is Paris. The population is 69 million.").
4. **Press Submit**: Analyze the answer for hallucinations!
"""
howto2 = """
### Understanding the Output
- **Status**:
  - 🟢 = No hallucinations detected
  - 🔴 = Hallucinations detected
  - ⚪ = Error occurred
- **Explanation**: A brief summary of the result.
- **Highlighted Answer**: Shows the answer with hallucinated spans highlighted and labeled with confidence scores (e.g., "hallucination (conf: 0.9944)").
- **Hallucinated Spans & Confidence**: Lists each hallucinated segment with its confidence score.
- **Average Confidence**: Displays the average confidence of all detected hallucinations (e.g., "Average Confidence: 0.9944").
Use this tool to ensure your answers are grounded in reality!
"""
join_us = """
## Join us:
🌟TeamTonic🌟 is always making cool demos! Join our active builders' 🛠️ community 👻
[Join us on Discord](https://discord.gg/n8ytYeh25n)
On 🤗 Hugging Face: [MultiTransformer](https://huggingface.co./MultiTransformer)
On 🌐 GitHub: [Tonic-AI](https://github.com/tonic-ai) & contribute to 🌟 [Data Tonic](https://github.com/multiTonic/thinking-dataset/)
🤗 Big thanks to Yuvi Sharma and all the folks at Hugging Face for the community grant 🤗
"""
# Initialize the LettuceDetect model
detector = HallucinationDetector(
    method="transformer",
    model_path="KRLabsOrg/lettucedect-large-modernbert-en-v1",
)
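
# Each item in the spans-format predictions consumed below is expected to look
# like the dict sketched here (keys inferred from how they are used in
# evaluate_hallucination; values are illustrative only):
#
#     {"start": 33, "end": 62, "text": "The population is 69 million.",
#      "confidence": 0.9944}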
# Function to evaluate hallucination with LettuceDetect
def evaluate_hallucination(context, question, answer):
    try:
        # Get span-level predictions from LettuceDetect
        predictions = detector.predict(
            context=[context],
            question=question,
            answer=answer,
            output_format="spans",
        )
        # Process predictions for HighlightedText
        if not predictions:
            return "🟢", "No hallucinations detected", [(answer, None)], "Confidence: N/A", "N/A"
        highlighted_segments = []
        confidence_scores = []
        last_end = 0
        total_confidence = 0.0
        for pred in predictions:
            start, end = pred['start'], pred['end']
            confidence = pred['confidence']
            text = pred['text']
            # Add non-hallucinated text before this span
            if last_end < start:
                highlighted_segments.append((answer[last_end:start], None))
            # Add hallucinated span with confidence as label
            label_with_confidence = f"hallucination (conf: {confidence:.4f})"
            highlighted_segments.append((text, label_with_confidence))
            confidence_scores.append(f"'{text}' - Confidence: {confidence:.4f}")
            total_confidence += confidence
            last_end = end
        # Add any remaining text after the last hallucination
        if last_end < len(answer):
            highlighted_segments.append((answer[last_end:], None))
        # Calculate average confidence; predictions is guaranteed non-empty here
        # because the empty case returned early above
        avg_confidence = total_confidence / len(predictions)
        return (
            "🔴",
            "Hallucinations detected",
            highlighted_segments,
            "\n".join(confidence_scores),
            f"Average Confidence: {avg_confidence:.4f}",
        )
    except Exception as e:
        return "⚪", f"Error: {str(e)}", [(answer, None)], "N/A", "N/A"
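
# Illustrative sanity check outside the UI, using the same example wired up
# below; this input should yield a 🔴 status because the population figure
# contradicts the context (commented out so the Space only runs the Gradio app):
#
#     status, explanation, segments, spans, avg_conf = evaluate_hallucination(
#         "France is a country in Europe. The capital of France is Paris. "
#         "The population of France is 67 million.",
#         "What is the capital of France? What is the population of France?",
#         "The capital of France is Paris. The population of France is 69 million.",
#     )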
# Gradio Blocks interface
with gr.Blocks(
    title="🥬 LettuceDetect Hallucination Tester 🟢🔴"
) as demo:
    gr.Markdown(title)
    with gr.Row():
        with gr.Group():
            gr.Markdown(description2)
        with gr.Group():
            gr.Markdown(howto2)
    with gr.Row():
        with gr.Group():
            gr.Markdown(howto1)
        with gr.Group():
            gr.Markdown(join_us)
    with gr.Row():
        with gr.Column(scale=2):
            # Inputs
            context_input = gr.Textbox(
                label="Context",
                lines=5,
                placeholder="Enter the context (e.g., a document or source text)...",
            )
            question_input = gr.Textbox(
                label="Question",
                placeholder="Enter the question...",
            )
            answer_input = gr.Textbox(
                label="Answer",
                lines=3,
                placeholder="Enter the answer to evaluate...",
            )
            submit_btn = gr.Button("Submit")
        with gr.Column(scale=3):
            with gr.Row():
                with gr.Column():
                    status_output = gr.Label(label="Status")
                with gr.Column():
                    explanation_output = gr.Textbox(label="Explanation", interactive=False)
            highlighted_answer_output = gr.HighlightedText(
                label="Answer with Hallucinations Highlighted",
                show_legend=False,
                # Note: span labels carry the confidence (e.g., "hallucination
                # (conf: 0.9944)") and so don't match this key exactly; Gradio
                # auto-assigns colors for those, while a bare "hallucination"
                # category stays red.
                color_map={"hallucination": "red"},
                combine_adjacent=True,
            )
            spans_output = gr.Textbox(label="Hallucinated Spans & Confidence", lines=5, interactive=False)
            avg_confidence_output = gr.Textbox(label="Average Confidence", interactive=False)
    # Connect inputs to outputs via the evaluation function
    submit_btn.click(
        fn=evaluate_hallucination,
        inputs=[context_input, question_input, answer_input],
        outputs=[status_output, explanation_output, highlighted_answer_output, spans_output, avg_confidence_output],
    )
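    # The five outputs map positionally to evaluate_hallucination's return tuple:
    # (status, explanation, highlighted segments, span confidences, average
    # confidence); keep the two lists in sync if the layout changes.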
    # Example
    gr.Markdown("### Example")
    with gr.Row():
        gr.Examples(
            examples=[
                [
                    "France is a country in Europe. The capital of France is Paris. The population of France is 67 million.",
                    "What is the capital of France? What is the population of France?",
                    "The capital of France is Paris. The population of France is 69 million.",
                ]
            ],
            inputs=[context_input, question_input, answer_input],
        )
# Launch the demo
demo.launch()