Spaces:

Tonic
/

hallucination-test

Running

App Files Files Community

Tonic committed 5 days ago

Commit

aa57e68

unverified ·

1 Parent(s): 46cd776

add new hallucination testing

Browse files

Files changed (2) hide show

app.py +137 -40
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,48 +1,145 @@
 import gradio as gr
-import requests
-import os
-# Define the API parameters
-API_URL = "https://api-inference.huggingface.co/models/vectara/hallucination_evaluation_model"
-API_TOKEN = os.getenv("HF_AUTH_TOKEN")
-if not API_TOKEN:
-    raise ValueError("Please set the HF_AUTH_TOKEN environment variable.")
-headers = {"Authorization": f"Bearer {API_TOKEN}"}
-# Function to query the API
-def query(payload):
-    response = requests.post(API_URL, headers=headers, json=payload)
-    return response.json()
-# Function to be called by the Gradio interface
-def evaluate_hallucination(input1, input2):
-    # Combine the inputs
-    combined_input = f"{input1}. {input2}"
-    # Make the API call
-    output = query({"inputs": combined_input})
-    # Extract the score from the output
-    score = output[0][0]['score']
-    # Return a red or green circle based on the score
-    if score < 0.5:
-        return "🔴", "The score is less than 0.5"
-    else:
-        return "🟢", "The score is greater than 0.5"
-# Create the Gradio interface
-iface = gr.Interface(
-    fn=evaluate_hallucination,
-    inputs=[gr.Textbox(label="Assertion"), gr.Textbox(label="Citation")],
-    outputs=[gr.Label(), gr.Textbox(label="Explanation")],
-    live=False,
-    title="👋🏻Welcome to 🌟Tonic's 🧠🌈Hallucination Tester 🔴🟢",
-    description="How To Use 🌈Hallucination tester: 🗣️📝add any assertion from an LLM or a human 🗣️😷 add any citation from a RAG retriever or a source 👇🏻📩 Press send 🔴red means a 🌈hallucination, 🟢 green means a 🧠credible assertion. Check out the model [vectara/hallucination_evaluation_model](https://huggingface.co/vectara/hallucination_evaluation_model) You can also use 🥒🍆🫑Vectara - Hallucination Tester 🗣️😷 via API below or way by cloning this space. 🧬🔬🔍 Simply click here: Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community 👻  [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to 🌟 [DataTonic](https://github.com/Tonic-AI/DataTonic) 🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗",
-    theme='ParityError/Anime',
-)
-# Launch the interface
-iface.launch()

 import gradio as gr
+from lettucedetect.models.inference import HallucinationDetector
+import os
+# Initialize the LettuceDetect model once, at module import, so that every
+# call to evaluate_hallucination reuses the same detector instance
+detector = HallucinationDetector(
+    method="transformer",
+    model_path="KRLabsOrg/lettucedect-large-modernbert-en-v1"
+)
+# Function to evaluate hallucination with LettuceDetect
+def evaluate_hallucination(context, question, answer):
+    """Run LettuceDetect on one answer and format the result for the UI.
+
+    Args:
+        context: Source text; passed to the detector as a one-element list.
+        question: Question the answer is responding to.
+        answer: Answer text to check for hallucinated spans.
+
+    Returns:
+        A 5-tuple: status emoji, explanation text, a list of
+        ``(text, label)`` segments covering the answer, one line per
+        flagged span with its confidence, and the average-confidence line.
+        Any exception is reported through the same tuple shape with a
+        "⚪" status instead of being raised.
+    """
+    try:
+        # Get span-level predictions from LettuceDetect
+        predictions = detector.predict(
+            context=[context],
+            question=question,
+            answer=answer,
+            output_format="spans"
+        )
+        # Nothing flagged: return the whole answer as one unlabeled segment
+        if not predictions:
+            return "🟢", "No hallucinations detected", [(answer, None)], "Confidence: N/A", "N/A"
+        highlighted_segments = []
+        confidence_scores = []
+        last_end = 0
+        # Walk the spans left to right; the slices taken between spans are
+        # only correct when last_end never moves backwards
+        for pred in sorted(predictions, key=lambda p: p['start']):
+            start, end = pred['start'], pred['end']
+            confidence = pred['confidence']
+            text = pred['text']
+            confidence_scores.append(f"'{text}' - Confidence: {confidence:.4f}")
+            if end <= last_end:
+                # Span lies entirely inside text that was already emitted;
+                # adding it again would duplicate part of the answer
+                continue
+            if start < last_end:
+                # Partial overlap: keep only the part not emitted yet
+                text = answer[last_end:end]
+            elif last_end < start:
+                # Add non-hallucinated text before this span
+                highlighted_segments.append((answer[last_end:start], None))
+            # Add hallucinated span with confidence as label
+            # NOTE(review): the UI's color_map is keyed on "hallucination",
+            # while this label also carries the confidence; confirm the red
+            # mapping is actually applied to these labels
+            label_with_confidence = f"hallucination (conf: {confidence:.4f})"
+            highlighted_segments.append((text, label_with_confidence))
+            last_end = end
+        # Add any remaining text after the last hallucination
+        if last_end < len(answer):
+            highlighted_segments.append((answer[last_end:], None))
+        # Calculate average confidence (predictions is non-empty here)
+        avg_confidence = sum(pred['confidence'] for pred in predictions) / len(predictions)
+        return (
+            "🔴",
+            "Hallucinations detected",
+            highlighted_segments,
+            "\n".join(confidence_scores),
+            f"Average Confidence: {avg_confidence:.4f}"
+        )
+    except Exception as e:
+        # UI callback boundary: surface the failure in the outputs rather
+        # than letting the exception propagate
+        return "⚪", f"Error: {str(e)}", [(answer, None)], "N/A", "N/A"
+# Gradio Blocks interface: a two-column page with the inputs on the left and
+# the evaluation outputs on the right
+with gr.Blocks(
+    title="🥬 LettuceDetect Hallucination Tester 🟢🔴",
+    theme="ParityError/Anime"
+) as demo:
+    # Intro text and usage instructions rendered at the top of the page
+    gr.Markdown(
+        """
+        # 🥬 LettuceDetect Hallucination Tester 🟢🔴
+        Powered by `lettucedect-large-modernbert-en-v1` from KRLabsOrg. Detect hallucinations in answers based on context and questions using ModernBERT with 8192-token context support!
+        ### How to Use:
+        1. Enter a **Context** (source document or info).
+        2. Enter a **Question** related to the context.
+        3. Enter an **Answer** to evaluate.
+        4. Press **Submit** to see if the answer hallucinates!
+        - 🟢 = No hallucinations
+        - 🔴 = Hallucinations detected
+        - Highlighted text shows hallucinated spans in **red** with confidence scores.
+        """
+    )
+    with gr.Row():
+        with gr.Column(scale=2):
+            # Inputs: the three text boxes passed to evaluate_hallucination,
+            # followed by the button that triggers it
+            context_input = gr.Textbox(
+                label="Context",
+                lines=5,
+                placeholder="Enter the context (e.g., a document or source text)..."
+            )
+            question_input = gr.Textbox(
+                label="Question",
+                placeholder="Enter the question..."
+            )
+            answer_input = gr.Textbox(
+                label="Answer",
+                lines=3,
+                placeholder="Enter the answer to evaluate..."
+            )
+            submit_btn = gr.Button("Submit")
+        with gr.Column(scale=3):
+            # Outputs: one component per element of the 5-tuple returned by
+            # evaluate_hallucination, in the order used by the click handler
+            status_output = gr.Label(label="Status")
+            explanation_output = gr.Textbox(label="Explanation", interactive=False)
+            highlighted_answer_output = gr.HighlightedText(
+                label="Answer with Hallucinations Highlighted",
+                show_legend=True,
+                color_map={"hallucination": "red"},  # Note: Only "hallucination" is used as base category
+                combine_adjacent=True
+            )
+            spans_output = gr.Textbox(label="Hallucinated Spans & Confidence", lines=5, interactive=False)
+            avg_confidence_output = gr.Textbox(label="Average Confidence", interactive=False)
+    # Connect inputs to outputs via the evaluation function
+    submit_btn.click(
+        fn=evaluate_hallucination,
+        inputs=[context_input, question_input, answer_input],
+        outputs=[status_output, explanation_output, highlighted_answer_output, spans_output, avg_confidence_output]
+    )
+    # Example: the answer states 69 million while the context states 67 million
+    gr.Markdown("### Example")
+    with gr.Row():
+        gr.Examples(
+            examples=[
+                [
+                    "France is a country in Europe. The capital of France is Paris. The population of France is 67 million.",
+                    "What is the capital of France? What is the population of France?",
+                    "The capital of France is Paris. The population of France is 69 million."
+                ]
+            ],
+            inputs=[context_input, question_input, answer_input]
+        )
+# Launch the demo
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ lettucedetect