Tonic committed on
Commit
aa57e68
·
unverified ·
1 Parent(s): 46cd776

add new hallucination testing

Browse files
Files changed (2) hide show
  1. app.py +137 -40
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,48 +1,145 @@
1
-
2
  import gradio as gr
3
- import requests
4
- import os
5
-
6
- # Define the API parameters
7
- API_URL = "https://api-inference.huggingface.co/models/vectara/hallucination_evaluation_model"
8
- API_TOKEN = os.getenv("HF_AUTH_TOKEN")
9
- if not API_TOKEN:
10
- raise ValueError("Please set the HF_AUTH_TOKEN environment variable.")
11
 
12
- headers = {"Authorization": f"Bearer {API_TOKEN}"}
 
 
 
 
13
 
14
- # Function to query the API
15
- def query(payload):
16
- response = requests.post(API_URL, headers=headers, json=payload)
17
- return response.json()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- # Function to be called by the Gradio interface
20
- def evaluate_hallucination(input1, input2):
21
- # Combine the inputs
22
- combined_input = f"{input1}. {input2}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- # Make the API call
25
- output = query({"inputs": combined_input})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- # Extract the score from the output
28
- score = output[0][0]['score']
 
 
 
 
29
 
30
- # Return a red or green circle based on the score
31
- if score < 0.5:
32
- return "🔴", "The score is less than 0.5"
33
- else:
34
- return "🟢", "The score is greater than 0.5"
35
-
36
- # Create the Gradio interface
37
- iface = gr.Interface(
38
- fn=evaluate_hallucination,
39
- inputs=[gr.Textbox(label="Assertion"), gr.Textbox(label="Citation")],
40
- outputs=[gr.Label(), gr.Textbox(label="Explanation")],
41
- live=False,
42
- title="👋🏻Welcome to 🌟Tonic's 🧠🌈Hallucination Tester 🔴🟢",
43
- description="How To Use 🌈Hallucination tester: 🗣️📝add any assertion from an LLM or a human 🗣️😷 add any citation from a RAG retriever or a source 👇🏻📩 Press send 🔴red means a 🌈hallucination, 🟢 green means a 🧠credible assertion. Check out the model [vectara/hallucination_evaluation_model](https://huggingface.co/vectara/hallucination_evaluation_model) You can also use 🥒🍆🫑Vectara - Hallucination Tester 🗣️😷 via API below or way by cloning this space. 🧬🔬🔍 Simply click here: Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to 🌟 [DataTonic](https://github.com/Tonic-AI/DataTonic) 🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗",
44
- theme='ParityError/Anime',
45
- )
46
 
47
- # Launch the interface
48
- iface.launch()
 
 
1
  import gradio as gr
2
+ from lettucedetect.models.inference import HallucinationDetector
3
+ import os
 
 
 
 
 
 
4
 
5
+ # Initialize the LettuceDetect model
6
# Construct the LettuceDetect detector once at import time; it is the
# module-level `detector` that evaluate_hallucination calls for predictions.
_DETECTOR_OPTIONS = {
    "method": "transformer",
    "model_path": "KRLabsOrg/lettucedect-large-modernbert-en-v1",
}
detector = HallucinationDetector(**_DETECTOR_OPTIONS)
10
 
11
+ # Function to evaluate hallucination with LettuceDetect
12
def evaluate_hallucination(context, question, answer):
    """Check ``answer`` for hallucinated spans and format the result for the UI.

    Args:
        context: Source text the answer is checked against. It is passed to
            the detector wrapped in a single-element list.
        question: Question the answer responds to.
        answer: Answer text to evaluate.

    Returns:
        A 5-tuple ``(status, explanation, highlighted_segments, span_details,
        average_confidence)``:

        * ``status`` -- "🟢" (no spans found), "🔴" (spans found) or "⚪"
          (nothing to evaluate, or the detector raised).
        * ``explanation`` -- short human-readable summary or error message.
        * ``highlighted_segments`` -- list of ``(text, label)`` tuples that
          cover the answer; ``label`` is ``"hallucination"`` for flagged
          spans and ``None`` otherwise.
        * ``span_details`` -- one line per flagged span with its confidence.
        * ``average_confidence`` -- mean confidence over the flagged spans,
          formatted as text, or "N/A".

    Errors raised by the detector are not propagated; they are reported
    through the "⚪" status so the UI callback never crashes.
    """
    # Fallback highlight payload: the whole answer, unlabelled.
    plain_answer = [(answer or "", None)]

    # Nothing to evaluate: skip the model call rather than report a
    # misleading green status for an empty answer.
    if not answer or not answer.strip():
        return "⚪", "Please enter an answer to evaluate.", plain_answer, "N/A", "N/A"

    try:
        # Span-level predictions; each item is read below as a mapping with
        # 'start', 'end', 'confidence' and 'text' keys.
        predictions = detector.predict(
            context=[context],
            question=question,
            answer=answer,
            output_format="spans",
        )

        if not predictions:
            return "🟢", "No hallucinations detected", plain_answer, "Confidence: N/A", "N/A"

        highlighted_segments = []
        span_details = []
        last_end = 0

        # Walk the spans left to right so the unlabelled gaps between them
        # are emitted in order even if the detector returns them unsorted.
        for pred in sorted(predictions, key=lambda p: p["start"]):
            start, end = pred["start"], pred["end"]
            confidence = pred["confidence"]
            text = pred["text"]

            # Unflagged answer text between the previous span and this one.
            if last_end < start:
                highlighted_segments.append((answer[last_end:start], None))

            # The label must equal the "hallucination" key of the
            # HighlightedText color_map for the red colour to apply; the
            # per-span confidence is reported in span_details instead.
            highlighted_segments.append((text, "hallucination"))
            span_details.append(f"'{text}' - Confidence: {confidence:.4f}")

            # Never move backwards: a nested span must not cause already
            # emitted text to be repeated as a gap.
            last_end = max(last_end, end)

        # Remaining unflagged text after the last span.
        if last_end < len(answer):
            highlighted_segments.append((answer[last_end:], None))

        avg_confidence = sum(p["confidence"] for p in predictions) / len(predictions)

        return (
            "🔴",
            "Hallucinations detected",
            highlighted_segments,
            "\n".join(span_details),
            f"Average Confidence: {avg_confidence:.4f}",
        )

    except Exception as e:
        # UI boundary: surface the failure in the interface instead of
        # letting the callback raise.
        return "⚪", f"Error: {str(e)}", plain_answer, "N/A", "N/A"
68
 
69
+ # Gradio Blocks interface
70
with gr.Blocks(title="🥬 LettuceDetect Hallucination Tester 🟢🔴", theme="ParityError/Anime") as demo:
    # Page header and usage instructions.
    gr.Markdown(
        """
        # 🥬 LettuceDetect Hallucination Tester 🟢🔴
        Powered by `lettucedect-large-modernbert-en-v1` from KRLabsOrg. Detect hallucinations in answers based on context and questions using ModernBERT with 8192-token context support!

        ### How to Use:
        1. Enter a **Context** (source document or info).
        2. Enter a **Question** related to the context.
        3. Enter an **Answer** to evaluate.
        4. Press **Submit** to see if the answer hallucinates!

        - 🟢 = No hallucinations
        - 🔴 = Hallucinations detected
        - Highlighted text shows hallucinated spans in **red** with confidence scores.
        """
    )

    with gr.Row():
        # Left column: the three text inputs and the trigger button.
        with gr.Column(scale=2):
            context_box = gr.Textbox(
                label="Context",
                lines=5,
                placeholder="Enter the context (e.g., a document or source text)...",
            )
            question_box = gr.Textbox(label="Question", placeholder="Enter the question...")
            answer_box = gr.Textbox(
                label="Answer",
                lines=3,
                placeholder="Enter the answer to evaluate...",
            )
            run_button = gr.Button("Submit")

        # Right column: the five result widgets, created in the same order
        # as the values returned by evaluate_hallucination.
        with gr.Column(scale=3):
            status_view = gr.Label(label="Status")
            explanation_view = gr.Textbox(label="Explanation", interactive=False)
            highlight_view = gr.HighlightedText(
                label="Answer with Hallucinations Highlighted",
                show_legend=True,
                color_map={"hallucination": "red"},  # colour for the "hallucination" label
                combine_adjacent=True,
            )
            spans_view = gr.Textbox(label="Hallucinated Spans & Confidence", lines=5, interactive=False)
            average_view = gr.Textbox(label="Average Confidence", interactive=False)

    form_inputs = [context_box, question_box, answer_box]
    result_outputs = [status_view, explanation_view, highlight_view, spans_view, average_view]

    # Run the evaluation when the button is clicked.
    run_button.click(fn=evaluate_hallucination, inputs=form_inputs, outputs=result_outputs)

    # One ready-made example that fills the three input boxes.
    gr.Markdown("### Example")
    with gr.Row():
        gr.Examples(
            examples=[
                [
                    "France is a country in Europe. The capital of France is Paris. The population of France is 67 million.",
                    "What is the capital of France? What is the population of France?",
                    "The capital of France is Paris. The population of France is 69 million.",
                ]
            ],
            inputs=form_inputs,
        )

# Start the Gradio app.
demo.launch()
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ lettucedetect