KarthickAdopleAI committed
Commit 2058409 · verified · 1 Parent(s): 70413db

Update app.py

Files changed (1):
  1. app.py +46 -95

app.py CHANGED
@@ -82,103 +82,41 @@ class FactChecking:
 
         return output.replace("</s>","")
 
-    def extract_unique_sentences(self, text: str) -> Set[str]:
-        """
-        Extracts unique sentences from the given text.
-
-        Args:
-            text (str): The input text.
-
-        Returns:
-            Set[str]: A set containing unique sentences.
-        """
+    def find_different_sentences(self, chain_answer, llm_answer):
+        truth_values = []
         try:
-            # Tokenize the text into sentences using regex
-            sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
-            logging.info("Sentence extraction completed successfully.")
-            # Return a list of sentences
-            return sentences
-        except Exception as e:
-            logging.error(f"Error occurred in extract_unique_sentences: {e}")
-            return set()
-
-    def find_different_sentences(self,answer):
-        splitted_answer=answer.split("\n\n")
-
-        predictions_=[]
-        for i in range(len(splitted_answer)):
-            if "True." in splitted_answer[i]:
-                prediction="factual"
-                context=splitted_answer[i].split("\n")
-                # print(context)
-                for j in range(len(context)):
-                    t_sentence=context[j].replace(f"Fact {i+1}: ","")
-                    predictions_.append((t_sentence, prediction))
-                    break
-
-            elif "False." in splitted_answer[i]:
-                prediction="hallucinated"
-                context=splitted_answer[i].split("\n")
-                for j in range(len(context)):
-                    sentence=context[j].replace(f"Fact {i+1}: ","")
-                    break
-                predictions_.append((sentence, prediction))
-
-        return predictions_
-
-    def extract_words(self, text: str) -> List[str]:
-        """
-        Extracts words from the input text.
-
-        Parameters:
-            text (str): The input text.
-
-        Returns:
-            List[str]: A list containing the extracted words.
-        """
-        try:
-            # Tokenize the text into words and non-word characters (including spaces) using regex
-            chunks = re.findall(r'\b\w+\b|\W+', text)
-            logging.info("Words extracted successfully.")
-        except Exception as e:
-            logging.error(f"An error occurred while extracting words: {str(e)}")
-            return []
-        else:
-            return chunks
-
-    def label_words(self, text1: str, text2: str) -> List[Tuple[str, str]]:
-        """
-        Labels words in text1 as 'factual' if they are present in text2, otherwise 'hallucinated'.
-
-        Parameters:
-            text1 (str): The first text.
-            text2 (str): The second text.
-
-        Returns:
-            List[Tuple[str, str]]: A list of tuples containing words from text1 and their labels.
-        """
-        try:
-            # Extract chunks from both texts
-            chunks_text1 = self.extract_words(text1)
-            chunks_text2 = self.extract_words(text2)
-            # Convert chunks_text2 into a set for faster lookup
-            chunks_set_text2 = set(chunks_text2)
-            # Initialize labels list
-            labels = []
-            # Iterate over chunks in text1
-            for chunk in chunks_text1:
-                # Check if chunk is present in text2
-                if chunk in chunks_set_text2:
-                    labels.append((chunk, 'factual'))
-                else:
-                    labels.append((chunk, 'hallucinated'))
-            logging.info("Words labeled successfully.")
-            return labels
-        except Exception as e:
-            logging.error(f"An error occurred while labeling words: {str(e)}")
-            return []
+            # Pull the "(True)"/"(False)" verdict off each checked sentence,
+            # assuming sentences are separated by blank lines.
+            truth_values = [sentence.strip().split(' (')[1][:-1] for sentence in chain_answer.split('\n\n')]
+        except Exception:
+            print("double-newline split failed; falling back to single newlines")
+            try:
+                # Extracting the truth values from chain_answer
+                truth_values = [sentence.strip().split(' (')[1][:-1] for sentence in chain_answer.split('\n')]
+            except Exception:
+                print("could not extract truth values from chain_answer")
+
+        tags = []
+        for tag in truth_values:
+            if "True" in tag:
+                tags.append("factual")
+            else:
+                tags.append("hallucinated")
+        # Splitting llm_answer into sentences
+        llm_sentences = llm_answer.split('. ')
+
+        # Initializing an empty list to store tagged sentences
+        tagged_sentences = []
+
+        # Mapping the truth values to sentences in llm_answer
+        for sentence, truth_value in zip(llm_sentences, tags):
+            # Extracting the sentence without the truth value
+            sentence_text = sentence.split(' (')[0]
+            # Appending the sentence with its truth value
+            tagged_sentences.append((sentence_text + ".", truth_value))
+
+        return tagged_sentences
 
     def find_hallucinatted_sentence(self, question: str) -> Tuple[str, List[str]]:
         """
@@ -215,20 +153,31 @@ class FactChecking:
             If the fact is false, explain why."""
             prompt_template = PromptTemplate(input_variables=["statement"], template=template)
             assumptions_chain = LLMChain(llm=self.llm, prompt=prompt_template)
 
-            overall_chain = SimpleSequentialChain(chains=[question_chain, assumptions_chain], verbose=True)
+            extra_template = f" Original Summary: {mixtral_response} Use these checked assertions to rewrite the original summary sentence by sentence, marking each sentence as true or false. For each fact, determine whether it is true or false about the subject. If you are unable to determine whether the fact is true or false, output 'Undetermined'. ***Format: sentence (True or False) in parentheses.***"
+            template = """Below are some assertions that have been fact checked and are labeled as true or false. If an assertion is false, a suggestion is given for a correction.
+
+            Checked Assertions:
+            {assertions}
+            """
+            template += extra_template
+            prompt_template = PromptTemplate(input_variables=["assertions"], template=template)
+            answer_chain = LLMChain(llm=self.llm, prompt=prompt_template)
+            overall_chain = SimpleSequentialChain(chains=[question_chain, assumptions_chain, answer_chain], verbose=True)
 
             answer = overall_chain.run(mixtral_response)
+
             # Find different sentences between original result and fact checking result
-            prediction_list = self.find_different_sentences(answer)
+            prediction_list = self.find_different_sentences(answer, mixtral_response)
 
-            logging.info("Sentences comparison completed successfully.")
+            # prediction_list += generated_words
             # Return the original result and list of hallucinated sentences
-            return mixtral_response,prediction_list
+            return mixtral_response, prediction_list, answer
 
         except Exception as e:
-            logging.error(f"Error occurred in find_hallucinatted_sentence: {e}")
+            print(f"Error occurred in find_hallucinatted_sentence: {e}")
             return "", []
 
     def interface(self):
         css=""".gradio-container {background: rgb(157,228,255);
             background: radial-gradient(circle, rgba(157,228,255,1) 0%, rgba(18,115,106,1) 100%);}"""
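
To make the new three-step pipeline easier to follow, here is a minimal, self-contained sketch of the SimpleSequentialChain wiring added above. It is not the app's exact code: FakeListLLM (from langchain_community) stands in for the Mixtral endpoint held in self.llm so the sketch runs offline, and the prompt templates are trimmed placeholders rather than the full templates from the commit.

    # Sketch: three single-input LLMChains composed into one pipeline.
    from langchain.chains import LLMChain, SimpleSequentialChain
    from langchain.prompts import PromptTemplate
    from langchain_community.llms import FakeListLLM  # canned responses, no API key needed

    llm = FakeListLLM(responses=[
        "Fact 1: Paris is the capital of France.",        # step 1: facts extracted from the summary
        "Fact 1: Paris is the capital of France. True.",  # step 2: fact-checking verdicts
        "Paris is the capital of France. (True)",         # step 3: rewritten summary with per-sentence verdicts
    ])

    question_chain = LLMChain(llm=llm, prompt=PromptTemplate(
        input_variables=["summary"],
        template="List the factual assertions made in this summary:\n{summary}"))
    assumptions_chain = LLMChain(llm=llm, prompt=PromptTemplate(
        input_variables=["statement"],
        template="Determine whether each assertion is true or false:\n{statement}"))
    answer_chain = LLMChain(llm=llm, prompt=PromptTemplate(
        input_variables=["assertions"],
        template="Rewrite the summary, appending (True) or (False) to each sentence:\n{assertions}"))

    overall_chain = SimpleSequentialChain(
        chains=[question_chain, assumptions_chain, answer_chain], verbose=True)
    print(overall_chain.run("Paris is the capital of France."))
    # -> Paris is the capital of France. (True)

Each chain has exactly one input and one output, which is what SimpleSequentialChain requires; the output of each step is fed verbatim into the next prompt.
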
@@ -242,13 +191,15 @@ class FactChecking:
                 button = gr.Button(value="Submit")
             with gr.Row():
                 mixtral_response = gr.Textbox(label="llm answer")
+            with gr.Row():
+                fact_checking_result = gr.Textbox(label="hallucination detection result")
             with gr.Row():
                 highlighted_prediction = gr.HighlightedText(
                     label="Sentence Hallucination detection",
                     combine_adjacent=True,
                     color_map={"hallucinated": "red", "factual": "green"},
                     show_legend=True)
-            button.click(self.find_hallucinatted_sentence,question,[mixtral_response,highlighted_prediction])
+            button.click(self.find_hallucinatted_sentence, question, [mixtral_response, highlighted_prediction, fact_checking_result])
         demo.launch(debug=True)
 
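
For clarity on the Gradio wiring in this hunk: the click handler now has to return one value per registered output, and gr.HighlightedText expects a list of (text, label) tuples. The stub below mirrors the component names from the diff but uses hard-coded illustrative data in place of FactChecking.find_hallucinatted_sentence.

    import gradio as gr

    def stub_find_hallucinatted_sentence(question):
        # Hard-coded stand-in for FactChecking.find_hallucinatted_sentence (illustrative only).
        llm_answer = "Paris is the capital of France. The Eiffel Tower was built in 1950."
        fact_check = ("Paris is the capital of France. (True)\n\n"
                      "The Eiffel Tower was built in 1950. (False)")
        tagged = [("Paris is the capital of France.", "factual"),
                  ("The Eiffel Tower was built in 1950.", "hallucinated")]
        # Order matters: one return value per output component registered below.
        return llm_answer, tagged, fact_check

    with gr.Blocks() as demo:
        question = gr.Textbox(label="question")
        button = gr.Button(value="Submit")
        mixtral_response = gr.Textbox(label="llm answer")
        fact_checking_result = gr.Textbox(label="hallucination detection result")
        highlighted_prediction = gr.HighlightedText(
            label="Sentence Hallucination detection",
            combine_adjacent=True,
            color_map={"hallucinated": "red", "factual": "green"},
            show_legend=True)
        button.click(stub_find_hallucinatted_sentence, question,
                     [mixtral_response, highlighted_prediction, fact_checking_result])

    # demo.launch()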