KarthickAdopleAI committed
Commit 2058409 · verified · 1 Parent(s): 70413db

Update app.py

Files changed (1):
  1. app.py +46 -95

app.py CHANGED
@@ -82,103 +82,41 @@ class FactChecking:
 
         return output.replace("</s>","")
 
-    def extract_unique_sentences(self, text: str) -> Set[str]:
-        """
-        Extracts unique sentences from the given text.
-
-        Args:
-            text (str): The input text.
-
-        Returns:
-            Set[str]: A set containing unique sentences.
-        """
+    def find_different_sentences(self, chain_answer, llm_answer):
+        truth_values = []
         try:
-            # Tokenize the text into sentences using regex
-            sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
-            logging.info("Sentence extraction completed successfully.")
-            # Return a list of sentences
-            return sentences
-        except Exception as e:
-            logging.error(f"Error occurred in extract_unique_sentences: {e}")
-            return set()
-
-    def find_different_sentences(self,answer):
-        splitted_answer=answer.split("\n\n")
-
-        predictions_=[]
-        for i in range(len(splitted_answer)):
-            if "True." in splitted_answer[i]:
-                prediction="factual"
-                context=splitted_answer[i].split("\n")
-                # print(context)
-                for j in range(len(context)):
-                    t_sentence=context[j].replace(f"Fact {i+1}: ","")
-                    predictions_.append((t_sentence, prediction))
-                    break
-
-            elif "False." in splitted_answer[i]:
-                prediction="hallucinated"
-                context=splitted_answer[i].split("\n")
-                for j in range(len(context)):
-                    sentence=context[j].replace(f"Fact {i+1}: ","")
-                    break
-                predictions_.append((sentence, prediction))
-
-        return predictions_
-
-    def extract_words(self, text: str) -> List[str]:
-        """
-        Extracts words from the input text.
-
-        Parameters:
-            text (str): The input text.
-
-        Returns:
-            List[str]: A list containing the extracted words.
-        """
-        try:
-            # Tokenize the text into words and non-word characters (including spaces) using regex
-            chunks = re.findall(r'\b\w+\b|\W+', text)
-            logging.info("Words extracted successfully.")
-        except Exception as e:
-            logging.error(f"An error occurred while extracting words: {str(e)}")
-            return []
-        else:
-            return chunks
-
-    def label_words(self, text1: str, text2: str) -> List[Tuple[str, str]]:
-        """
-        Labels words in text1 as 'factual' if they are present in text2, otherwise 'hallucinated'.
-
-        Parameters:
-            text1 (str): The first text.
-            text2 (str): The second text.
-
-        Returns:
-            List[Tuple[str, str]]: A list of tuples containing words from text1 and their labels.
-        """
-        try:
-            # Extract chunks from both texts
-            chunks_text1 = self.extract_words(text1)
-            chunks_text2 = self.extract_words(text2)
-            # Convert chunks_text2 into a set for faster lookup
-            chunks_set_text2 = set(chunks_text2)
-            # Initialize labels list
-            labels = []
-            # Iterate over chunks in text1
-            for chunk in chunks_text1:
-                # Check if chunk is present in text2
-                if chunk in chunks_set_text2:
-                    labels.append((chunk, 'factual'))
-                else:
-                    labels.append((chunk, 'hallucinated'))
-            logging.info("Words labeled successfully.")
-            return labels
-        except Exception as e:
-            logging.error(f"An error occurred while labeling words: {str(e)}")
-            return []
+            # Pull the "(True)"/"(False)" verdict off each checked sentence,
+            # assuming sentences are separated by blank lines.
+            truth_values = [sentence.strip().split(' (')[1][:-1] for sentence in chain_answer.split('\n\n')]
+        except Exception:
+            print("double-newline split failed; falling back to single newlines")
+            try:
+                # Extracting the truth values from chain_answer
+                truth_values = [sentence.strip().split(' (')[1][:-1] for sentence in chain_answer.split('\n')]
+            except Exception:
+                print("could not extract truth values from chain_answer")
+
+        tags = []
+        for tag in truth_values:
+            if "True" in tag:
+                tags.append("factual")
+            else:
+                tags.append("hallucinated")
+        # Splitting llm_answer into sentences
+        llm_sentences = llm_answer.split('. ')
+
+        # Initializing an empty list to store tagged sentences
+        tagged_sentences = []
+
+        # Mapping the truth values to sentences in llm_answer
+        for sentence, truth_value in zip(llm_sentences, tags):
+            # Extracting the sentence without the truth value
+            sentence_text = sentence.split(' (')[0]
+            # Appending the sentence with its truth value
+            tagged_sentences.append((sentence_text + ".", truth_value))
+
+        return tagged_sentences
 
     def find_hallucinatted_sentence(self, question: str) -> Tuple[str, List[str]]:
         """
@@ -215,20 +153,31 @@ class FactChecking:
             If the fact is false, explain why."""
             prompt_template = PromptTemplate(input_variables=["statement"], template=template)
             assumptions_chain = LLMChain(llm=self.llm, prompt=prompt_template)
 
-            overall_chain = SimpleSequentialChain(chains=[question_chain, assumptions_chain], verbose=True)
+            extra_template = f" Original Summary: {mixtral_response} Use these checked assertions to rewrite the original summary sentence by sentence, marking each sentence as true or false. For each fact, determine whether it is true or false about the subject. If you are unable to determine whether the fact is true or false, output 'Undetermined'. ***Format: sentence (True or False) in parentheses.***"
+            template = """Below are some assertions that have been fact checked and are labeled as true or false. If an assertion is false, a suggestion is given for a correction.
+
+            Checked Assertions:
+            {assertions}
+            """
+            template += extra_template
+            prompt_template = PromptTemplate(input_variables=["assertions"], template=template)
+            answer_chain = LLMChain(llm=self.llm, prompt=prompt_template)
+            overall_chain = SimpleSequentialChain(chains=[question_chain, assumptions_chain, answer_chain], verbose=True)
 
             answer = overall_chain.run(mixtral_response)
+
             # Find different sentences between original result and fact checking result
-            prediction_list = self.find_different_sentences(answer)
+            prediction_list = self.find_different_sentences(answer, mixtral_response)
 
-            logging.info("Sentences comparison completed successfully.")
+            # prediction_list += generated_words
             # Return the original result and list of hallucinated sentences
-            return mixtral_response,prediction_list
+            return mixtral_response, prediction_list, answer
 
         except Exception as e:
-            logging.error(f"Error occurred in find_hallucinatted_sentence: {e}")
+            print(f"Error occurred in find_hallucinatted_sentence: {e}")
             return "", []
 
     def interface(self):
         css=""".gradio-container {background: rgb(157,228,255);
             background: radial-gradient(circle, rgba(157,228,255,1) 0%, rgba(18,115,106,1) 100%);}"""
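
To make the new three-step pipeline easier to follow, here is a minimal, self-contained sketch of the SimpleSequentialChain wiring added above. It is not the app's exact code: FakeListLLM (from langchain_community) stands in for the Mixtral endpoint held in self.llm so the sketch runs offline, and the prompt templates are trimmed placeholders rather than the full templates from the commit.

    # Sketch: three single-input LLMChains composed into one pipeline.
    from langchain.chains import LLMChain, SimpleSequentialChain
    from langchain.prompts import PromptTemplate
    from langchain_community.llms import FakeListLLM  # canned responses, no API key needed

    llm = FakeListLLM(responses=[
        "Fact 1: Paris is the capital of France.",        # step 1: facts extracted from the summary
        "Fact 1: Paris is the capital of France. True.",  # step 2: fact-checking verdicts
        "Paris is the capital of France. (True)",         # step 3: rewritten summary with per-sentence verdicts
    ])

    question_chain = LLMChain(llm=llm, prompt=PromptTemplate(
        input_variables=["summary"],
        template="List the factual assertions made in this summary:\n{summary}"))
    assumptions_chain = LLMChain(llm=llm, prompt=PromptTemplate(
        input_variables=["statement"],
        template="Determine whether each assertion is true or false:\n{statement}"))
    answer_chain = LLMChain(llm=llm, prompt=PromptTemplate(
        input_variables=["assertions"],
        template="Rewrite the summary, appending (True) or (False) to each sentence:\n{assertions}"))

    overall_chain = SimpleSequentialChain(
        chains=[question_chain, assumptions_chain, answer_chain], verbose=True)
    print(overall_chain.run("Paris is the capital of France."))
    # -> Paris is the capital of France. (True)

Each chain has exactly one input and one output, which is what SimpleSequentialChain requires; the output of each step is fed verbatim into the next prompt.
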
@@ -242,13 +191,15 @@ class FactChecking:
                 button = gr.Button(value="Submit")
             with gr.Row():
                 mixtral_response = gr.Textbox(label="llm answer")
+            with gr.Row():
+                fact_checking_result = gr.Textbox(label="hallucination detection result")
             with gr.Row():
                 highlighted_prediction = gr.HighlightedText(
                     label="Sentence Hallucination detection",
                     combine_adjacent=True,
                     color_map={"hallucinated": "red", "factual": "green"},
                     show_legend=True)
-            button.click(self.find_hallucinatted_sentence,question,[mixtral_response,highlighted_prediction])
+            button.click(self.find_hallucinatted_sentence, question, [mixtral_response, highlighted_prediction, fact_checking_result])
         demo.launch(debug=True)
 
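
For clarity on the Gradio wiring in this hunk: the click handler now has to return one value per registered output, and gr.HighlightedText expects a list of (text, label) tuples. The stub below mirrors the component names from the diff but uses hard-coded illustrative data in place of FactChecking.find_hallucinatted_sentence.

    import gradio as gr

    def stub_find_hallucinatted_sentence(question):
        # Hard-coded stand-in for FactChecking.find_hallucinatted_sentence (illustrative only).
        llm_answer = "Paris is the capital of France. The Eiffel Tower was built in 1950."
        fact_check = ("Paris is the capital of France. (True)\n\n"
                      "The Eiffel Tower was built in 1950. (False)")
        tagged = [("Paris is the capital of France.", "factual"),
                  ("The Eiffel Tower was built in 1950.", "hallucinated")]
        # Order matters: one return value per output component registered below.
        return llm_answer, tagged, fact_check

    with gr.Blocks() as demo:
        question = gr.Textbox(label="question")
        button = gr.Button(value="Submit")
        mixtral_response = gr.Textbox(label="llm answer")
        fact_checking_result = gr.Textbox(label="hallucination detection result")
        highlighted_prediction = gr.HighlightedText(
            label="Sentence Hallucination detection",
            combine_adjacent=True,
            color_map={"hallucinated": "red", "factual": "green"},
            show_legend=True)
        button.click(stub_find_hallucinatted_sentence, question,
                     [mixtral_response, highlighted_prediction, fact_checking_result])

    # demo.launch()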