Jan Štihec committed
Commit d2dc8ab • 1 Parent(s): 901aa88
Update app
Browse files
- GPTHelper.py +12 -14
- app.py +20 -6
- prompts/gpt35_fact_check.txt +9 -0
- prompts/gpt35_rephrase.txt +3 -0
GPTHelper.py
CHANGED
@@ -4,14 +4,17 @@ import os
 import logging
 import streamlit as st
 
-openai.api_key = st.secrets[…
+openai.api_key = st.secrets['openai_API_key']
 
 
-def …
+def open_file(filepath):
+    with open(filepath, 'r', encoding='utf-8') as file:
+        return file.read()
+
+
+def gpt35_rephrase(fact):
     # Dynamically generate the prompt to rephrase the fact as a PubMed query using GPT3.5
-    prompt = …
-    FACT: {fact}\n\
-    PUBMED QUERY:"
+    prompt = open_file('prompts/gpt35_rephrase.txt').replace('<<FACT>>', fact)
     try:
         response = openai.Completion.create(
            model="text-davinci-003",
@@ -36,12 +39,9 @@ def gpt_rephrase(fact):
        logging.error("Error communicating with OpenAI (rephrase): ", exc_info=e)
 
 
-def …
+def gpt35_check_fact(evidence, fact):
     # Dynamically generate the prompt to check the fact against the given PubMed article conclusion/abstract
-    prompt = …
-    EVIDENCE: {evidence}\n \
-    HYPOTHESIS: {fact}\n \
-    ANSWER:"
+    prompt = open_file('prompts/gpt35_fact_check.txt').replace('<<EVIDENCE>>', evidence).replace('<<HYPOTHESIS>>', fact)
     try:
         response = openai.Completion.create(
            model="text-davinci-003",
@@ -65,11 +65,9 @@ def check_fact(evidence, fact):
        logging.error("Error communicating with OpenAI (check_fact): ", exc_info=e)
 
 
-def …
+def gpt35_turbo_rephrase(fact):
     # Dynamically generate the prompt to rephrase the fact as a PubMed query using GPT3.5 turbo - lower cost than 3.5
-    prompt = …
-    FACT: {fact}\n\
-    PUBMED QUERY:"
+    prompt = open_file('prompts/gpt35_rephrase.txt').replace('<<FACT>>', fact)
     try:
         response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
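Both hunk views truncate the OpenAI calls right after the model argument. For orientation, here is a minimal sketch of how the new gpt35_rephrase plausibly fits together with open_file under the pre-1.0 openai SDK; the temperature and max_tokens values, the response parsing, and the omission of the file's try/except logging are assumptions of this sketch, not part of the commit.

# Sketch only: sampling parameters and response parsing are assumed, not shown in this commit.
import openai
import streamlit as st

openai.api_key = st.secrets['openai_API_key']


def open_file(filepath):
    # Read a prompt template from disk (same helper the commit adds).
    with open(filepath, 'r', encoding='utf-8') as file:
        return file.read()


def gpt35_rephrase(fact):
    # Fill the <<FACT>> placeholder and ask text-davinci-003 for a PubMed query.
    prompt = open_file('prompts/gpt35_rephrase.txt').replace('<<FACT>>', fact)
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=prompt,
        temperature=0,    # assumed: deterministic rephrasing
        max_tokens=100    # assumed cap; the real value is not visible in the diff
    )
    return response['choices'][0]['text'].strip()

Keeping the prompt text in prompts/*.txt also lets gpt35_rephrase and gpt35_turbo_rephrase share one template, since both hunks load prompts/gpt35_rephrase.txt.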
app.py
CHANGED
@@ -22,8 +22,9 @@ def get_articles(query, fetcher) -> Dict[List[str], List[str]]:
     for article in results:
         article_id = 0  # If PubMed search fails to return anything
         try:
-            article_id = article.pubmed_id[:8]  # Sometimes pymed wrongly returns a long list of ids. Use only the …
-            …
+            article_id = article.pubmed_id[:8]  # Sometimes pymed wrongly returns a long list of ids. Use only the first
+            # [] can cause the cross-encoder to misinterpret string as a list
+            title = article.title.replace('[', '(').replace(']', ')')
         conclusion = article.conclusions
         abstract = article.abstract
         article_url = f'https://pubmed.ncbi.nlm.nih.gov/{article_id}/'
@@ -31,10 +32,12 @@
             f'text-decoration: underline;">PubMed ID: {article_id}</a>'  # Injects a link to plotly
         if conclusion:
             # Not all articles come with the provided conclusions. Abstract is used alternatively.
+            conclusion = conclusion.replace('[', '(').replace(']', ')')
             conclusions.append(title+'\n'+conclusion)
             titles.append(title)  # Title is added to the conclusion to improve relevance ranking.
             links.append(article_link)
         elif abstract:
+            abstract = abstract.replace('[', '(').replace(']', ')')
             conclusions.append(title + '\n' + abstract)
             titles.append(title)
             links.append(article_link)
@@ -96,6 +99,9 @@ def run_ui():
     sidebar.title('HOW IT WORKS')
     sidebar.write('Source code and in-depth app description available at:')
     sidebar.info('**GitHub: [@jacinthes](https://github.com/jacinthes/slovene-nli-benchmark)**', icon="💻")
+    sidebar.title('DISCLAIMER')
+    sidebar.write('This project is meant for educational and research purposes. \n'
+                  'PubMed fact-checker may provide inaccurate information.')
 
     if not submitted and not st.session_state.valid_inputs_received:
         st.stop()
@@ -116,7 +122,7 @@
         st.stop()
 
     elif submitted or st.session_state.valid_inputs_received:
-        pubmed_query = GPTHelper.gpt35_rephrase(fact)  # Call gpt3.5 …
+        pubmed_query = GPTHelper.gpt35_rephrase(fact)  # Call gpt3.5 to rephrase fact as a PubMed query.
        pubmed = load_pubmed_fetcher()
 
        with st.spinner('Fetching articles...'):
@@ -125,7 +131,6 @@
        article_conclusions = articles['Conclusions']
        article_links = articles['Links']
        cross_inp = [[fact, conclusions] for conclusions in article_conclusions]
-
        with st.spinner('Assessing article relevancy...'):
            cross_encoder = load_cross_encoder()
            cross_scores = cross_encoder.predict(cross_inp)  # Calculate relevancy using the defined cross-encoder.
@@ -135,7 +140,6 @@
            'Conclusion': article_conclusions,
            'Score': cross_scores
        })
-
        df.sort_values(by=['Score'], ascending=False, inplace=True)
        df = df[df['Score'] > 0]  # Only keep articles with relevancy score above 0.
        if df.shape[0] == 0:  # If no relevant article si found, inform the user.
@@ -153,7 +157,17 @@
        percent_complete = 0
        predictions = []
        for index, row in df.iterrows():
-            …
+            prediction = GPTHelper.gpt35_check_fact(row['Conclusion'], fact)  # Prompt to GPT3.5 to fact-check
+            # For output purposes I use True, False and Undetermined as labels.
+            if prediction == 'Entails':
+                predictions.append('True')
+            elif prediction == 'Contradicts':
+                predictions.append('False')
+            elif prediction == 'Undetermined':
+                predictions.append(prediction)
+            else:
+                logging.warning(f'Unexpected prediction: {prediction}')
+
            percent_complete += step/100
            fact_checking_bar.progress(round(percent_complete, 2), text=progress_text)
        fact_checking_bar.empty()
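The new loop in app.py maps the model's labels (Entails / Contradicts / Undetermined) onto display labels with an if/elif chain. A dictionary lookup expresses the same mapping more compactly; the sketch below is only an illustrative alternative, assuming gpt35_check_fact returns exactly one of the three labels named in prompts/gpt35_fact_check.txt, and its fallback differs from the commit, which only logs a warning and appends nothing.

# Illustrative alternative to the if/elif chain above - not part of this commit.
import logging

LABEL_MAP = {
    'Entails': 'True',        # evidence supports the fact
    'Contradicts': 'False',   # evidence refutes the fact
    'Undetermined': 'Undetermined',
}


def to_display_label(prediction: str) -> str:
    # Unknown labels are logged and shown as 'Undetermined' (assumed fallback).
    if prediction not in LABEL_MAP:
        logging.warning(f'Unexpected prediction: {prediction}')
        return 'Undetermined'
    return LABEL_MAP[prediction]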
prompts/gpt35_fact_check.txt
ADDED
@@ -0,0 +1,9 @@
+Does the evidence entail the hypothesis? Answer with Entails, Contradicts or Undetermined.
+Label explanation:
+Entails: hypothesis is true.
+Contradicts: hypothesis is false.
+Undetermined: hypothesis is undetermined.
+
+EVIDENCE: <<EVIDENCE>>
+HYPOTHESIS: <<HYPOTHESIS>>
+ANSWER:
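To see how this template is consumed, here is a minimal sketch of the substitution that gpt35_check_fact performs, reusing the open_file helper from GPTHelper.py; the evidence and fact strings are invented for illustration.

# Hypothetical example values - not taken from the repository.
def open_file(filepath):
    with open(filepath, 'r', encoding='utf-8') as file:
        return file.read()


evidence = 'Regular physical activity was associated with lower systolic blood pressure in adults.'
fact = 'Exercise lowers blood pressure.'

prompt = (open_file('prompts/gpt35_fact_check.txt')
          .replace('<<EVIDENCE>>', evidence)
          .replace('<<HYPOTHESIS>>', fact))
print(prompt)  # the rendered prompt ends with "ANSWER:", which the model completes with one of the three labels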
prompts/gpt35_rephrase.txt
ADDED
@@ -0,0 +1,3 @@
+Rephrase the following fact as a Pubmed search query.
+FACT: <<FACT>>
+PUBMED QUERY:
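Filled the same way, this template backs both gpt35_rephrase and gpt35_turbo_rephrase. A hypothetical end-to-end call looks like the following; the fact string is invented, and running it assumes the prompt file and the openai_API_key Streamlit secret are available.

# Hypothetical usage - the fact string is invented; the query returned by GPT-3.5 will vary.
import GPTHelper

fact = 'Vitamin D supplementation prevents the common cold.'
pubmed_query = GPTHelper.gpt35_rephrase(fact)  # fills <<FACT>> and completes after "PUBMED QUERY:"
print(pubmed_query)  # app.py hands this string to the PubMed fetcher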