Jan Štihec committed on
Commit d2dc8ab
1 Parent(s): 901aa88

Update app

GPTHelper.py CHANGED
@@ -4,14 +4,17 @@ import os
 import logging
 import streamlit as st
 
-openai.api_key = st.secrets["openai_api_key"]
+openai.api_key = st.secrets['openai_API_key']
 
 
-def gpt_rephrase(fact):
+def open_file(filepath):
+    with open(filepath, 'r', encoding='utf-8') as file:
+        return file.read()
+
+
+def gpt35_rephrase(fact):
     # Dynamically generate the prompt to rephrase the fact as a PubMed query using GPT3.5
-    prompt = f"Rephrase the following fact as a Pubmed search query.\n\
-    FACT: {fact}\n\
-    PUBMED QUERY:"
+    prompt = open_file('prompts/gpt35_rephrase.txt').replace('<<FACT>>', fact)
     try:
         response = openai.Completion.create(
             model="text-davinci-003",
@@ -36,12 +39,9 @@ def gpt_rephrase(fact):
         logging.error("Error communicating with OpenAI (rephrase): ", exc_info=e)
 
 
-def check_fact(evidence, fact):
+def gpt35_check_fact(evidence, fact):
     # Dynamically generate the prompt to check the fact against the given PubMed article conclusion/abstract
-    prompt = f"Based exclusively on the evidence provided, is the following hypothesis True, False or Undetermined?\n\
-    EVIDENCE: {evidence}\n \
-    HYPOTHESIS: {fact}\n \
-    ANSWER:"
+    prompt = open_file('prompts/gpt35_fact_check.txt').replace('<<EVIDENCE>>', evidence).replace('<<HYPOTHESIS>>', fact)
     try:
         response = openai.Completion.create(
             model="text-davinci-003",
@@ -65,11 +65,9 @@ def check_fact(evidence, fact):
         logging.error("Error communicating with OpenAI (check_fact): ", exc_info=e)
 
 
-def gpt35_rephrase(fact):
+def gpt35_turbo_rephrase(fact):
     # Dynamically generate the prompt to rephrase the fact as a PubMed query using GPT3.5 turbo - lower cost than 3.5
-    prompt = f"Rephrase the following fact as a Pubmed search query.\n\
-    FACT: {fact}\n\
-    PUBMED QUERY:"
+    prompt = open_file('prompts/gpt35_rephrase.txt').replace('<<FACT>>', fact)
     try:
         response = openai.ChatCompletion.create(
             model="gpt-3.5-turbo",
app.py CHANGED
@@ -22,8 +22,9 @@ def get_articles(query, fetcher) -> Dict[List[str], List[str]]:
     for article in results:
         article_id = 0  # If PubMed search fails to return anything
         try:
-            article_id = article.pubmed_id[:8]  # Sometimes pymed wrongly returns a long list of ids. Use only the firstpip freeze >
-            title = article.title
+            article_id = article.pubmed_id[:8]  # Sometimes pymed wrongly returns a long list of ids. Use only the first
+            # [] can cause the cross-encoder to misinterpret string as a list
+            title = article.title.replace('[', '(').replace(']', ')')
             conclusion = article.conclusions
             abstract = article.abstract
             article_url = f'https://pubmed.ncbi.nlm.nih.gov/{article_id}/'
@@ -31,10 +32,12 @@ def get_articles(query, fetcher) -> Dict[List[str], List[str]]:
                            f'text-decoration: underline;">PubMed ID: {article_id}</a>'  # Injects a link to plotly
             if conclusion:
                 # Not all articles come with the provided conclusions. Abstract is used alternatively.
+                conclusion = conclusion.replace('[', '(').replace(']', ')')
                 conclusions.append(title+'\n'+conclusion)
                 titles.append(title)  # Title is added to the conclusion to improve relevance ranking.
                 links.append(article_link)
             elif abstract:
+                abstract = abstract.replace('[', '(').replace(']', ')')
                 conclusions.append(title + '\n' + abstract)
                 titles.append(title)
                 links.append(article_link)
@@ -96,6 +99,9 @@ def run_ui():
     sidebar.title('HOW IT WORKS')
     sidebar.write('Source code and in-depth app description available at:')
     sidebar.info('**GitHub: [@jacinthes](https://github.com/jacinthes/slovene-nli-benchmark)**', icon="💻")
+    sidebar.title('DISCLAIMER')
+    sidebar.write('This project is meant for educational and research purposes. \n'
+                  'PubMed fact-checker may provide inaccurate information.')
 
     if not submitted and not st.session_state.valid_inputs_received:
         st.stop()
@@ -116,7 +122,7 @@ def run_ui():
         st.stop()
 
     elif submitted or st.session_state.valid_inputs_received:
-        pubmed_query = GPTHelper.gpt35_rephrase(fact)  # Call gpt3.5 turbo to rephrase fact as a PubMed query.
+        pubmed_query = GPTHelper.gpt35_rephrase(fact)  # Call gpt3.5 to rephrase fact as a PubMed query.
         pubmed = load_pubmed_fetcher()
 
         with st.spinner('Fetching articles...'):
@@ -125,7 +131,6 @@ def run_ui():
         article_conclusions = articles['Conclusions']
         article_links = articles['Links']
         cross_inp = [[fact, conclusions] for conclusions in article_conclusions]
-
         with st.spinner('Assessing article relevancy...'):
             cross_encoder = load_cross_encoder()
             cross_scores = cross_encoder.predict(cross_inp)  # Calculate relevancy using the defined cross-encoder.
@@ -135,7 +140,6 @@ def run_ui():
             'Conclusion': article_conclusions,
             'Score': cross_scores
         })
-
         df.sort_values(by=['Score'], ascending=False, inplace=True)
         df = df[df['Score'] > 0]  # Only keep articles with relevancy score above 0.
         if df.shape[0] == 0:  # If no relevant article si found, inform the user.
@@ -153,7 +157,17 @@ def run_ui():
         percent_complete = 0
         predictions = []
         for index, row in df.iterrows():
-            predictions.append(GPTHelper.check_fact(row['Conclusion'], fact))  # Prompt to GPT3.5 to fact-check
+            prediction = GPTHelper.gpt35_check_fact(row['Conclusion'], fact)  # Prompt to GPT3.5 to fact-check
+            # For output purposes I use True, False and Undetermined as labels.
+            if prediction == 'Entails':
+                predictions.append('True')
+            elif prediction == 'Contradicts':
+                predictions.append('False')
+            elif prediction == 'Undetermined':
+                predictions.append(prediction)
+            else:
+                logging.warning(f'Unexpected prediction: {prediction}')
+
            percent_complete += step/100
            fact_checking_bar.progress(round(percent_complete, 2), text=progress_text)
        fact_checking_bar.empty()
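
Note: the new label-mapping loop assumes GPTHelper.gpt35_check_fact returns exactly one of 'Entails', 'Contradicts' or 'Undetermined'. The function's return statement is not shown in this diff; a hedged sketch of the normalization that would satisfy that assumption (the strip step is an assumption, not confirmed by the commit):

    # Hypothetical tail of gpt35_check_fact; the diff does not show its return statement.
    # Completion responses often begin with whitespace/newlines, so strip before comparing.
    answer = response.choices[0].text.strip()
    return answer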
prompts/gpt35_fact_check.txt ADDED
@@ -0,0 +1,9 @@
+Does the evidence entail the hypothesis? Answer with Entails, Contradicts or Undetermined.
+Label explanation:
+Entails: hypothesis is true.
+Contradicts: hypothesis is false.
+Undetermined: hypothesis is undetermined.
+
+EVIDENCE: <<EVIDENCE>>
+HYPOTHESIS: <<HYPOTHESIS>>
+ANSWER:
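
For illustration, gpt35_check_fact fills the two placeholders with the article text and the user's claim before calling the model (the evidence and hypothesis below are hypothetical):

    # Hypothetical substitution mirroring gpt35_check_fact in GPTHelper.py.
    evidence = 'Vitamin C supplementation did not reduce cold incidence in the general population.'
    fact = 'Vitamin C prevents the common cold.'
    prompt = open_file('prompts/gpt35_fact_check.txt').replace('<<EVIDENCE>>', evidence).replace('<<HYPOTHESIS>>', fact)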
prompts/gpt35_rephrase.txt ADDED
@@ -0,0 +1,3 @@
+Rephrase the following fact as a Pubmed search query.
+FACT: <<FACT>>
+PUBMED QUERY: