Jan Štihec committed
Commit d2dc8ab • 1 Parent(s): 901aa88
Update app
Browse files
- GPTHelper.py +12 -14
- app.py +20 -6
- prompts/gpt35_fact_check.txt +9 -0
- prompts/gpt35_rephrase.txt +3 -0
GPTHelper.py
CHANGED
@@ -4,14 +4,17 @@ import os
 import logging
 import streamlit as st
 
-openai.api_key = st.secrets[…
+openai.api_key = st.secrets['openai_API_key']
 
 
-def …
+def open_file(filepath):
+    with open(filepath, 'r', encoding='utf-8') as file:
+        return file.read()
+
+
+def gpt35_rephrase(fact):
     # Dynamically generate the prompt to rephrase the fact as a PubMed query using GPT3.5
-    prompt = …
-    FACT: {fact}\n\
-    PUBMED QUERY:"
+    prompt = open_file('prompts/gpt35_rephrase.txt').replace('<<FACT>>', fact)
     try:
         response = openai.Completion.create(
            model="text-davinci-003",
@@ -36,12 +39,9 @@ def gpt_rephrase(fact):
        logging.error("Error communicating with OpenAI (rephrase): ", exc_info=e)
 
 
-def …
+def gpt35_check_fact(evidence, fact):
     # Dynamically generate the prompt to check the fact against the given PubMed article conclusion/abstract
-    prompt = …
-    EVIDENCE: {evidence}\n \
-    HYPOTHESIS: {fact}\n \
-    ANSWER:"
+    prompt = open_file('prompts/gpt35_fact_check.txt').replace('<<EVIDENCE>>', evidence).replace('<<HYPOTHESIS>>', fact)
     try:
         response = openai.Completion.create(
            model="text-davinci-003",
@@ -65,11 +65,9 @@ def check_fact(evidence, fact):
        logging.error("Error communicating with OpenAI (check_fact): ", exc_info=e)
 
 
-def …
+def gpt35_turbo_rephrase(fact):
     # Dynamically generate the prompt to rephrase the fact as a PubMed query using GPT3.5 turbo - lower cost than 3.5
-    prompt = …
-    FACT: {fact}\n\
-    PUBMED QUERY:"
+    prompt = open_file('prompts/gpt35_rephrase.txt').replace('<<FACT>>', fact)
     try:
         response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
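Both hunk views truncate the OpenAI calls right after the model argument. For orientation, here is a minimal sketch of how the new gpt35_rephrase plausibly fits together with open_file under the pre-1.0 openai SDK; the temperature and max_tokens values, the response parsing, and the omission of the file's try/except logging are assumptions of this sketch, not part of the commit.

# Sketch only: sampling parameters and response parsing are assumed, not shown in this commit.
import openai
import streamlit as st

openai.api_key = st.secrets['openai_API_key']


def open_file(filepath):
    # Read a prompt template from disk (same helper the commit adds).
    with open(filepath, 'r', encoding='utf-8') as file:
        return file.read()


def gpt35_rephrase(fact):
    # Fill the <<FACT>> placeholder and ask text-davinci-003 for a PubMed query.
    prompt = open_file('prompts/gpt35_rephrase.txt').replace('<<FACT>>', fact)
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=prompt,
        temperature=0,    # assumed: deterministic rephrasing
        max_tokens=100    # assumed cap; the real value is not visible in the diff
    )
    return response['choices'][0]['text'].strip()

Keeping the prompt text in prompts/*.txt also lets gpt35_rephrase and gpt35_turbo_rephrase share one template, since both hunks load prompts/gpt35_rephrase.txt.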
app.py
CHANGED
@@ -22,8 +22,9 @@ def get_articles(query, fetcher) -> Dict[List[str], List[str]]:
     for article in results:
         article_id = 0  # If PubMed search fails to return anything
         try:
-            article_id = article.pubmed_id[:8]  # Sometimes pymed wrongly returns a long list of ids. Use only the …
-            …
+            article_id = article.pubmed_id[:8]  # Sometimes pymed wrongly returns a long list of ids. Use only the first
+            # [] can cause the cross-encoder to misinterpret string as a list
+            title = article.title.replace('[', '(').replace(']', ')')
         conclusion = article.conclusions
         abstract = article.abstract
         article_url = f'https://pubmed.ncbi.nlm.nih.gov/{article_id}/'
@@ -31,10 +32,12 @@
             f'text-decoration: underline;">PubMed ID: {article_id}</a>'  # Injects a link to plotly
         if conclusion:
             # Not all articles come with the provided conclusions. Abstract is used alternatively.
+            conclusion = conclusion.replace('[', '(').replace(']', ')')
             conclusions.append(title+'\n'+conclusion)
             titles.append(title)  # Title is added to the conclusion to improve relevance ranking.
             links.append(article_link)
         elif abstract:
+            abstract = abstract.replace('[', '(').replace(']', ')')
             conclusions.append(title + '\n' + abstract)
             titles.append(title)
             links.append(article_link)
@@ -96,6 +99,9 @@ def run_ui():
     sidebar.title('HOW IT WORKS')
     sidebar.write('Source code and in-depth app description available at:')
     sidebar.info('**GitHub: [@jacinthes](https://github.com/jacinthes/slovene-nli-benchmark)**', icon="💻")
+    sidebar.title('DISCLAIMER')
+    sidebar.write('This project is meant for educational and research purposes. \n'
+                  'PubMed fact-checker may provide inaccurate information.')
 
     if not submitted and not st.session_state.valid_inputs_received:
         st.stop()
@@ -116,7 +122,7 @@
         st.stop()
 
     elif submitted or st.session_state.valid_inputs_received:
-        pubmed_query = GPTHelper.gpt35_rephrase(fact)  # Call gpt3.5 …
+        pubmed_query = GPTHelper.gpt35_rephrase(fact)  # Call gpt3.5 to rephrase fact as a PubMed query.
        pubmed = load_pubmed_fetcher()
 
        with st.spinner('Fetching articles...'):
@@ -125,7 +131,6 @@
        article_conclusions = articles['Conclusions']
        article_links = articles['Links']
        cross_inp = [[fact, conclusions] for conclusions in article_conclusions]
-
        with st.spinner('Assessing article relevancy...'):
            cross_encoder = load_cross_encoder()
            cross_scores = cross_encoder.predict(cross_inp)  # Calculate relevancy using the defined cross-encoder.
@@ -135,7 +140,6 @@
            'Conclusion': article_conclusions,
            'Score': cross_scores
        })
-
        df.sort_values(by=['Score'], ascending=False, inplace=True)
        df = df[df['Score'] > 0]  # Only keep articles with relevancy score above 0.
        if df.shape[0] == 0:  # If no relevant article si found, inform the user.
@@ -153,7 +157,17 @@
        percent_complete = 0
        predictions = []
        for index, row in df.iterrows():
-            …
+            prediction = GPTHelper.gpt35_check_fact(row['Conclusion'], fact)  # Prompt to GPT3.5 to fact-check
+            # For output purposes I use True, False and Undetermined as labels.
+            if prediction == 'Entails':
+                predictions.append('True')
+            elif prediction == 'Contradicts':
+                predictions.append('False')
+            elif prediction == 'Undetermined':
+                predictions.append(prediction)
+            else:
+                logging.warning(f'Unexpected prediction: {prediction}')
+
            percent_complete += step/100
            fact_checking_bar.progress(round(percent_complete, 2), text=progress_text)
        fact_checking_bar.empty()
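The new loop in app.py maps the model's labels (Entails / Contradicts / Undetermined) onto display labels with an if/elif chain. A dictionary lookup expresses the same mapping more compactly; the sketch below is only an illustrative alternative, assuming gpt35_check_fact returns exactly one of the three labels named in prompts/gpt35_fact_check.txt, and its fallback differs from the commit, which only logs a warning and appends nothing.

# Illustrative alternative to the if/elif chain above - not part of this commit.
import logging

LABEL_MAP = {
    'Entails': 'True',        # evidence supports the fact
    'Contradicts': 'False',   # evidence refutes the fact
    'Undetermined': 'Undetermined',
}


def to_display_label(prediction: str) -> str:
    # Unknown labels are logged and shown as 'Undetermined' (assumed fallback).
    if prediction not in LABEL_MAP:
        logging.warning(f'Unexpected prediction: {prediction}')
        return 'Undetermined'
    return LABEL_MAP[prediction]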
prompts/gpt35_fact_check.txt
ADDED
@@ -0,0 +1,9 @@
+Does the evidence entail the hypothesis? Answer with Entails, Contradicts or Undetermined.
+Label explanation:
+Entails: hypothesis is true.
+Contradicts: hypothesis is false.
+Undetermined: hypothesis is undetermined.
+
+EVIDENCE: <<EVIDENCE>>
+HYPOTHESIS: <<HYPOTHESIS>>
+ANSWER:
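To see how this template is consumed, here is a minimal sketch of the substitution that gpt35_check_fact performs, reusing the open_file helper from GPTHelper.py; the evidence and fact strings are invented for illustration.

# Hypothetical example values - not taken from the repository.
def open_file(filepath):
    with open(filepath, 'r', encoding='utf-8') as file:
        return file.read()


evidence = 'Regular physical activity was associated with lower systolic blood pressure in adults.'
fact = 'Exercise lowers blood pressure.'

prompt = (open_file('prompts/gpt35_fact_check.txt')
          .replace('<<EVIDENCE>>', evidence)
          .replace('<<HYPOTHESIS>>', fact))
print(prompt)  # the rendered prompt ends with "ANSWER:", which the model completes with one of the three labels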
prompts/gpt35_rephrase.txt
ADDED
@@ -0,0 +1,3 @@
+Rephrase the following fact as a Pubmed search query.
+FACT: <<FACT>>
+PUBMED QUERY:
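Filled the same way, this template backs both gpt35_rephrase and gpt35_turbo_rephrase. A hypothetical end-to-end call looks like the following; the fact string is invented, and running it assumes the prompt file and the openai_API_key Streamlit secret are available.

# Hypothetical usage - the fact string is invented; the query returned by GPT-3.5 will vary.
import GPTHelper

fact = 'Vitamin D supplementation prevents the common cold.'
pubmed_query = GPTHelper.gpt35_rephrase(fact)  # fills <<FACT>> and completes after "PUBMED QUERY:"
print(pubmed_query)  # app.py hands this string to the PubMed fetcher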