"""FACTIFY - 5WQA: Streamlit demo for 5W aspect-based fact verification through question answering."""

import itertools
import re
import time

import pandas as pd
import requests
import streamlit as st
from PIL import Image
from rouge_score import rouge_scorer

from allennlp.predictors.predictor import Predictor
import allennlp_models.tagging  # noqa: F401 -- imported for its side effect of registering the SRL model components

st.set_page_config(page_title="FACTIFY - 5WQA", layout="wide")

# The secret is expected to hold the full Authorization header value (e.g. "Bearer hf_...").
HF_SPACES_API_KEY = st.secrets["HF_token"]

# Hosted Inference API endpoint for question generation (ProphetNet fine-tuned for SQuAD QG).
API_URL = "https://api-inference.huggingface.co/models/microsoft/prophetnet-large-uncased-squad-qg"
headers = {"Authorization": HF_SPACES_API_KEY}


def query(payload):
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()
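
# Illustrative response shapes (not exhaustive): a successful generation call returns
# a list like [{"generated_text": "..."}], while a still-loading model returns an error
# dict such as {"error": "...", "estimated_time": ...}; the retry loops below rely on this.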


# Endpoint used to answer the generated questions from the evidence text.
API_URL_evidence = "https://api-inference.huggingface.co/models/google/flan-t5-xxl"


def query_evidence(payload):
    response = requests.post(API_URL_evidence, headers=headers, json=payload)
    return response.json()


def model_load_qg(answer, claim):
    """Generate a question whose answer is `answer`, conditioned on the claim."""
    fact_to_generate_question_from = f"{answer} [SEP] {claim}"
    while True:
        try:
            # Per the Inference API conventions, generation arguments go under
            # "parameters" and "wait_for_model" under "options".
            question = query({
                "inputs": fact_to_generate_question_from,
                "parameters": {"num_beams": 5, "early_stopping": True, "min_length": 100},
                "options": {"wait_for_model": True},
            })[0]["generated_text"].capitalize()
            return question
        except (KeyError, IndexError, TypeError, requests.RequestException):
            # The API returns an error payload while the model loads; back off and retry.
            time.sleep(1)
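
# Hypothetical usage sketch of the QG contract:
#   model_load_qg("Moderna", "Moderna is suing Pfizer and BioNTech for patent infringement")
#   might yield something like "Who is suing pfizer and biontech for patent infringement?"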


def model_load_qa(question, evidence):
    """Answer the generated question from the evidence passage via FLAN-T5."""
    input_evidence = f"answer_the_next_question_from_context: {question} context: {evidence}"
    while True:
        try:
            answer_evidence = query_evidence({
                "inputs": input_evidence,
                "truncation": True,
                "options": {"wait_for_model": True},
            })[0]["generated_text"]
            return answer_evidence
        except (KeyError, IndexError, TypeError, requests.RequestException):
            time.sleep(1)
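
# Hypothetical usage sketch of the QA contract:
#   model_load_qa("Who is suing Pfizer and BioNTech?",
#                 "Moderna is suing Pfizer and BioNTech for patent infringement ...")
#   might return "Moderna".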


st.title('Welcome to :blue[FACTIFY - 5WQA]')

st.header('5W Aspect-based Fact Verification through Question Answering :blue[Web Demo]')
image = Image.open('5W QA Illustration.jpg')
st.image(image, caption='5W QA Generation Pipeline')

st.subheader('Here are a few steps to begin exploring and interacting with this demo.')
st.caption('First, enter your claim.')
st.caption('Next, enter your evidence.')
st.caption('After completing these two steps, please allow a few minutes for the results to appear.')

st.caption('To get started, paste one of the following claim-evidence pairs into the designated text fields.')

st.caption('**Example 1**')
st.caption(''':green[Claim:] :point_right: Moderna's legal actions towards Pfizer-BioNTech indicate that the development of COVID-19 vaccines was underway prior to the commencement of the pandemic.''')
st.caption(''':green[Evidence:] :point_right: Moderna is suing Pfizer and BioNTech for patent infringement, alleging the rival companies used key parts of its mRNA technology to develop their COVID-19 vaccine. Moderna's patents were filed between 2010 and 2016.''')

st.caption('**Example 2**')
st.caption(''':green[Claim:] :point_right: In China, Buddhist monks and nuns lived together in places such as the Yunnan monastery.''')
st.caption(''':green[Evidence:] :point_right: Monastics in Japan are particularly exceptional in the Buddhist tradition because the monks and nuns can marry after receiving their higher ordination.''')

st.caption('**Example 3**')
st.caption(''':green[Claim:] :point_right: In Batman, Penguin hydrates the henchmen with water contaminated with atomic waste.''')
st.caption(''':green[Evidence:] :point_right: And Penguin even schemes his way into the Batcave along with five dehydrated henchmen; this plan fails when the henchmen are unexpectedly killed when he mistakenly rehydrates them with heavy water contaminated with atomic waste, regularly used to recharge the Batcave's atomic pile.''')

st.caption('**Example 4**')
st.caption(''':green[Claim:] :point_right: Amazon to hire 100K workers and until April Amazon will raise hourly wages by $2 due to pandemic demand.''')
st.caption(''':green[Evidence:] :point_right: With consumers increasingly relying on online retailers, Amazon planned to hire over 99,000 workers in the warehouse and delivery sector during the pandemic in the USA.''')


with st.form(key="claim_evidence_form", clear_on_submit=True):
    claim_text = st.text_input("Enter claim:")
    evidence_text = st.text_input("Enter evidence:")
    submitted = st.form_submit_button("Submit")


# Load the AllenNLP BERT-based SRL model from a local archive (assumed to sit next to
# this script). Cached so Streamlit does not reload it on every rerun; assumes
# Streamlit >= 1.18 for st.cache_resource.
@st.cache_resource
def load_srl_predictor():
    return Predictor.from_path("structured-prediction-srl-bert.tar.gz")


predictor = load_srl_predictor()

# Pronouns and indefinite words that make uninformative answer spans.
list_of_pronouns = [
    "I", "i", "you", "he", "she", "it", "we", "they", "me", "him", "her", "us", "them",
    "mine", "yours", "his", "hers", "its", "ours", "theirs",
    "this", "that", "these", "those",
    "myself", "yourself", "himself", "herself", "itself", "ourselves", "yourselves", "themselves",
    "who", "whom", "what", "which", "whose",
    "all", "another", "any", "anybody", "anyone", "anything", "both", "each", "either",
    "everybody", "everyone", "everything", "few", "many", "neither", "nobody", "none", "nothing",
    "one", "other", "several", "some", "somebody", "someone", "something",
]
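

# AllenNLP's SRL predictor describes each verb with bracketed argument tags, e.g.
# (illustrative) "[ARG0: Moderna] [V: sued] [ARG1: Pfizer] [ARGM-TMP: in 2022]".
# srl() below rewrites those tags as 5W question words and harvests the tagged spans.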
def srl(text):
    """Run semantic role labeling over the claim and collect 5W answer spans.

    Returns five sets of spans: (who, what, when, where, why).
    """

    def remove_special_chars(s):
        # Strip punctuation except when it touches digits (keeps "99,000", "$2", etc.).
        return re.sub(r'(?<!\d)[^\w\s]+(?!\d)', '', s)

    try:
        result = predictor.predict(sentence=remove_special_chars(text))
    except IndexError:
        return set(), set(), set(), set(), set()

    # Rewrite SRL argument tags in each verb's description as 5W question words.
    label_to_w = {
        "ARG0": "who",
        "ARG1": "what",
        "ARGM-TMP": "when",
        "ARGM-LOC": "where",
        "ARGM-CAU": "why",
    }
    descriptions = []
    for verb_entry in result['verbs']:
        description = verb_entry['description']
        for label, w_word in label_to_w.items():
            description = description.replace(label, w_word)
        descriptions.append(description)

    def extract_spans(marker):
        """Collect the bracketed spans tagged with `marker` (e.g. "who: ") across all verbs."""
        spans = set()
        for description in descriptions:
            pos = description.find(marker)
            if pos == -1:
                continue
            substr = ''
            for ch in description[pos + len(marker):]:
                if ch == ']':
                    break
                substr += ch
            substr = substr.strip()
            # Skip bare pronouns and possessives, which make poor answer spans.
            if substr.lower() not in list_of_pronouns and not substr.lower().endswith("'s"):
                spans.add(substr)
        return spans

    who = extract_spans("who: ")
    what = extract_spans("what: ")
    when = extract_spans("when: ")
    where = extract_spans("where: ")
    why = extract_spans("why: ")
    return who, what, when, where, why


def calc_rouge_l_score(evidence_answer, claim_answer):
    """ROUGE-L F-measure between the evidence answer and the claim's answer span."""
    scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
    # rouge_scorer expects plain strings: score(target, prediction).
    scores = scorer.score(evidence_answer, claim_answer)
    return scores['rougeL'].fmeasure
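
# Sanity check (illustrative): identical strings score 1.0 and fully disjoint strings 0.0:
#   calc_rouge_l_score("moderna", "moderna") == 1.0
#   calc_rouge_l_score("moderna", "pfizer") == 0.0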


def qa_list_gen(claim, srl_list, evidence):
    """Generate a question per claim answer span, answer it from the evidence,
    and verify the two answers against each other with ROUGE-L."""
    threshold = 0.2
    list_of_qa_pipeline = []
    for answer_claim in srl_list:
        question = model_load_qg(answer_claim, claim)
        answer_evidence = model_load_qa(question, evidence)
        # Only trust answers that literally appear in the evidence text.
        if answer_evidence.lower() not in evidence.lower():
            answer_evidence = ""
        rouge_l_score = calc_rouge_l_score(answer_evidence, answer_claim)
        if rouge_l_score >= threshold:
            verification_status = '✅ Verified Valid'
        elif rouge_l_score == 0:
            verification_status = '❔ Not verifiable'
        else:
            verification_status = '❌ Verified False'
        list_of_qa_pipeline.append([question, answer_claim, answer_evidence, verification_status])
    return list_of_qa_pipeline
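
# Each returned row has the shape [question, answer span from the claim,
# answer retrieved from the evidence, verdict], e.g. (hypothetical):
#   ["Who is suing Pfizer and BioNTech?", "Moderna", "Moderna", "✅ Verified Valid"]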


if submitted and claim_text and evidence_text:
    st.caption(':green[Kindly hold on for a few minutes while the QA pairs are being generated.]')
    # Flatten the five (who, what, when, where, why) span sets into one list of answer spans.
    srl_list = list(itertools.chain(*[list(s) for s in srl(claim_text)]))
    qa_list = qa_list_gen(claim_text, srl_list, evidence_text)

    # Bucket each QA pair by the question word appearing in the generated question.
    list_who, list_what, list_when, list_where, list_why, list_misc = [], [], [], [], [], []
    for item in qa_list:
        question = item[0].lower()
        if 'who' in question:
            list_who.append(item)
        elif 'what' in question:
            list_what.append(item)
        elif 'when' in question:
            list_when.append(item)
        elif 'where' in question:
            list_where.append(item)
        elif 'why' in question:
            list_why.append(item)
        else:
            list_misc.append(item)

    w_words = ['who', 'when', 'why', 'where', 'what']
    lists = [list_who, list_when, list_why, list_where, list_what]
    for j, lst in enumerate(lists):
        for i, l in enumerate(lst):
            l[0] = f"Q{i+1}: {l[0]}"
            l[1] = f"Claim:- {l[1]}"
            if l[2]:
                l[2] = f"answer retrieved from evidence:- {l[2]}"
            else:
                l[2] = f"answer retrieved from evidence:- No mention of '{w_words[j]}' in any related documents."

    # Make sure every bucket renders at least one placeholder row.
    for i, lst in enumerate(lists):
        if not lst:
            lst.append(["No claims", "", f"No mention of '{w_words[i]}' in any related documents.", "❔ Not verifiable"])

    final_df = pd.DataFrame(columns=['Who Claims', 'What Claims', 'When Claims', 'Where Claims', 'Why Claims', 'Misc Claims'])

    # Flatten each bucket's [question, claim answer, evidence answer, verdict] rows into one column.
    all_items_who = [item for qa in list_who for item in qa]
    all_items_what = [item for qa in list_what for item in qa]
    all_items_when = [item for qa in list_when for item in qa]
    all_items_where = [item for qa in list_where for item in qa]
    all_items_why = [item for qa in list_why for item in qa]
    all_items_misc = [item for qa in list_misc for item in qa]

    # Pad every column to the same length so they fit one table.
    max_rows = max(len(all_items_who), len(all_items_what), len(all_items_when),
                   len(all_items_where), len(all_items_why), len(all_items_misc))
    final_df['Who Claims'] = all_items_who + [''] * (max_rows - len(all_items_who))
    final_df['What Claims'] = all_items_what + [''] * (max_rows - len(all_items_what))
    final_df['When Claims'] = all_items_when + [''] * (max_rows - len(all_items_when))
    final_df['Where Claims'] = all_items_where + [''] * (max_rows - len(all_items_where))
    final_df['Why Claims'] = all_items_why + [''] * (max_rows - len(all_items_why))
    final_df['Misc Claims'] = all_items_misc + [''] * (max_rows - len(all_items_misc))

    st.write(f"Claim: {claim_text}")
    st.write(f"Evidence: {evidence_text}")
    st.table(final_df)
else:
    st.warning("You need to input both the claim and evidence and then press Submit.")