FillTheBlanks / app.py
ashishraics's picture
error fix
593d5c4
raw
history blame
6.97 kB
import logging
import streamlit as st
import nltk
# Download NLTK data as a top-level side effect, before the nltk sub-imports below.
# NOTE(review): 'punkt' is presumably what sent_tokenize needs; 'stopwords' and
# 'wordnet' are imported below but not referenced in the visible code - confirm.
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')
from nltk.corpus import stopwords,wordnet
from nltk.tokenize import sent_tokenize
from flashtext import KeywordProcessor
import regex as re
import string
import subprocess
from PIL import Image
import multiprocessing
# CPU count of the host; only interpolated into the README text further down.
total_threads=multiprocessing.cpu_count()
# pke is installed from GitHub at start-up, so this call must stay ahead of `import pke`.
# NOTE(review): the return code of the pip call is not checked; a failed install
# surfaces as an ImportError on the next line.
subprocess.run(['pip3', 'install','git+https://github.com/boudinfl/pke.git'])
import pke
# Page-wide Streamlit settings.
# The title passed here is the literal string 'None'; set_page_title() is
# called further down with the title the app actually displays.
st.set_page_config(
    page_title='None',  # a string (or None)
    initial_sidebar_state="auto",  # one of "auto", "expanded", "collapsed"
    layout="wide",  # "centered" or "wide"
)
def set_page_title(title):
    """Inject a zero-height iframe into the sidebar whose script sets the page title.

    The embedded script looks up the parent document's ``<title>`` element,
    disconnects any observer previously stored on ``window.parent.titleObserver``,
    installs a new ``MutationObserver`` that writes *title* back whenever the
    title text differs from it, stores that observer on the parent window, and
    finally sets the title text once.

    Args:
        title: Text interpolated as-is into single-quoted JavaScript strings.
    """
    # Doubled braces are literal braces in the f-string; only {title} is substituted.
    # NOTE(review): `title` is not escaped, so a quote character in it would
    # break the generated script - fine for the constant used below, confirm
    # before passing user-controlled text.
    # NOTE(review): the trailing backslashes sit inside a non-raw string, so
    # Python drops the newline that follows each one; confirm the joined
    # JavaScript statements are still valid.
    st.sidebar.markdown(unsafe_allow_html=True, body=f"""
<iframe height=0 srcdoc="<script>
const title = window.parent.document.querySelector('title') \
const oldObserver = window.parent.titleObserver
if (oldObserver) {{
oldObserver.disconnect()
}} \
const newObserver = new MutationObserver(function(mutations) {{
const target = mutations[0].target
if (target.text !== '{title}') {{
target.text = '{title}'
}}
}}) \
newObserver.observe(title, {{ childList: true }})
window.parent.titleObserver = newObserver \
title.text = '{title}'
</script>" />
""")
# Apply the app's title once at start-up.
set_page_title('Fill Blanks')
def tokenize_sentence(text):
    """Split *text* into sentences and return the longer ones, trimmed.

    Sentences are produced by ``sent_tokenize``. A sentence is kept only when
    its length (measured before trimming) exceeds 20 characters; surrounding
    whitespace is then removed.

    Args:
        text: The paragraph to split.

    Returns:
        A list of trimmed sentence strings in their original order.
    """
    kept = []
    for candidate in sent_tokenize(text):
        # The length test deliberately uses the untrimmed sentence.
        if len(candidate) <= 20:
            continue
        kept.append(candidate.strip())
    return kept
def get_noun_adj_verb(text):
    """Extract keyphrases from *text* with pke's ``MultipartiteRank`` extractor.

    Candidates are restricted to the NOUN, VERB and ADJ part-of-speech tags and
    the five best-ranked phrases are requested.

    This is a best-effort helper: if any step of the extraction fails, the
    error is logged with its traceback (instead of being silently discarded)
    and an empty list is returned so the caller can carry on.

    Args:
        text: The paragraph to analyse.

    Returns:
        A list of keyphrase strings, possibly empty.
    """
    try:
        extractor = pke.unsupervised.MultipartiteRank()
        extractor.load_document(input=text, language='en', normalization=None)
        # Keyphrase candidate selection; other available tags include
        # 'ADP' 'ADV' 'AUX' 'DET' 'NUM' 'PART' 'PROPN' 'PUNCT'.
        extractor.candidate_selection(pos={'NOUN', 'VERB', 'ADJ'})
        # Candidate weighting.
        extractor.candidate_weighting(threshold=0.74, method='average', alpha=1.1)
        # Each result is a (phrase, score) pair; only the phrase is kept.
        return [ranked[0] for ranked in extractor.get_n_best(n=5)]
    except Exception:
        # Deliberately broad: extraction is optional, but the failure must be visible.
        logging.getLogger(__name__).exception("Keyphrase extraction failed")
        return []
def get_keywords_sentence(keywords,tokenized_sent):
    """Map every keyword to the sentences containing it, longest first.

    Matching is a plain, case-sensitive substring test. A keyword that occurs
    in no sentence maps to an empty list. Sentences of equal length keep their
    original relative order.

    Args:
        keywords: Iterable of keyword strings.
        tokenized_sent: Sentences to search.

    Returns:
        A dict from keyword to its list of matching sentences, sorted by
        decreasing length.
    """
    return {
        keyword: sorted(
            (sentence for sentence in tokenized_sent if keyword in sentence),
            key=len,
            reverse=True,
        )
        for keyword in keywords
    }
def create_blanks(keyword_sentence_dict):
    """Build fill-in-the-blank questions from a keyword -> sentences mapping.

    For each keyword the first sentence in its list is used, and every literal
    occurrence of the keyword in that sentence is replaced by a blank.

    Keywords whose sentence list is empty are skipped rather than raising
    ``IndexError``. The keyword is matched as literal text, not as a regular
    expression, so characters such as ``+`` or ``(`` are safe.

    Args:
        keyword_sentence_dict: Mapping of keyword to a list of sentences.

    Returns:
        A tuple ``(answer, fib)`` of two parallel lists: the keywords used and
        the corresponding blanked-out sentences.
    """
    answer = []
    fib = []
    for keyword, sentences in keyword_sentence_dict.items():
        if not sentences:
            # No sentence contains this keyword, so no question can be made.
            continue
        answer.append(keyword)
        fib.append(sentences[0].replace(keyword, '____________'))
    return answer, fib
# Main-page heading (raw HTML through markdown) followed by a divider.
st.markdown(
    "<h1 style='text-align: center; color: #3366ff;'>Create Fill The Blanks Questions</h1>",
    unsafe_allow_html=True,
)
st.markdown("---")

# Sidebar: heading plus the task selector that drives the page content below.
st.sidebar.markdown(
    "<h1 style='text-align: left; color: ;'>NLP Tasks</h1>",
    unsafe_allow_html=True,
)
select_task = st.sidebar.selectbox(
    label="Select task from drop down menu",
    options=['README', 'Basic Fill Blanks'],
)
# Sample paragraph used as the initial value of the input text area on the
# 'Basic Fill Blanks' page.
default_paratext = """On May 4, the Red Planet was rocked by a roughly magnitude 5 temblor, the largest Marsquake detected to date, NASA’s Jet Propulsion Laboratory in Pasadena, Calif., reports. The shaking lasted for more than six hours and released more than 10 times the energy of the previous record-holding quake.The U.S. space agency’s InSight lander, which has been studying Mars’ deep interior since touching down on the planet in 2018 (SN: 11/26/18), recorded the event. The quake probably originated near the Cerberus Fossae region, which is more than 1,000 kilometers from the lander.Cerberus Fossae is known for its fractured surface and frequent rockfalls. It makes sense that the ground would be shifting there, says geophysicist Philippe Lognonné, principal investigator of the Seismic Experiment for Interior Structure, InSight’s seismometer. “It’s an ancient volcanic bulge.Just like earthquakes reveal information about our planet’s interior structure, Marsquakes can be used to probe what lies beneath Mars’ surface (SN: 7/22/21). And a lot can be learned from studying this whopper of a quake, says Lognonné, of the Institut de Physique du Globe de Paris. “The signal is so good, we’ll be able to work on the details."""
# Image displayed on the README page. It is opened here at module level, so the
# file is read on every run regardless of which task is selected.
img = Image.open("hf_space1.png")
# README page: a short description of the app, a divider, then an image.
if select_task == 'README':
    st.header("Summary")
    readme_summary = (
        f"The App gives you ability to create *Fill in the blanks* Capability just like Ed-Techs.Currently.It has {total_threads} CPU cores but only 1 is available per user so "
        f"inference time will be on the higher side."
    )
    st.write(readme_summary)
    st.markdown("---")
    st.image(img)
def _render_fill_blank_questions(paragraph):
    """Run the question pipeline on *paragraph* and render one bullet per question.

    The pipeline is: split into sentences, extract keywords, map keywords to
    the sentences containing them, blank the keyword out of the chosen sentence.
    """
    sentences = tokenize_sentence(paragraph)
    keywords = get_noun_adj_verb(paragraph)
    keyword_sentences = get_keywords_sentence(keywords=keywords, tokenized_sent=sentences)
    answers, questions = create_blanks(keyword_sentence_dict=keyword_sentences)
    for solution, question in zip(answers, questions):
        st.markdown(f"* {question} | **Answer is *{solution}* ** ", unsafe_allow_html=True)


# 'Basic Fill Blanks' page: text input plus a button that triggers generation.
if select_task == 'Basic Fill Blanks':
    input_text = st.text_area(label='Input paragraph', height=500, max_chars=2000, value=default_paratext)
    create_fib = st.button("Create Questions")
    # Generate only on click. The pipeline previously also ran unconditionally
    # above this check, so questions were rendered twice after a click, along
    # with a leftover debug `st.write(1)`.
    if create_fib:
        with st.spinner("Creating"):
            _render_fill_blank_questions(input_text)
# demo = gr.Interface(fn=get_noun_adj_verb,
# inputs=gr.inputs.Textbox(lines=10,default=default_paratext),
# outputs=gr.outputs.Textbox(),
# allow_flagging='never',
# layout='vertical',
# title="Make Fill in the Blanks using your text",
# )
#
# if __name__ == "__main__":
# demo.launch()