File size: 9,247 Bytes
1d43b95
 
d1a4da6
1d43b95
 
 
 
 
 
 
 
 
 
 
 
 
 
ae34bdf
 
 
 
 
 
6080c36
ae34bdf
9f7e51c
1d43b95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f39fb3
ae34bdf
7f39fb3
 
 
 
 
 
 
 
 
 
 
ae34bdf
1d43b95
 
 
 
 
 
ae34bdf
 
 
 
 
 
 
1d43b95
 
 
 
 
 
 
 
 
 
 
 
7f39fb3
1d43b95
 
 
 
ae34bdf
1d43b95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f39fb3
 
 
 
fdd569a
7f39fb3
 
1d43b95
 
ae34bdf
 
 
 
 
 
 
 
1d43b95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae34bdf
 
 
 
fdd569a
ae34bdf
fdd569a
 
 
1d43b95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
import logging
import streamlit as st
from annotated_text import annotated_text
import nltk
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')
from nltk.corpus import stopwords,wordnet
from nltk.tokenize import sent_tokenize
from flashtext import KeywordProcessor
import regex as re
import string
import subprocess
from PIL import Image
import multiprocessing
total_threads=multiprocessing.cpu_count()

# Import pke; when the package is missing, install it from its GitHub
# repository, download the spaCy 'en' model, and import it again.
try:
    import pke
    logging.error("importing pke info")
except ImportError:
    # Only a failed import triggers the install. A bare `except:` would also
    # swallow KeyboardInterrupt/SystemExit and hide unrelated errors.
    logging.error("installing pke info")
    subprocess.run(['pip3', 'install','git+https://github.com/boudinfl/pke.git'])
    subprocess.run(['python3' ,'-m' ,'spacy' ,'download' ,'en'])
    import pke

# Page-wide Streamlit settings. The title given here is the literal string
# 'None'; the script later calls set_page_title('Fill Blanks') to replace it.
st.set_page_config(
    page_title='None',
    layout="wide",  # "centered" or "wide"
    initial_sidebar_state="auto",  # "auto", "expanded" or "collapsed"
)

def set_page_title(title):
    """Set the browser tab title to ``title`` through a script in the sidebar.

    A zero-height iframe is written into the sidebar as raw HTML. Its inline
    script sets the text of the parent document's <title> element and installs
    a MutationObserver (stored on ``window.parent.titleObserver``, after
    disconnecting any observer stored there earlier) that writes ``title``
    back whenever the element's text differs from it.

    ``title`` is interpolated without escaping into single-quoted JavaScript
    strings inside a double-quoted HTML attribute.
    """
    # Each backslash at the end of a line below is a line continuation: the
    # backslash and its newline are removed from the string, so the blank
    # source line that follows does not become an empty line in the markup.
    iframe_markup = f"""
        <iframe height=0 srcdoc="<script>
            const title = window.parent.document.querySelector('title') \

            const oldObserver = window.parent.titleObserver
            if (oldObserver) {{
                oldObserver.disconnect()
            }} \

            const newObserver = new MutationObserver(function(mutations) {{
                const target = mutations[0].target
                if (target.text !== '{title}') {{
                    target.text = '{title}'
                }}
            }}) \

            newObserver.observe(title, {{ childList: true }})
            window.parent.titleObserver = newObserver \

            title.text = '{title}'
        </script>" />
    """
    st.sidebar.markdown(body=iframe_markup, unsafe_allow_html=True)


# Replace the placeholder page title passed to st.set_page_config with the
# app's name.
set_page_title('Fill Blanks')

# Sample paragraph used as the default value of the input text area.
default_paratext="""Another important distinction is between companies that build enterprise products (B2B - business to business) and companies that build customer products (B2C - business to consumer).

B2B companies build products for organizations. Examples of enterprise products are Customer relationship management (CRM) software, project management tools, database management systems, cloud hosting services, etc.

B2C companies build products for individuals. Examples of consumer products are social networks, search engines, ride-sharing services, health trackers, etc.

Many companies do both -- their products can be used by individuals but they also offer plans for enterprise users. For example, Google Drive can be used by anyone but they also have Google Drive for Enterprise.

Even if a B2C company doesn’t create products for enterprises directly, they might still need to sell to enterprises. For example, Facebook’s main product is used by individuals but they sell ads to enterprises. Some might argue that this makes Facebook users products, as famously quipped: “If you’re not paying for it, you’re not the customer; you’re the product being sold.14”

These two types of companies have different sales strategies and engineering requirements. Consumer products tend to rely on viral marketing (e.g. invite your friends and get your next order for free) to reach a large number of users. Selling enterprise products tends to require selling to each user separately.

Enterprise companies usually have the role of solutions architect and its variances (solutions engineer, enterprise architect) to work with enterprise customers to figure out how to use the tool for their use cases."""

def tokenize_sentence(text):
    """Split ``text`` into sentences and keep only the longer ones.

    Sentences come from NLTK's ``sent_tokenize``. A sentence is kept when its
    length, measured before stripping, is greater than 20 characters. Kept
    sentences are returned with surrounding whitespace removed.
    """
    long_enough = (sentence for sentence in sent_tokenize(text) if len(sentence) > 20)
    return [sentence.strip() for sentence in long_enough]


# extractor = pke.unsupervised.MultipartiteRank()
# extractor.load_document(input=default_paratext, language='en', normalization=None)
# extractor.candidate_selection(pos={'NOUN', 'VERB', 'ADJ'})
# extractor.candidate_weighting(threshold=0.74, method='average', alpha=1.1)
# keyphrases = extractor.get_n_best(n=5)
# print('keyphrases', keyphrases)

def get_noun_adj_verb(text):
    """Return up to ten keyphrases of ``text`` ranked with pke's MultipartiteRank.

    Candidate selection is limited to the NOUN, VERB and ADJ part-of-speech
    tags. Any exception raised along the way is printed, and whatever was
    collected up to that point (possibly an empty list) is returned.
    """
    phrases = []
    try:
        ranker = pke.unsupervised.MultipartiteRank()
        ranker.load_document(input=text, language='en', normalization=None)
        # Other available tags: 'ADP' 'ADV' 'AUX' 'DET' 'NUM' 'PART' 'PROPN' 'PUNCT'
        ranker.candidate_selection(pos={'NOUN', 'VERB', 'ADJ'})
        ranker.candidate_weighting(threshold=0.74, method='average', alpha=1.1)
        # Keep element 0 of each ranked entry
        # (presumably (phrase, score) pairs -- confirm against pke).
        phrases.extend(entry[0] for entry in ranker.get_n_best(n=10))
    except Exception as exc:
        print("found exception",exc)
    return phrases

def get_keywords_sentence(keywords,tokenized_sent):
    """Map every keyword to the sentences that contain it.

    Matching is a plain, case-sensitive substring test. Each keyword's
    sentences are ordered from longest to shortest, with equally long
    sentences keeping their original relative order. A keyword that occurs in
    no sentence maps to an empty list.
    """
    return {
        keyword: sorted(
            (sentence for sentence in tokenized_sent if keyword in sentence),
            key=len,
            reverse=True,
        )
        for keyword in keywords
    }

def create_blanks(keyword_sentence_dict):
    """Build fill-in-the-blank questions from a keyword -> sentences mapping.

    For every keyword with at least one sentence, the first sentence is taken
    and the first occurrence of the keyword in it is replaced by a run of
    underscores. Keywords with no sentences are skipped.

    Args:
        keyword_sentence_dict: mapping of keyword to a list of sentences.

    Returns:
        A tuple ``(answer, fib)`` of two parallel lists: the keywords used and
        the corresponding blanked sentences.
    """
    blank = '____________'
    answer = []
    fib = []
    for keyword, sentences in keyword_sentence_dict.items():
        if not sentences:
            continue
        # Replace the keyword literally. Treating it as a regex pattern breaks
        # on keywords containing metacharacters such as '+', '(' or '.'.
        fib.append(sentences[0].replace(keyword, blank, 1))
        answer.append(keyword)
    return answer, fib

# default_paratext = """On May 4, the Red Planet was rocked by a roughly magnitude 5 temblor, the largest Marsquake detected to date, NASA’s Jet Propulsion Laboratory in Pasadena, Calif., reports. The shaking lasted for more than six hours and released more than 10 times the energy of the previous record-holding quake.The U.S. space agency’s InSight lander, which has been studying Mars’ deep interior since touching down on the planet in 2018 (SN: 11/26/18), recorded the event. The quake probably originated near the Cerberus Fossae region, which is more than 1,000 kilometers from the lander.Cerberus Fossae is known for its fractured surface and frequent rockfalls. It makes sense that the ground would be shifting there, says geophysicist Philippe Lognonné, principal investigator of the Seismic Experiment for Interior Structure, InSight’s seismometer. “It’s an ancient volcanic bulge.Just like earthquakes reveal information about our planet’s interior structure, Marsquakes can be used to probe what lies beneath Mars’ surface (SN: 7/22/21). And a lot can be learned from studying this whopper of a quake, says Lognonné, of the Institut de Physique du Globe de Paris. “The signal is so good, we’ll be able to work on the details."""
# input_text=default_paratext
# tokenized_sent = tokenize_sentence(input_text)
# keywords_noun_adj_verb = get_noun_adj_verb(input_text)
# keyword_sent_noun_verb_adj = get_keywords_sentence(keywords=keywords_noun_adj_verb, tokenized_sent=tokenized_sent)
# answer, fib = create_blanks(keyword_sentence_dict=keyword_sent_noun_verb_adj)


# Main-panel heading, written as raw HTML so it can be centered and colored,
# followed by a horizontal rule.
st.markdown(
    "<h1 style='text-align: center; color: #3366ff;'>Create Fill The Blanks Questions</h1>",
    unsafe_allow_html=True,
)
st.markdown("---")

# Sidebar: heading plus the drop-down that selects which page is shown below.
st.sidebar.markdown(
    "<h1 style='text-align: left; color: ;'>NLP Tasks</h1>",
    unsafe_allow_html=True,
)
select_task = st.sidebar.selectbox(
    label="Select task from drop down menu",
    options=['README', 'Basic Fill Blanks'],
)


# README page: a short description of the app followed by an image.
if select_task=='README':
    st.header("Summary")
    st.write(f"The App gives you ability to create *Fill in the blanks* Capability just like Ed-Techs.Currently.It has {total_threads} CPU cores but only 1 is available per user so "
             f"inference time will be on the higher side.")
    st.markdown("---")
    # Open the image only when this page is shown, so that a missing or
    # unreadable file cannot break the other page, which never displays it.
    img = Image.open("hf_space1.png")
    st.image(img)

# "Basic Fill Blanks" page: take a paragraph, generate the questions on button
# press, and list each question followed by its highlighted answer.
if select_task=='Basic Fill Blanks':
    input_text = st.text_area(label='Input paragraph', height=500, max_chars=2000, value=default_paratext)
    create_fib=st.button("Create Questions")

    if create_fib:
        tokenized_sent = tokenize_sentence(input_text)
        keywords_noun_adj_verb = get_noun_adj_verb(input_text)
        keyword_sent_noun_verb_adj = get_keywords_sentence(keywords=keywords_noun_adj_verb,tokenized_sent=tokenized_sent)
        answer, fib = create_blanks(keyword_sentence_dict=keyword_sent_noun_verb_adj)
        st.markdown('---')
        if not fib:
            # Without this the page would show only a rule and no explanation
            # when no keyword could be matched to a sentence.
            st.warning("No questions could be created from this text.")
        # Distinct loop names keep the `answer` and `fib` lists intact instead
        # of rebinding them to single strings while they are being iterated.
        for number, (correct_word, question) in enumerate(zip(answer, fib), start=1):
            st.write(f"{number}. {question} ")
            annotated_text("Answer is ", (correct_word,'',"#fea"))
            st.markdown('---')


# demo = gr.Interface(fn=get_noun_adj_verb,
#                     inputs=gr.inputs.Textbox(lines=10,default=default_paratext),
#                     outputs=gr.outputs.Textbox(),
#                     allow_flagging='never',
#                     layout='vertical',
#                     title="Make Fill in the Blanks using your text",
#                     )
#
# if __name__ == "__main__":
#     demo.launch()