MCQ-Generator / keywords.py
ashishraics's picture
1st app
4a6b1d2
raw
history blame
1.42 kB
import nltk
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')
from nltk.corpus import stopwords,wordnet
from nltk.tokenize import sent_tokenize
from flashtext import KeywordProcessor
import regex as re
import string
import subprocess
import logging
# Bootstrap the `pke` dependency: use it if it is already importable,
# otherwise install it (plus the spaCy English model) and retry the import.
try:
    import pke
    # NOTE(review): informational message logged at ERROR level, presumably so
    # it is visible under the default logging configuration.
    logging.error("importing pke info")
except ImportError:
    # Only a missing/unimportable package should trigger the install path; a
    # bare ``except`` would also swallow KeyboardInterrupt and SystemExit.
    import sys

    logging.error("installing pke info")
    # Run pip/spaCy through the interpreter that is executing this module so
    # the packages land in the environment the retry import will search.
    _python = sys.executable or 'python3'
    subprocess.run([_python, '-m', 'pip', 'install', 'git+https://github.com/boudinfl/pke.git'])
    # Use the full model package name: the 'en' shortcut is not accepted by
    # spaCy v3, while 'en_core_web_sm' works on both v2 and v3.
    subprocess.run([_python, '-m', 'spacy', 'download', 'en_core_web_sm'])
    # If installation failed, this raises ImportError to the importer.
    import pke
def tokenize_sentence(text):
    """Split *text* into sentences and keep only the longer ones.

    Sentences come from ``sent_tokenize``. A sentence is kept when its
    length, measured before trimming, exceeds 20 characters; kept sentences
    are returned with surrounding whitespace stripped, in their original
    order.
    """
    kept = []
    for sentence in sent_tokenize(text):
        if len(sentence) <= 20:
            # Too short to be kept.
            continue
        kept.append(sentence.strip())
    return kept
def get_noun_adj_verb(text):
    """Return up to five unique key phrases extracted from *text*.

    Uses pke's ``MultipartiteRank`` extractor with candidates restricted to
    the ``NOUN``, ``VERB`` and ``ADJ`` part-of-speech tags.

    Args:
        text: The raw document text to analyse.

    Returns:
        A list of unique phrases in the order ``get_n_best`` returned them.
        If extraction raises, the error is printed and whatever was collected
        so far (normally an empty list) is returned; this function itself
        does not propagate the exception.
    """
    ranked = []
    try:
        extractor = pke.unsupervised.MultipartiteRank()
        extractor.load_document(input=text, language='en', normalization=None)
        # Keyphrase candidate selection. Available POS tags: 'ADJ' 'ADP' 'ADV'
        # 'AUX' 'DET' 'NOUN' 'NUM' 'PART' 'PROPN' 'PUNCT' 'VERB'.
        extractor.candidate_selection(pos={'NOUN', 'VERB', 'ADJ'})
        # Candidate weighting.
        extractor.candidate_weighting(threshold=0.9, method='average', alpha=1.1)
        # Take the top n entries; the first element of each is the phrase.
        ranked = [candidate[0] for candidate in extractor.get_n_best(n=5)]
    except Exception as e:
        # Deliberate best-effort: report the problem and fall through.
        print("found exception",e)
    # De-duplicate while preserving order; ``set`` would scramble the ranking
    # and make the result order vary between interpreter runs.
    return list(dict.fromkeys(ranked))