|
import os
import time

import gradio as gr
import pandas as pd
import torch
from transformers import pipeline, GPT2Tokenizer, OPTForCausalLM
|
|
|
# Load the causal language model and its tokenizer from the
# 'pushkarraj/pushkar_OPT_paraphaser' checkpoint.
model = OPTForCausalLM.from_pretrained('pushkarraj/pushkar_OPT_paraphaser')
tokenizer = GPT2Tokenizer.from_pretrained(
    'pushkarraj/pushkar_OPT_paraphaser', truncation=True
)

# Build the text-generation pipeline used by cleaned_para().
# Run on the first CUDA GPU when one is available; otherwise fall back to the
# CPU (-1) so the app still starts on CPU-only hosts instead of failing on a
# hard-coded device=0.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,
)
|
|
|
def cleaned_para(input_sentence):
    """Generate a paraphrase for a single sentence.

    The sentence is wrapped as ``<s>`` + sentence + ``</s>>>>><p>`` and sent
    to the module-level ``generator`` pipeline. From the first generated
    sequence, the piece right after the first ``</s>>>>><p>`` marker is taken
    and truncated at the first ``</p>``.

    Args:
        input_sentence: Sentence to paraphrase, as a string.

    Returns:
        The extracted paraphrase text.

    Raises:
        IndexError: If the generated text does not contain the marker.
    """
    marker = '</s>>>>><p>'
    prompt = '<s>' + input_sentence + marker
    # Length limit: number of space-separated words in the input, plus 200.
    length_limit = len(input_sentence.split(" ")) + 200
    outputs = generator(
        prompt,
        do_sample=True,
        max_length=length_limit,
        temperature=0.8,
        repetition_penalty=1.2,
        top_p=0.4,
        top_k=1,
    )
    generated = outputs[0]['generated_text']
    after_marker = generated.split(marker)[1]
    return after_marker.split('</p>')[0]
|
|
|
from __future__ import unicode_literals, print_function |
|
from spacy.lang.en import English |
|
|
|
def sentensizer(raw_text):
    """Split text into sentences using spaCy's ``sentencizer`` pipe.

    A fresh ``English()`` pipeline with the ``"sentencizer"`` pipe added is
    built on every call and applied to ``raw_text``.

    Args:
        raw_text: Text to segment.

    Returns:
        A list of the sentence objects yielded by ``doc.sents``. The list is
        also printed to stdout.
    """
    splitter = English()
    splitter.add_pipe("sentencizer")
    sentences = list(splitter(raw_text).sents)
    print(sentences)
    return sentences
|
|
|
def paraphraser(text):
    """Paraphrase ``text`` one sentence at a time.

    The text is segmented with ``sentensizer``; each sentence is converted to
    ``str`` and rewritten by ``cleaned_para``; the rewritten pieces are then
    joined with ``"."``.

    Args:
        text: Input text to paraphrase.

    Returns:
        The rewritten sentences joined by ``"."`` (an empty string when no
        sentences are found).
    """
    # sentensizer() yields sentence objects, so convert each to plain text
    # before handing it to the model wrapper.
    rewritten = [cleaned_para(str(sentence)) for sentence in sentensizer(text)]
    # NOTE(review): pieces are joined with "." and no space; confirm this
    # matches the punctuation the model actually emits.
    return ".".join(rewritten)
|
|
|
interface=gr.Interface(fn=paraphraser,inputs="text",outputs="text",title="Paraphraser") |
|
|
|
interface.launch() |
|
|