|
|
|
from transformers import FillMaskPipeline ,DistilBertTokenizer,TFAutoModelForMaskedLM |
|
from transformers import BertTokenizer |
|
|
|
|
|
# Local WordPiece vocabulary file used to build the BERT tokenizer.
tokenizer_path_1="./vocab.txt"

tokenizer_1 = BertTokenizer.from_pretrained(tokenizer_path_1)




# Directory holding the pretrained TensorFlow masked-LM checkpoint
# (config + weights); presumably a BERT variant fine-tuned on the
# Computational Complexity corpus described below — TODO confirm.
model_path="./bert_lm_10"

model_1 = TFAutoModelForMaskedLM.from_pretrained(model_path)




# Fill-mask pipeline combining the TF model and tokenizer; module-level
# global consumed by unmask_words() and the demo loop further down.
unmasker = FillMaskPipeline(model=model_1,tokenizer=tokenizer_1)
|
|
|
|
|
# Quick smoke test: run the fill-mask pipeline on one sample sentence
# and print each suggested completion with its confidence score.
txt="a polynomial [MASK] from 3-SAT."

# BUG FIX: `results` was iterated below without ever being assigned,
# which raised NameError as soon as the script ran. Compute it first.
results = unmasker(txt)

for res in results:
    # Each result dict carries the filled-in sentence and its score.
    print(res["sequence"])
    print(res["score"])
|
|
|
|
|
|
|
def unmask_words(txt_with_mask,k_suggestions=5):
    """Suggest replacements for the [MASK] token in *txt_with_mask*.

    Runs the module-level fill-mask pipeline and returns a dict mapping
    each suggested token (with internal spaces stripped) to its
    confidence score. *k_suggestions* bounds the number of candidates.
    """
    predictions = unmasker(txt_with_mask, top_k=k_suggestions)
    # Strip spaces from the token string so multi-piece tokens read as
    # one word; scores are the pipeline's softmax probabilities.
    return {
        pred["token_str"].replace(" ", ""): pred["score"]
        for pred in predictions
    }
|
|
|
|
|
|
|
|
|
import gradio as gr

# Markdown blurb rendered above the demo UI.
# FIX: the contact link previously had no URL scheme
# ("[...]([email protected])"), producing a dead link when rendered;
# it now uses an explicit mailto: URL.
description="""CC bert is a MLM model pretrained on data collected from ~200k papers in mainly Computational Complexity

or related domain. For more information visit [Theoremkb Project](https://github.com/PierreSenellart/theoremkb)

or contact [[email protected]](mailto:[email protected]).

"""

# Clickable example inputs for the textbox.
# FIX: corrected the typo "heirarchy" -> "hierarchy" in the 4th example.
# NOTE(review): the interface below has TWO input components (textbox +
# slider) but each example supplies only one value; some Gradio versions
# require one value per component — verify against the installed version.
examples=[["as pspace is [MASK] under complement."],

          ["n!-(n-1)[MASK]"],

          ["[MASK] these two classes is a major problem."],

          ["This would show that the polynomial hierarchy at the second [MASK], which is considered only"],

          ["""we consider two ways of measuring complexity, data complexity, which is with respect to the size of the data,

and their combined [MASK]"""]

          ]

# NOTE(review): gr.inputs / gr.outputs is the legacy (pre-3.x) Gradio
# namespace; migrate to gr.Textbox / gr.Slider / gr.Label on upgrade.
input_box=gr.inputs.Textbox(lines=20,placeholder="Unifying computational entropies via Kullback–Leibler [MASK]",label="Enter the masked text:")

interface=gr.Interface(fn=unmask_words,
                       inputs=[input_box,
                               gr.inputs.Slider(1,10,1,5,label="No of Suggestions:")],
                       outputs=gr.outputs.Label(label="top words:"),
                       examples=examples,
                       title="CC-Bert MLM",description=description)

# SECURITY(review): hard-coded demo credentials and share=True expose the
# app publicly behind a trivial login — replace before real deployment.
interface.launch(debug=True,share=True,auth=("test", "test"))