Spaces:
Running
on
Zero
Running
on
Zero
File size: 6,878 Bytes
9c078e7 4a8131a 56f924f 9c078e7 56f924f 0e24f0d a345dc2 b1a9b5c 9c078e7 aecc737 0e24f0d aecc737 9c078e7 99d2247 7c5f508 56f924f 4d92f8a 56f924f 4d92f8a 56f924f 4d92f8a e1ae004 4d92f8a e1ae004 a345dc2 0e24f0d 99d2247 0e24f0d c35893e d6c9c32 c35893e 44cf124 c35893e 44cf124 c35893e bb04fc2 a345dc2 44cf124 a345dc2 44cf124 a345dc2 bb04fc2 c35893e 44cf124 0e24f0d 44cf124 a345dc2 d6c9c32 a345dc2 d6c9c32 0e24f0d aecc737 9c078e7 c35893e a345dc2 9c078e7 0e24f0d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer
# Load the Bonito tokenizer and checkpoint once, at import time, so every
# request handled by respond() reuses the same objects.
tokenizer = AutoTokenizer.from_pretrained("BatsResearch/bonito-v1")
model = AutoModelForCausalLM.from_pretrained("BatsResearch/bonito-v1")
# Move the weights to the GPU; respond() places its input ids on "cuda" too.
model.to("cuda")
@spaces.GPU
def respond(
    message,
    task_type,
    max_tokens,
    temperature,
    top_p,
):
    """Generate an (instruction, response) pair for ``message`` with Bonito.

    The prompt is assembled as ``<|tasktype|>``, the lower-cased task type,
    ``<|context|>``, the stripped context and a trailing ``<|task|>`` marker.
    The model output is sampled, and only the newly generated tokens are
    decoded.

    Args:
        message: Context text the task should be generated from.
        task_type: Task type name; lower-cased and stripped before use.
        max_tokens: Forwarded to ``model.generate`` as ``max_new_tokens``.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.

    Returns:
        A tuple ``(instruction, response)``. ``instruction`` is the text
        before the first ``<|pipe|>`` with ``{{context}}`` replaced by
        ``message``. ``response`` is the text after that separator, or a
        fixed "please regenerate" notice when the model emitted no separator.
    """
    task_type = task_type.lower()
    input_text = "<|tasktype|>\n" + task_type.strip()
    input_text += "\n<|context|>\n" + message.strip() + "\n<|task|>\n"

    input_ids = tokenizer.encode(input_text, return_tensors="pt").to("cuda")
    output = model.generate(
        input_ids,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
    )

    # The returned sequence starts with the prompt tokens; decode only what
    # follows them.
    pred_start = int(input_ids.shape[-1])
    response = tokenizer.decode(output[0][pred_start:], skip_special_tokens=True)

    # Split unconditionally so `pair` always exists: str.split returns the
    # whole text as a single element when "<|pipe|>" is absent. Previously the
    # fallback branch read `pair` before it was ever assigned and crashed.
    pair = response.split("<|pipe|>")
    instruction = pair[0].strip().replace("{{context}}", message)
    if len(pair) > 1:
        response = pair[1].strip()
    else:
        # Fallback: no separator was generated, so there is no answer part.
        response = "Unable to generate response. Please regenerate."
    return instruction, response
# Task types offered in the UI. Each name is written in lower case and
# capitalized (first letter only) for better readability in the dropdown.
task_types = list(
    map(
        str.capitalize,
        (
            "extractive question answering",
            "multiple-choice question answering",
            "question generation",
            "question answering without choices",
            "yes-no question answering",
            "coreference resolution",
            "paraphrase generation",
            "paraphrase identification",
            "sentence completion",
            "sentiment",
            "summarization",
            "text generation",
            "topic classification",
            "word sense disambiguation",
            "textual entailment",
            "natural language inference",
        ),
    )
)
# Introductory text for the demo page; handed to gr.Interface as `description`.
# The model link previously had a stray "." after the host name.
description = """
This is a demo for Bonito, an open-source model for conditional task generation: the task of converting unannotated text into task-specific synthetic instruction tuning data.
## More details on Bonito
Model: https://huggingface.co/BatsResearch/bonito-v1
Paper: https://arxiv.org/abs/2402.18334
GitHub: https://github.com/BatsResearch/bonito
## Instructions
Try out the model by entering a context and selecting a task type from the dropdown. The model will generate a task instruction based on the context and task type you provide.
"""
# Default context pre-filled in the "Context" textbox (a confidentiality clause).
example_context = """2.3 Provided that the Recipient has a written agreement with the following persons or entities requiring them to treat the Confidential Information in accordance with this Agreement, the Recipient may disclose the Confidential Information to: 2.3.1 Any other party with the Discloser's prior written consent; and 2.3.2 the Recipient's employees, officials, representatives and agents who have a strict need to know the contents of the Confidential Information, and employees, officials, representatives and agents of any legal entity that it controls, controls it, or with which it is under common control, who have a similar need to know the contents of the Confidential Information, provided that, for these purposes a controlled legal entity means:"""

# Sample inputs: one row holding a tuple of two contexts and a tuple of two
# task types. The first context is the same text as `example_context`.
# NOTE(review): nothing in the visible part of this file reads `examples`.
examples = [
    [
        (
            example_context,
            """Chronic rhinosinusitis (CRS) is a heterogeneous disease with an uncertain pathogenesis. Group 2 innate lymphoid cells (ILC2s) represent a recently discovered cell population which has been implicated in driving Th2 inflammation in CRS; however, their relationship with clinical disease characteristics has yet to be investigated. The aim of this study was to identify ILC2s in sinus mucosa in patients with CRS and controls and compare ILC2s across characteristics of disease. A cross-sectional study of patients with CRS undergoing endoscopic sinus surgery was conducted. Sinus mucosal biopsies were obtained during surgery and control tissue from patients undergoing pituitary tumour resection through transphenoidal approach. ILC2s were identified as CD45(+) Lin(-) CD127(+) CD4(-) CD8(-) CRTH2(CD294)(+) CD161(+) cells in single cell suspensions through flow cytometry. ILC2 frequencies, measured as a percentage of CD45(+) cells, were compared across CRS phenotype, endotype, inflammatory CRS subtype and other disease characteristics including blood eosinophils, serum IgE, asthma status and nasal symptom score. 35 patients (40% female, age 48 ± 17 years) including 13 with eosinophilic CRS (eCRS), 13 with non-eCRS and 9 controls were recruited. ILC2 frequencies were associated with the presence of nasal polyps (P = 0.002) as well as high tissue eosinophilia (P = 0.004) and eosinophil-dominant CRS (P = 0.001) (Mann-Whitney U). They were also associated with increased blood eosinophilia (P = 0.005). There were no significant associations found between ILC2s and serum total IgE and allergic disease. In the CRS with nasal polyps (CRSwNP) population, ILC2s were increased in patients with co-existing asthma (P = 0.03). ILC2s were also correlated with worsening nasal symptom score in CRS (P = 0.04).""",
        ),
        (
            "Natural language inference",
            "Yes-no question answering",
        ),
    ]
]
# Build the UI around respond(): the two main inputs followed by the three
# sliders line up with its parameters (message, task_type, max_tokens,
# temperature, top_p), and the two output boxes receive the
# (instruction, response) tuple it returns.
demo = gr.Interface(
    title="Bonito",
    description=description,
    fn=respond,
    inputs=[
        gr.Textbox(
            value=example_context,
            placeholder="Enter context here..",
            lines=5,
            label="Context",
        ),
        gr.Dropdown(
            choices=task_types,
            label="Task type",
            value="Natural language inference",
        ),
    ],
    additional_inputs=[
        gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=512),
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.95,
        ),
    ],
    outputs=[
        gr.Textbox(lines=5, label="Instruction"),
        gr.Textbox(label="Response"),
    ],
)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
|