v1
app.py CHANGED
@@ -118,30 +118,8 @@ demo = gr.ChatInterface(fn=bot_streaming,
                         additional_inputs=[gr.Slider(0, 1, 0.9, label="temperature"), gr.Slider(1, 1024, 128, label="max_new_tokens"), gr.Slider(0, 1, 0.95, label="top_p")],
                         additional_inputs_accordion="Generation Hyperparameters",
                         theme=gr.themes.Soft(),
-                        title="
-                        description="
+                        title="TroL",
+                        description="TroL is a family of efficient 1.8B, 3.8B, and 7B Large Language and Vision Models built on a new propagation strategy.\n"
                                     "Its inference speed depends heavily on being assigned a non-scheduled GPU. (Therefore, while all GPUs are busy, inference may hang indefinitely.)",
                         stop_btn="Stop Generation", multimodal=True)
-demo.launch()
-
-
-
-
-
-
-
-
-
-
-
-
-# Generate
-with torch.inference_mode():
-    _inputs = model.eval_process(inputs=inputs,
-                                 data='demo',
-                                 tokenizer=tokenizer,
-                                 device='cuda:0',
-                                 img_token_number=image_token_number)
-    generate_ids = model.generate(**_inputs, max_new_tokens=256, use_cache=True)
-    response = output_filtering(tokenizer.batch_decode(generate_ids, skip_special_tokens=False)[0], model)
-    print(response)
+demo.launch()
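
For orientation, a minimal consolidated sketch of how the touched region of app.py reads after this commit. It is a reconstruction, not verbatim source: the gradio import is assumed, and the bot_streaming handler is defined elsewhere in the file (its name comes from the hunk header).

import gradio as gr

# bot_streaming is the multimodal streaming handler defined earlier in app.py.
demo = gr.ChatInterface(fn=bot_streaming,
                        additional_inputs=[gr.Slider(0, 1, 0.9, label="temperature"),
                                           gr.Slider(1, 1024, 128, label="max_new_tokens"),
                                           gr.Slider(0, 1, 0.95, label="top_p")],
                        additional_inputs_accordion="Generation Hyperparameters",
                        theme=gr.themes.Soft(),
                        title="TroL",
                        description="TroL is a family of efficient 1.8B, 3.8B, and 7B Large Language and Vision Models "
                                    "built on a new propagation strategy.\n"
                                    "Its inference speed depends heavily on being assigned a non-scheduled GPU. "
                                    "(Therefore, while all GPUs are busy, inference may hang indefinitely.)",
                        stop_btn="Stop Generation", multimodal=True)
demo.launch()

The three sliders are passed to the handler as extra arguments, and additional_inputs_accordion groups them under a collapsible "Generation Hyperparameters" panel.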
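The removed tail of the file was a module-level generation pass placed after demo.launch(), which blocks by default, so those lines were effectively dead code in a running Space. If a quick generation check were still wanted, one option (a sketch, not part of this commit; the helper name is hypothetical, while model, tokenizer, inputs, image_token_number, and output_filtering are the names the removed lines used) would be to keep it behind a function that is only called deliberately:

import torch  # already imported in app.py for the original block

# Hypothetical helper wrapping the removed one-off generation check.
def run_generation_smoke_test():
    with torch.inference_mode():
        _inputs = model.eval_process(inputs=inputs,
                                     data='demo',
                                     tokenizer=tokenizer,
                                     device='cuda:0',
                                     img_token_number=image_token_number)
        generate_ids = model.generate(**_inputs, max_new_tokens=256, use_cache=True)
        response = output_filtering(tokenizer.batch_decode(generate_ids, skip_special_tokens=False)[0], model)
        print(response)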