v1
app.py CHANGED
@@ -24,23 +24,21 @@ subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENT
 # accel
 accel = Accelerator()
 
-# model selection
-link = "TroL-7B" # [Select One] 'TroL-1.8B' | 'TroL-3.8B' | 'TroL-7B'
-
 # User prompt
 prompt_type="with_image" # Select one option "text_only", "with_image"
 img_path='figures/demo.png'
 question="What is the troll doing? Provide the detail in the image and imagine what the event happens."
 
 # loading model
-model, tokenizer = load_trol(link=link)
+model_1_8, tokenizer_1_8 = load_trol(link='TroL-1.8B')
+
+# loading model
+model_3_8, tokenizer_3_8 = load_trol(link='TroL-3.8B')
 
-# cpu -> gpu
-for param in model.parameters():
-    if not param.is_cuda:
-        param.data = param.to('cuda:0')
+# loading model
+model_7, tokenizer_7 = load_trol(link='TroL-7B')
 
-def threading_function(inputs, image_token_number, streamer, device, temperature, new_max_token, top_p):
+def threading_function(inputs, image_token_number, streamer, device, model, tokenizer, temperature, new_max_token, top_p):
 
     # propagation
     _inputs = model.eval_process(inputs=inputs,
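This hunk stops hard-coding a single checkpoint (`link = "TroL-7B"`) and instead pre-loads all three TroL sizes at import time, so the request handler can switch between them per call. A minimal sketch of the same idea with one lookup table instead of three parallel variables, assuming only what the diff shows (that `load_trol` returns a `(model, tokenizer)` pair):

    # Sketch, not the committed code: keep every checkpoint in one dict,
    # keyed by the size string that bot_streaming later receives as `link`.
    MODELS = {
        size: load_trol(link=f'TroL-{size}')  # assumed (model, tokenizer) pair
        for size in ('1.8B', '3.8B', '7B')
    }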
@@ -60,6 +58,16 @@ def threading_function(inputs, image_token_number, streamer, device, temperature
 @spaces.GPU
 def bot_streaming(message, history, link, temperature, new_max_token, top_p):
 
+    if "1.8B" in link:
+        model = model_1_8
+        tokenizer = tokenizer_1_8
+    elif "3.8B" in link:
+        model = model_3_8
+        tokenizer = tokenizer_3_8
+    elif "7B" in link:
+        model = model_7
+        tokenizer = tokenizer_7
+
     try:
         # prompt type -> input prompt
         image_token_number = None
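With a table like the sketch above, the added `if`/`elif` chain collapses to one lookup, and a missing or unknown selection fails with a clear message instead of leaving `model` unbound. A hypothetical variant of the function's opening lines:

    def bot_streaming(message, history, link, temperature, new_max_token, top_p):
        # `link` carries the Radio value ("1.8B", "3.8B", or "7B").
        try:
            model, tokenizer = MODELS[link]
        except KeyError:
            raise gr.Error("Please select a model size first.")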
@@ -83,6 +91,8 @@ def bot_streaming(message, history, link, temperature, new_max_token, top_p):
         thread = Thread(target=threading_function, kwargs=dict(inputs=inputs,
                                                                image_token_number=image_token_number,
                                                                streamer=streamer,
+                                                               model=model,
+                                                               tokenizer=tokenizer,
                                                                device=accel.device,
                                                                temperature=temperature,
                                                                new_max_token=new_max_token,
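The selected `model` and `tokenizer` now travel into the worker thread as keyword arguments; the thread writes generated text into `streamer` while the Gradio callback drains it. The consumption side implied by the `yield buffer` context line in the final hunk typically looks like this sketch, assuming `streamer` is a `transformers.TextIteratorStreamer` (iterable, yielding decoded text chunks):

    thread.start()
    buffer = ""
    for new_text in streamer:  # blocks until the worker pushes the next chunk
        buffer += new_text
        yield buffer           # ChatInterface re-renders the partial answer
    thread.join()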
@@ -115,7 +125,7 @@ def bot_streaming(message, history, link, temperature, new_max_token, top_p):
             yield buffer
 
 demo = gr.ChatInterface(fn=bot_streaming,
-                        additional_inputs = [gr.Slider(0, 1, 0.9, label="temperature"), gr.Slider(1, 1024, 128, label="new_max_token"), gr.Slider(0, 1, 0.95, label="top_p")],
+                        additional_inputs = [gr.Radio(["1.8B", "3.8B", "7B"], label="Size", info="Select one model size"), gr.Slider(0, 1, 0.9, label="temperature"), gr.Slider(1, 1024, 128, label="new_max_token"), gr.Slider(0, 1, 0.95, label="top_p")],
                         additional_inputs_accordion="Generation Hyperparameters",
                         theme=gr.themes.Soft(),
                         title="TroL",
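`gr.ChatInterface` forwards `additional_inputs` values to `fn` positionally after `(message, history)`, so the new `gr.Radio` must sit first in the list to land in the `link` parameter, with the three sliders then mapping to `temperature`, `new_max_token`, and `top_p` in order. A self-contained sketch of just that wiring (`echo_args` is a stand-in for `bot_streaming`):

    import gradio as gr

    def echo_args(message, history, link, temperature, new_max_token, top_p):
        # Order of additional_inputs == order of parameters after (message, history).
        return f"link={link}, temperature={temperature}, new_max_token={new_max_token}, top_p={top_p}"

    demo = gr.ChatInterface(fn=echo_args,
                            additional_inputs=[gr.Radio(["1.8B", "3.8B", "7B"], label="Size"),
                                               gr.Slider(0, 1, 0.9, label="temperature"),
                                               gr.Slider(1, 1024, 128, label="new_max_token"),
                                               gr.Slider(0, 1, 0.95, label="top_p")])
    demo.launch()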
|