BK-Lee committed
Commit c9961ab · 1 Parent(s): 74321a7
Files changed (1)
  1. app.py +13 -4
app.py CHANGED
@@ -38,6 +38,8 @@ model_3_8, tokenizer_3_8 = load_trol(link='TroL-3.8B')
 # loading model
 model_7, tokenizer_7 = load_trol(link='TroL-7B')
 
+print()
+
 def threading_function(inputs, image_token_number, streamer, device, model, tokenizer, temperature, new_max_token, top_p):
 
     # propagation
@@ -58,6 +60,7 @@ def threading_function(inputs, image_token_number, streamer, device, model, toke
 @spaces.GPU
 def bot_streaming(message, history, link, temperature, new_max_token, top_p):
 
+    # model selection
     if "1.8B" in link:
         model = model_1_8
         tokenizer = tokenizer_1_8
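The `# model selection` comment labels the existing `if`/`elif` chain that picks one of the three preloaded checkpoints by substring of the `link` string. A minimal sketch of the same idea as a lookup table (hypothetical, not part of this commit; it assumes the `model_*` / `tokenizer_*` globals that app.py builds earlier with `load_trol`):

def select_model(link):
    # Hypothetical lookup-table version of the "# model selection" chain.
    table = {
        "1.8B": (model_1_8, tokenizer_1_8),
        "3.8B": (model_3_8, tokenizer_3_8),
        "7B":   (model_7, tokenizer_7),
    }
    # Return the first (model, tokenizer) pair whose size tag appears in the link.
    for size, pair in table.items():
        if size in link:
            return pair
    raise ValueError(f"Unknown model link: {link}")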
@@ -67,14 +70,19 @@ def bot_streaming(message, history, link, temperature, new_max_token, top_p):
     elif "7B" in link:
         model = model_7
         tokenizer = tokenizer_7
-
+
+    # cpu -> gpu
+    for param in model.parameters():
+        if not param.is_cuda:
+            param.data = param.to(accel.device)
+
     try:
         # prompt type -> input prompt
         image_token_number = None
         if len(message['files']) != 0:
             # Image Load
             image = pil_to_tensor(Image.open(Image.open(message['files'][0]).convert("RGB")).convert("RGB"))
-            if not "3.8B" in link:
+            if "3.8B" not in link:
                 image_token_number = 1225
                 image = F.interpolate(image.unsqueeze(0), size=(490, 490), mode='bicubic').squeeze(0)
             inputs = [{'image': image, 'question': message['text']}]
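The main functional change is the `# cpu -> gpu` loop, which moves any parameters still on the CPU onto the GPU assigned by ZeroGPU before generation. A standalone sketch of that pattern (hypothetical helper name; `accel.device` in the diff is assumed to come from an `accelerate.Accelerator` created elsewhere in app.py):

import torch

def move_params_to_device(model, device):
    # Mirror the loop added in this commit: only parameters that are not
    # already on a CUDA device are copied, so repeated calls stay cheap.
    for param in model.parameters():
        if not param.is_cuda:
            param.data = param.data.to(device)

# Example usage (assumes a CUDA device is available, as inside @spaces.GPU):
# move_params_to_device(model, torch.device("cuda:0"))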
@@ -129,7 +137,8 @@ demo = gr.ChatInterface(fn=bot_streaming,
                         additional_inputs_accordion="Generation Hyperparameters",
                         theme=gr.themes.Soft(),
                         title="TroL",
-                        description="TroL is efficient 1.8B, 3.8B, and 7B size Large Language and Vision Models built on new propagation strategy\n"
-                                    "Its inference speed highly depends on assinging non-scheduled GPU. (Therefore, once all GPUs are busy, then inference may be taken in infinity)",
+                        description="TroL is a family of efficient 1.8B, 3.8B, and 7B Large Language and Vision Models built on a new propagation strategy. "
+                                    "Its inference speed depends heavily on being assigned a non-scheduled GPU; once all GPUs are busy, inference may hang indefinitely. "
+                                    "Note that we do not support history-based conversation referring to previous dialogue.",
                         stop_btn="Stop Generation", multimodal=True)
 demo.launch()
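For context, a minimal, self-contained sketch of the gr.ChatInterface wiring that the last hunk edits, with a hypothetical echo function standing in for bot_streaming (the keyword arguments mirror those in app.py):

import gradio as gr

def echo_bot(message, history):
    # With multimodal=True, `message` is a dict holding "text" and "files".
    return f"You said: {message['text']}"

demo = gr.ChatInterface(fn=echo_bot,
                        title="TroL",
                        description="Demo description shown above the chat box.",
                        theme=gr.themes.Soft(),
                        stop_btn="Stop Generation",
                        multimodal=True)

if __name__ == "__main__":
    demo.launch()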
 