SmolVLM-trl-dpo-rlaif-v

Running on Zero

sergiopaniego committed on Dec 17, 2024

Commit

1bb5760

verified ·

1 Parent(s): f107fb0

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -13,7 +13,7 @@ DESCRIPTION = """
 This is a demo Space for a fine-tuned version of [SmolVLM](https://huggingface.co/HuggingFaceTB/SmolVLM-Instruct) trained using [rlaif-v dataset](https://huggingface.co/datasets/HuggingFaceH4/rlaif-v_formatted).
-The corresponding model is located [here](https://huggingface.co/sergiopaniego/smolvlm-instruct-trl-dpo-rlaif-v).
 """
 model_id = "HuggingFaceTB/SmolVLM-Instruct"
@@ -72,9 +72,15 @@ def run_example(image, text_input=None):
     text = processor.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
     )
-    image_inputs, video_inputs = process_vision_info(messages)
     inputs = processor(
-        text=[text],
         images=image_inputs,
         videos=video_inputs,
         padding=True,
@@ -103,7 +109,7 @@ css = """
 with gr.Blocks(css=css) as demo:
     gr.Markdown(DESCRIPTION)
-    with gr.Tab(label="SmolVLM-trl-dpo-rlaif-v Input"):
         with gr.Row():
             with gr.Column():
                 input_img = gr.Image(label="Input Picture")

 This is a demo Space for a fine-tuned version of [SmolVLM](https://huggingface.co/HuggingFaceTB/SmolVLM-Instruct) trained using [rlaif-v dataset](https://huggingface.co/datasets/HuggingFaceH4/rlaif-v_formatted).
+The corresponding model is located [here](https://huggingface.co/HuggingFaceTB/SmolVLM-Instruct-DPO).
 """
 model_id = "HuggingFaceTB/SmolVLM-Instruct"
     text = processor.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
     )
+    #image_inputs, video_inputs = process_vision_info(messages)
+    image_inputs = []
+    #image = messages['images'][0]
+    if image.mode != 'RGB':
+        image = image.convert('RGB')
+    image_inputs.append([image])
     inputs = processor(
+        text=text,
         images=image_inputs,
         videos=video_inputs,
         padding=True,
 with gr.Blocks(css=css) as demo:
     gr.Markdown(DESCRIPTION)
+    with gr.Tab(label="SmolVLM-Instruct-DPO Input"):
         with gr.Row():
             with gr.Column():
                 input_img = gr.Image(label="Input Picture")