PHI4-Multimodal

Running on Zero

prithivMLmods committed 1 day ago

Commit

dadbd9f

verified ·

1 Parent(s): 3b016e9

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -250,6 +250,7 @@ phi4_model = AutoModelForCausalLM.from_pretrained(
     device_map="auto",
     torch_dtype="auto",
     trust_remote_code=True,
 )
 # ------------------------------------------------------------------------------
@@ -562,7 +563,7 @@ def generate(
         yield gr.Image(result_img)
         return
-    # --- Phi-4 Multimodal branch (Image/Audio) ---
     if text.strip().lower().startswith("@phi4"):
         question = text[len("@phi4"):].strip()
         if not files:
@@ -601,18 +602,28 @@ def generate(
             yield "Invalid file type for @phi4 multimodal processing."
             return
-        with torch.no_grad():
-            generate_ids = phi4_model.generate(
-                **inputs,
-                max_new_tokens=200,
-                num_logits_to_keep=0,
-            )
-        input_length = inputs['input_ids'].shape[1]
-        generate_ids = generate_ids[:, input_length:]
-        response = phi4_processor.batch_decode(
-            generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
-        )[0]
-        yield response
         return
     # --- Text and TTS branch ---

     device_map="auto",
     torch_dtype="auto",
     trust_remote_code=True,
+    _attn_implementation="eager",
 )
 # ------------------------------------------------------------------------------
         yield gr.Image(result_img)
         return
+    # --- Phi-4 Multimodal branch (Image/Audio) with Streaming ---
     if text.strip().lower().startswith("@phi4"):
         question = text[len("@phi4"):].strip()
         if not files:
             yield "Invalid file type for @phi4 multimodal processing."
             return
+        # Initialize the streamer
+        streamer = TextIteratorStreamer(phi4_processor, skip_prompt=True, skip_special_tokens=True)
+        # Prepare generation kwargs
+        generation_kwargs = {
+            **inputs,
+            "streamer": streamer,
+            "max_new_tokens": 200,
+            "num_logits_to_keep": 0,
+        }
+        # Start generation in a separate thread
+        thread = Thread(target=phi4_model.generate, kwargs=generation_kwargs)
+        thread.start()
+        # Stream the response
+        buffer = ""
+        yield "🤔 Processing with Phi-4..."
+        for new_text in streamer:
+            buffer += new_text
+            time.sleep(0.01)  # Small delay to simulate real-time streaming
+            yield buffer
         return
     # --- Text and TTS branch ---