Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -250,6 +250,7 @@ phi4_model = AutoModelForCausalLM.from_pretrained(
|
|
250 |
device_map="auto",
|
251 |
torch_dtype="auto",
|
252 |
trust_remote_code=True,
|
|
|
253 |
)
|
254 |
|
255 |
# ------------------------------------------------------------------------------
|
@@ -562,7 +563,7 @@ def generate(
|
|
562 |
yield gr.Image(result_img)
|
563 |
return
|
564 |
|
565 |
-
# --- Phi-4 Multimodal branch (Image/Audio) ---
|
566 |
if text.strip().lower().startswith("@phi4"):
|
567 |
question = text[len("@phi4"):].strip()
|
568 |
if not files:
|
@@ -601,18 +602,28 @@ def generate(
|
|
601 |
yield "Invalid file type for @phi4 multimodal processing."
|
602 |
return
|
603 |
|
604 |
-
|
605 |
-
|
606 |
-
|
607 |
-
|
608 |
-
|
609 |
-
|
610 |
-
|
611 |
-
|
612 |
-
|
613 |
-
|
614 |
-
|
615 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
616 |
return
|
617 |
|
618 |
# --- Text and TTS branch ---
|
|
|
250 |
device_map="auto",
|
251 |
torch_dtype="auto",
|
252 |
trust_remote_code=True,
|
253 |
+
_attn_implementation="eager",
|
254 |
)
|
255 |
|
256 |
# ------------------------------------------------------------------------------
|
|
|
563 |
yield gr.Image(result_img)
|
564 |
return
|
565 |
|
566 |
+
# --- Phi-4 Multimodal branch (Image/Audio) with Streaming ---
|
567 |
if text.strip().lower().startswith("@phi4"):
|
568 |
question = text[len("@phi4"):].strip()
|
569 |
if not files:
|
|
|
602 |
yield "Invalid file type for @phi4 multimodal processing."
|
603 |
return
|
604 |
|
605 |
+
# Initialize the streamer
|
606 |
+
streamer = TextIteratorStreamer(phi4_processor, skip_prompt=True, skip_special_tokens=True)
|
607 |
+
|
608 |
+
# Prepare generation kwargs
|
609 |
+
generation_kwargs = {
|
610 |
+
**inputs,
|
611 |
+
"streamer": streamer,
|
612 |
+
"max_new_tokens": 200,
|
613 |
+
"num_logits_to_keep": 0,
|
614 |
+
}
|
615 |
+
|
616 |
+
# Start generation in a separate thread
|
617 |
+
thread = Thread(target=phi4_model.generate, kwargs=generation_kwargs)
|
618 |
+
thread.start()
|
619 |
+
|
620 |
+
# Stream the response
|
621 |
+
buffer = ""
|
622 |
+
yield "🤔 Processing with Phi-4..."
|
623 |
+
for new_text in streamer:
|
624 |
+
buffer += new_text
|
625 |
+
time.sleep(0.01) # Small delay to simulate real-time streaming
|
626 |
+
yield buffer
|
627 |
return
|
628 |
|
629 |
# --- Text and TTS branch ---
|