prithivMLmods committed on
Commit
dadbd9f
·
verified ·
1 Parent(s): 3b016e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -13
app.py CHANGED
@@ -250,6 +250,7 @@ phi4_model = AutoModelForCausalLM.from_pretrained(
250
  device_map="auto",
251
  torch_dtype="auto",
252
  trust_remote_code=True,
 
253
  )
254
 
255
  # ------------------------------------------------------------------------------
@@ -562,7 +563,7 @@ def generate(
562
  yield gr.Image(result_img)
563
  return
564
 
565
- # --- Phi-4 Multimodal branch (Image/Audio) ---
566
  if text.strip().lower().startswith("@phi4"):
567
  question = text[len("@phi4"):].strip()
568
  if not files:
@@ -601,18 +602,28 @@ def generate(
601
  yield "Invalid file type for @phi4 multimodal processing."
602
  return
603
 
604
- with torch.no_grad():
605
- generate_ids = phi4_model.generate(
606
- **inputs,
607
- max_new_tokens=200,
608
- num_logits_to_keep=0,
609
- )
610
- input_length = inputs['input_ids'].shape[1]
611
- generate_ids = generate_ids[:, input_length:]
612
- response = phi4_processor.batch_decode(
613
- generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
614
- )[0]
615
- yield response
 
 
 
 
 
 
 
 
 
 
616
  return
617
 
618
  # --- Text and TTS branch ---
 
250
  device_map="auto",
251
  torch_dtype="auto",
252
  trust_remote_code=True,
253
+ _attn_implementation="eager",
254
  )
255
 
256
  # ------------------------------------------------------------------------------
 
563
  yield gr.Image(result_img)
564
  return
565
 
566
+ # --- Phi-4 Multimodal branch (Image/Audio) with Streaming ---
567
  if text.strip().lower().startswith("@phi4"):
568
  question = text[len("@phi4"):].strip()
569
  if not files:
 
602
  yield "Invalid file type for @phi4 multimodal processing."
603
  return
604
 
605
+ # Initialize the streamer
606
+ streamer = TextIteratorStreamer(phi4_processor, skip_prompt=True, skip_special_tokens=True)
607
+
608
+ # Prepare generation kwargs
609
+ generation_kwargs = {
610
+ **inputs,
611
+ "streamer": streamer,
612
+ "max_new_tokens": 200,
613
+ "num_logits_to_keep": 0,
614
+ }
615
+
616
+ # Start generation in a separate thread
617
+ thread = Thread(target=phi4_model.generate, kwargs=generation_kwargs)
618
+ thread.start()
619
+
620
+ # Stream the response
621
+ buffer = ""
622
+ yield "🤔 Processing with Phi-4..."
623
+ for new_text in streamer:
624
+ buffer += new_text
625
+ time.sleep(0.01) # Small delay to simulate real-time streaming
626
+ yield buffer
627
  return
628
 
629
  # --- Text and TTS branch ---