PHI4-Multimodal

Running on Zero

App Files Files Community

prithivMLmods committed 14 days ago

Commit

c44519e

verified ·

1 Parent(s): e50d8bd

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -90

app.py CHANGED Viewed

@@ -8,7 +8,6 @@ import tempfile
 from threading import Thread
 import base64
 import shutil
-import re  # Added for the new tools
 import gradio as gr
 import spaces
@@ -17,7 +16,6 @@ import numpy as np
 from PIL import Image
 import edge_tts
 import trimesh
-import smolagents  # For the new tools
 from transformers import (
     AutoModelForCausalLM,
@@ -278,73 +276,28 @@ def generate_image_fn(
     return image_paths, seed
 # -----------------------------------------------------------------------------
-# Tools for Web Search and Webpage Visiting using DuckDuckGo and smolagents
 # -----------------------------------------------------------------------------
-from typing import Any, Optional
-from smolagents.tools import Tool
-import requests
-import markdownify
-import duckduckgo_search
-class VisitWebpageTool(Tool):
-    name = "visit_webpage"
-    description = "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
-    inputs = {'url': {'type': 'string', 'description': 'The url of the webpage to visit.'}}
-    output_type = "string"
-    def forward(self, url: str) -> str:
-        try:
-            from markdownify import markdownify
-            from requests.exceptions import RequestException
-            from smolagents.utils import truncate_content
-        except ImportError as e:
-            raise ImportError(
-                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
-            ) from e
-        try:
-            response = requests.get(url, timeout=20)
-            response.raise_for_status()  # Raise an exception for bad status codes
-            # Convert the HTML content to Markdown
-            markdown_content = markdownify.markdownify(response.text).strip()
-            # Remove multiple line breaks
-            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
-            return truncate_content(markdown_content, 10000)
-        except requests.exceptions.Timeout:
-            return "The request timed out. Please try again later or check the URL."
-        except RequestException as e:
-            return f"Error fetching the webpage: {str(e)}"
-        except Exception as e:
-            return f"An unexpected error occurred: {str(e)}"
-class DuckDuckGoSearchTool(Tool):
-    name = "web_search"
-    description = "Performs a duckduckgo web search based on your query (think a Google search) then returns the top search results."
-    inputs = {'query': {'type': 'string', 'description': 'The search query to perform.'}}
-    output_type = "string"
-    def __init__(self, max_results=10, **kwargs):
-        super().__init__()
-        self.max_results = max_results
-        try:
-            from duckduckgo_search import DDGS
-        except ImportError as e:
-            raise ImportError(
-                "You must install package `duckduckgo_search` to run this tool: for instance run `pip install duckduckgo-search`."
-            ) from e
-        self.ddgs = DDGS(**kwargs)
-    def forward(self, query: str) -> str:
-        results = self.ddgs.text(query, max_results=self.max_results)
-        if len(results) == 0:
-            raise Exception("No results found! Try a less restrictive/shorter query.")
-        postprocessed_results = [
-            f"[{result['title']}]({result['href']})\n{result['body']}" for result in results
-        ]
-        return "## Search Results\n\n" + "\n\n".join(postprocessed_results)
 # -----------------------------------------------------------------------------
-# Chat Generation Function with support for @tts, @image, @3d, and @web commands
 # -----------------------------------------------------------------------------
 @spaces.GPU
@@ -359,14 +312,12 @@ def generate(
 ):
     """
     Generates chatbot responses with support for multimodal input, TTS, image generation,
-    3D model generation, and web search/webpage visiting.
     Special commands:
       - "@tts1" or "@tts2": triggers text-to-speech.
       - "@image": triggers image generation using the SDXL pipeline.
       - "@3d": triggers 3D model generation using the ShapE pipeline.
-      - "@web": triggers a web command. Use "visit" to visit a URL (e.g., "@web visit https://example.com")
-                or "search" to perform a DuckDuckGo search (e.g., "@web search AI news").
     """
     text = input_dict["text"]
     files = input_dict.get("files", [])
@@ -413,26 +364,6 @@ def generate(
         yield gr.Image(image_paths[0])
         return
-    # --- Web Search/Visit branch ---
-    if text.strip().lower().startswith("@web"):
-        command_text = text[len("@web"):].strip()
-        if command_text.lower().startswith("visit "):
-            url = command_text[len("visit"):].strip()
-            yield "Visiting webpage..."
-            result = VisitWebpageTool().forward(url)
-            yield result
-        elif command_text.lower().startswith("search "):
-            query = command_text[len("search"):].strip()
-            yield "Performing web search..."
-            result = DuckDuckGoSearchTool().forward(query)
-            yield result
-        else:
-            # Default to web search if no subcommand is specified.
-            yield "Performing web search..."
-            result = DuckDuckGoSearchTool().forward(command_text)
-            yield result
-        return
     # --- Text and TTS branch ---
     tts_prefix = "@tts"
     is_tts = any(text.strip().lower().startswith(f"{tts_prefix}{i}") for i in range(1, 3))
@@ -525,10 +456,9 @@ demo = gr.ChatInterface(
     examples=[
         ["@tts1 Who is Nikola Tesla, and why did he die?"],
         ["@3d A birthday cupcake with cherry"],
-        ["@web Is Grok-3 Beats DeepSeek-R1 at Reasoning ?"],
         [{"text": "summarize the letter", "files": ["examples/1.png"]}],
         ["@image Chocolate dripping from a donut against a yellow background, in the style of brocore, hyper-realistic"],
-        ["Explain newton's third law"],
         ["@tts2 What causes rainbows to form?"],
     ],
     cache_examples=False,
@@ -550,4 +480,5 @@ from fastapi.staticfiles import StaticFiles
 demo.app.mount("/static", StaticFiles(directory="static"), name="static")
 if __name__ == "__main__":
     demo.queue(max_size=20).launch(share=True)

 from threading import Thread
 import base64
 import shutil
 import gradio as gr
 import spaces
 from PIL import Image
 import edge_tts
 import trimesh
 from transformers import (
     AutoModelForCausalLM,
     return image_paths, seed
 # -----------------------------------------------------------------------------
+# Text-to-3D Generation using the ShapE Pipeline
 # -----------------------------------------------------------------------------
+@spaces.GPU(duration=120, enable_queue=True)
+def generate_3d_fn(
+    prompt: str,
+    seed: int = 1,
+    guidance_scale: float = 15.0,
+    num_steps: int = 64,
+    randomize_seed: bool = False,
+):
+    """
+    Generate a 3D model from text using the ShapE pipeline.
+    Returns a tuple of (glb_file_path, used_seed).
+    """
+    seed = int(randomize_seed_fn(seed, randomize_seed))
+    model3d = Model()
+    glb_path = model3d.run_text(prompt, seed=seed, guidance_scale=guidance_scale, num_steps=num_steps)
+    return glb_path, seed
 # -----------------------------------------------------------------------------
+# Chat Generation Function with support for @tts, @image, and @3d commands
 # -----------------------------------------------------------------------------
 @spaces.GPU
 ):
     """
     Generates chatbot responses with support for multimodal input, TTS, image generation,
+    and 3D model generation.
     Special commands:
       - "@tts1" or "@tts2": triggers text-to-speech.
       - "@image": triggers image generation using the SDXL pipeline.
       - "@3d": triggers 3D model generation using the ShapE pipeline.
     """
     text = input_dict["text"]
     files = input_dict.get("files", [])
         yield gr.Image(image_paths[0])
         return
     # --- Text and TTS branch ---
     tts_prefix = "@tts"
     is_tts = any(text.strip().lower().startswith(f"{tts_prefix}{i}") for i in range(1, 3))
     examples=[
         ["@tts1 Who is Nikola Tesla, and why did he die?"],
         ["@3d A birthday cupcake with cherry"],
         [{"text": "summarize the letter", "files": ["examples/1.png"]}],
         ["@image Chocolate dripping from a donut against a yellow background, in the style of brocore, hyper-realistic"],
+        ["Write a Python function to check if a number is prime."],
         ["@tts2 What causes rainbows to form?"],
     ],
     cache_examples=False,
 demo.app.mount("/static", StaticFiles(directory="static"), name="static")
 if __name__ == "__main__":
+    # Launch without the unsupported static_dirs parameter.
     demo.queue(max_size=20).launch(share=True)