VanguardAI committed (verified)
Commit 5db9d8c · Parent(s): 9748b93

Update app.py

Files changed (1):
  1. app.py (+39, -76)
app.py CHANGED
@@ -8,13 +8,11 @@ from transformers import AutoModel, AutoTokenizer
 from diffusers import StableDiffusionXLPipeline, UNet2DConditionModel, EulerDiscreteScheduler
 from parler_tts import ParlerTTSForConditionalGeneration
 import soundfile as sf
-from langchain_community.embeddings import OpenAIEmbeddings
-from langchain_community.vectorstores import Chroma
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.chains import RetrievalQA
-from langchain import LLMChain, PromptTemplate
-from langchain.agents import AgentExecutor, Tool, ZeroShotAgent
-from langchain.llms import OpenAI
+from llama_index import GPTSimpleVectorIndex, SimpleDirectoryReader, LLMPredictor, PromptHelper
+from llama_index.embeddings import GroqEmbedding
+from llama_index.llms import GroqLLM
+from llama_index.agent import ReActAgent
+from llama_index.tools import FunctionTool
 from PIL import Image
 from decord import VideoReader, cpu
 from tavily import TavilyClient
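
Note on the new imports: they mix two generations of the llama_index API. GPTSimpleVectorIndex, LLMPredictor, and PromptHelper come from the legacy (roughly pre-0.5) interface, while ReActAgent and FunctionTool belong to the newer one, and a GroqEmbedding or GroqLLM class is not something current llama_index releases are known to ship (Groq also does not expose an embeddings endpoint). A minimal sketch of equivalent imports, assuming llama-index 0.10+ with the llama-index-llms-groq and llama-index-embeddings-huggingface integration packages installed (an assumption, not what this commit installs):

# Sketch only, not part of the committed code; assumes llama-index>=0.10
# plus the Groq and HuggingFace integration packages.
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.core.agent import ReActAgent
from llama_index.core.tools import FunctionTool
from llama_index.llms.groq import Groq  # Groq-hosted chat models
from llama_index.embeddings.huggingface import HuggingFaceEmbedding  # local embeddings, since Groq serves no embeddings API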
@@ -89,32 +87,29 @@ def image_generation(query):
 
 # Document Question Answering Tool
 def doc_question_answering(query, file_path):
-    with open(file_path, 'r') as f:
-        file_content = f.read()
-
-    # Split the document into smaller chunks
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
-    docs = text_splitter.create_documents([file_content])
-
-    # Create embeddings using the groq model
-    embeddings = OpenAIEmbeddings() # If you're using a custom embeddings model, replace this line with the corresponding embeddings model for groq
-
-    # Set up the Chroma database for document retrieval
-    db = Chroma.from_documents(docs, embeddings, persist_directory=".chroma_db")
-
-    # Create a custom function to use groq for the question-answering step
-    def groq_llm(query):
-        response = client.chat.completions.create(
-            model=MODEL,
-            messages=[{"role": "user", "content": query}]
-        )
-        return response.choices[0].message.content
-
-    # Set up the RetrievalQA chain using the custom groq LLM function
-    qa = RetrievalQA.from_chain_type(llm=groq_llm, chain_type="stuff", retriever=db.as_retriever())
-
-    # Run the QA process with the groq model
-    return qa.run(query)
+    # Load documents
+    documents = SimpleDirectoryReader(input_files=[file_path]).load_data()
+
+    # Initialize Groq embedding model
+    embed_model = GroqEmbedding()
+
+    # Initialize Groq LLM
+    llm_predictor = LLMPredictor(llm=GroqLLM(model_name=MODEL))
+
+    # Initialize prompt helper
+    prompt_helper = PromptHelper()
+
+    # Create index
+    index = GPTSimpleVectorIndex.from_documents(
+        documents,
+        embed_model=embed_model,
+        llm_predictor=llm_predictor,
+        prompt_helper=prompt_helper
+    )
+
+    # Query the index
+    response = index.query(query)
+    return response.response
 
 # Function to handle different input types and choose the right tool
 def handle_input(user_prompt, image=None, video=None, audio=None, doc=None, websearch=False):
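
The rewritten doc_question_answering builds a GPTSimpleVectorIndex and calls index.query(), which follows the legacy API noted above. Against the newer query-engine interface, the same load-index-query flow would look roughly like the sketch below; it reuses MODEL from app.py and the hypothetical import set from the previous note, and the embedding model name is just an example.

# Sketch under the assumptions above; MODEL is the Groq model id already defined in app.py.
def doc_question_answering(query, file_path):
    # Load the uploaded file from disk
    documents = SimpleDirectoryReader(input_files=[file_path]).load_data()

    # Embed and index locally; answer generation goes through the Groq-hosted LLM
    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
    Settings.llm = Groq(model=MODEL)
    index = VectorStoreIndex.from_documents(documents)

    # Retrieve the relevant chunks and synthesize an answer
    query_engine = index.as_query_engine()
    response = query_engine.query(query)
    return str(response)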
@@ -128,53 +123,21 @@ def handle_input(user_prompt, image=None, video=None, audio=None, doc=None, webs
         user_prompt = transcription.text
 
     tools = [
-        Tool(
-            name="Numpy Code Calculator",
-            func=numpy_code_calculator,
-            description="Useful for when you need to perform mathematical calculations using NumPy. Provide the calculation you want to perform.",
-        ),
-        Tool(
-            name="Web Search",
-            func=web_search,
-            description="Useful for when you need to find information from the real world.",
-        ),
-        Tool(
-            name="Image Generation",
-            func=image_generation,
-            description="Useful for when you need to generate an image based on a description.",
-        ),
+        FunctionTool.from_defaults(fn=numpy_code_calculator, name="Numpy Code Calculator"),
+        FunctionTool.from_defaults(fn=web_search, name="Web Search"),
+        FunctionTool.from_defaults(fn=image_generation, name="Image Generation"),
     ]
 
     if doc:
         tools.append(
-            Tool(
-                name="Document Question Answering",
-                func=lambda query: doc_question_answering(query, doc.name),
-                description="Useful for when you need to answer questions about the uploaded document.",
+            FunctionTool.from_defaults(
+                fn=lambda query: doc_question_answering(query, doc.name),
+                name="Document Question Answering"
             )
         )
 
-    # Add this new code block:
-    prefix = """You are an AI assistant. You have access to the following tools:"""
-    suffix = """Begin!"
-
-    {chat_history}
-    Human: {input}
-    AI: I will do my best to assist you. Let me think about this step-by-step:"""
-
-    prompt = ZeroShotAgent.create_prompt(
-        tools,
-        prefix=prefix,
-        suffix=suffix,
-        input_variables=["input", "chat_history"]
-    )
-
-    llm = Groq(model=MODEL)
-    llm_chain = LLMChain(llm=llm, prompt=prompt)
-
-    agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True)
-
-    agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)
+    llm = GroqLLM(model_name=MODEL)
+    agent = ReActAgent.from_tools(tools, llm=llm, verbose=True)
 
     if image:
         image = Image.open(image).convert('RGB')
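
One behavioural difference in this hunk: the removed LangChain Tool definitions each carried a description, while the replacement FunctionTool.from_defaults(...) calls keep only the name. ReAct-style agents choose tools from their name and description (falling back to the wrapped function's docstring), so dropping the descriptions may weaken tool selection; FunctionTool.from_defaults accepts a description argument. A sketch carrying the original descriptions over (with the same caveat as above about which Groq LLM class actually exists):

# Sketch: the same tool list with descriptions preserved; the wrapped
# functions are the ones already defined in app.py.
tools = [
    FunctionTool.from_defaults(
        fn=numpy_code_calculator,
        name="Numpy Code Calculator",
        description="Useful for when you need to perform mathematical calculations using NumPy.",
    ),
    FunctionTool.from_defaults(
        fn=web_search,
        name="Web Search",
        description="Useful for when you need to find information from the real world.",
    ),
    FunctionTool.from_defaults(
        fn=image_generation,
        name="Image Generation",
        description="Useful for when you need to generate an image based on a description.",
    ),
]

Under the newer API, the agent itself would be built the same way, e.g. agent = ReActAgent.from_tools(tools, llm=Groq(model=MODEL), verbose=True).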
@@ -183,9 +146,9 @@ def handle_input(user_prompt, image=None, audio=None, doc=None, websearch=False):
         return response
 
     if websearch:
-        response = agent_executor.run(f"{user_prompt} Use the Web Search tool if necessary.")
+        response = agent.chat(f"{user_prompt} Use the Web Search tool if necessary.")
     else:
-        response = agent_executor.run(user_prompt)
+        response = agent.chat(user_prompt)
 
     return response
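
A last note on the agent.chat() calls: in current llama-index releases, ReActAgent.chat() returns an AgentChatResponse object rather than a plain string, and the agent keeps its own chat history between calls (the removed ZeroShotAgent prompt threaded {chat_history} in explicitly). If the downstream Gradio code expects text, an explicit conversion is needed, for example:

# Sketch: extracting plain text from the agent reply inside handle_input
if websearch:
    chat_response = agent.chat(f"{user_prompt} Use the Web Search tool if necessary.")
else:
    chat_response = agent.chat(user_prompt)
response = str(chat_response)  # equivalently chat_response.response
return response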
 
@@ -245,4 +208,4 @@ def main_interface(user_prompt, image=None, audio=None, doc=None, voice_only=Fal
 
 # Launch the UI
 demo = create_ui()
-demo.launch()
+demo.launch()