Added output format option
- messages.py +91 -0
- search_agent.py +17 -93
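This commit extracts the prompt construction for the two LLM calls into a new messages.py module and adds an --output option that selects between plain-text and Markdown rendering of the final answer.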
messages.py
ADDED
@@ -0,0 +1,91 @@
+import json
+from langchain.schema import SystemMessage, HumanMessage
+
+def get_optimized_search_messages(query):
+    messages = [
+        SystemMessage(
+            content="""
+                You are a search query optimization specialist.
+                Rewrite the user's question using only the most important keywords. Remove extra words.
+                Tips:
+                    Identify the key concepts in the question
+                    Remove style words such as "in the style of", "engaging", "short", "long"
+                    Remove filler words like "how to", "what is", "I want to"
+                    Remove length instructions (example: essay, article, letter, blog, post, blogpost, etc)
+                    Keep it short, around 3-7 words total
+                    Put the most important keywords first
+                    Remove formatting instructions
+                    Remove style instructions (example: in the style of, engaging, short, long)
+                    Remove length instructions (example: essay, article, letter, etc)
+                Example:
+                    Question: How do I bake chocolate chip cookies from scratch?
+                    Search query: chocolate chip cookies recipe from scratch
+                Example:
+                    Question: I would like you to show me a timeline of Marie Curie's life. Show results as a markdown table
+                    Search query: Marie Curie timeline
+                Example:
+                    Question: I would like you to write a long article on NATO vs Russia. Use known geopolitical frameworks.
+                    Search query: geopolitics nato russia
+                Example:
+                    Question: Write an engaging LinkedIn post about Andrew Ng
+                    Search query: Andrew Ng
+                Example:
+                    Question: Write a short article about the solar system in the style of Carl Sagan
+                    Search query: solar system
+                Example:
+                    Question: Should I use Kubernetes? Answer in the style of Gilfoyle from the TV show Silicon Valley
+                    Search query: Kubernetes decision
+                Example:
+                    Question: biography of Napoleon. Include a table with the major events.
+                    Search query: napoleon biography events
+            """
+        ),
+        HumanMessage(
+            content=f"""
+                Question: {query}
+                Search query:
+            """
+        ),
+    ]
+    return messages
+
+def get_query_with_sources_messages(query, relevant_docs):
+    messages = [
+        SystemMessage(
+            content="""
+                You are an expert research assistant.
+                You are provided with a Context in JSON format and a Question.
+
+                Use RAG to answer the Question, providing references and links to the Context material you retrieve and use in your answer:
+                When generating your answer, follow these steps:
+                - Retrieve the most relevant context material from your knowledge base to help answer the question
+                - Cite the references you use by including the title, author, publication, and a link to each source
+                - Synthesize the retrieved information into a clear, informative answer to the question
+                - Format your answer in Markdown, using heading levels 2-3 as needed
+                - Include a "References" section at the end with the full citations and a link for each source you used
+
+
+                Example of Context JSON entry:
+                {
+                    "page_content": "This provides access to material related to ...",
+                    "metadata": {
+                        "title": "Introduction - Marie Curie: Topics in Chronicling America",
+                        "link": "https://guides.loc.gov/chronicling-america-marie-curie"
+                    }
+                }
+
+            """
+        ),
+        HumanMessage(
+            content=f"""
+                Context information is below.
+                Context:
+                ---------------------
+                {json.dumps(relevant_docs, indent=2, ensure_ascii=False)}
+                ---------------------
+                Question: {query}
+                Answer:
+            """
+        ),
+    ]
+    return messages
search_agent.py
CHANGED
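The inline prompt blocks are replaced by imports from messages.py, argument parsing is consolidated at the top of the entry point, and the final answer is printed according to the new --output flag.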
@@ -6,6 +6,7 @@ Usage:
     [--provider=provider]
     [--temperature=temp]
     [--max_pages=num]
+    [--output=text]
     SEARCH_QUERY
     search_agent.py --version
 
@@ -16,6 +17,7 @@ Options:
 -t temp --temperature=temp       Set the temperature of the LLM [default: 0.0]
 -p provider --provider=provider  Use a specific LLM (choices: bedrock,openai,groq) [default: openai]
 -m num --max_pages=num           Max number of pages to retrieve [default: 10]
+-o text --output=text            Output format (choices: text, markdown) [default: markdown]
 
 """
 
@@ -63,52 +65,8 @@ def get_chat_llm(provider, temperature=0.0):
     return chat_llm
 
 def optimize_search_query(query):
-    messages = [
-        SystemMessage(
-            content="""
-                You are a search query optimization specialist.
-                Rewrite the user's question using only the most important keywords. Remove extra words.
-                Tips:
-                    Identify the key concepts in the question
-                    Remove filler words like "how to", "what is", "I want to"
-                    Remove style words such as "in the style of", "engaging", "short", "long"
-                    Remove length instructions (example: essay, article, letter, blog, post, blogpost, etc)
-                    Keep it short, around 3-7 words total
-                    Put the most important keywords first
-                    Remove formatting instructions
-                    Remove style instructions (example: in the style of, engaging, short, long)
-                    Remove length instructions (example: essay, article, letter, etc)
-                Example:
-                    Question: How do I bake chocolate chip cookies from scratch?
-                    Search query: chocolate chip cookies recipe from scratch
-                Example:
-                    Question: I would like you to show me a timeline of Marie Curie's life. Show results as a markdown table
-                    Search query: Marie Curie timeline
-                Example:
-                    Question: I would like you to write a long article on NATO vs Russia. Use known geopolitical frameworks.
-                    Search query: geopolitics nato russia
-                Example:
-                    Question: Write an engaging LinkedIn post about Andrew Ng
-                    Search query: Andrew Ng
-                Example:
-                    Question: Write a short article about the solar system in the style of Carl Sagan
-                    Search query: solar system
-                Example:
-                    Question: Should I use Kubernetes? Answer in the style of Gilfoyle from the TV show Silicon Valley
-                    Search query: Kubernetes decision
-                Example:
-                    Question: biography of Napoleon. Include a table with the major events.
-                    Search query: napoleon biography events
-            """
-        ),
-        HumanMessage(
-            content=f"""
-                Question: {query}
-                Search query:
-            """
-        ),
-    ]
-
+    from messages import get_optimized_search_messages
+    messages = get_optimized_search_messages(query)
     response = chat.invoke(messages, config={"callbacks": callbacks})
     return response.content
 
@@ -238,45 +196,8 @@ def process_and_vectorize_content(
 
 
 def answer_query_with_sources(query, relevant_docs):
-    messages = [
-        SystemMessage(
-            content="""
-                You are an expert research assistant.
-                You are provided with a Context in JSON format and a Question.
-
-                Use RAG to answer the Question, providing references and links to the Context material you retrieve and use in your answer:
-                When generating your answer, follow these steps:
-                - Retrieve the most relevant context material from your knowledge base to help answer the question
-                - Cite the references you use by including the title, author, publication, and a link to each source
-                - Synthesize the retrieved information into a clear, informative answer to the question
-                - Format your answer in Markdown, using heading levels 2-3 as needed
-                - Include a "References" section at the end with the full citations and a link for each source you used
-
-
-                Example of Context JSON entry:
-                {
-                    "page_content": "This provides access to material related to ...",
-                    "metadata": {
-                        "title": "Introduction - Marie Curie: Topics in Chronicling America",
-                        "link": "https://guides.loc.gov/chronicling-america-marie-curie"
-                    }
-                }
-
-            """
-        ),
-        HumanMessage(
-            content=f"""
-                Context information is below.
-                Context:
-                ---------------------
-                {json.dumps(relevant_docs, indent=2, ensure_ascii=False)}
-                ---------------------
-                Question: {query}
-                Answer:
-            """
-        ),
-    ]
-
+    from messages import get_query_with_sources_messages
+    messages = get_query_with_sources_messages(query, relevant_docs)
     response = chat.invoke(messages, config={"callbacks": callbacks})
     return response
 
@@ -296,20 +217,20 @@ if(os.getenv("LANGCHAIN_API_KEY")):
 
 if __name__ == '__main__':
     arguments = docopt(__doc__, version='Search Agent 0.1')
-    #print(arguments)
-
 
     provider = arguments["--provider"]
     temperature = float(arguments["--temperature"])
-
+    domain=arguments["--domain"]
+    max_pages=arguments["--max_pages"]
+    output=arguments["--output"]
     query = arguments["SEARCH_QUERY"]
-
+
+    chat = get_chat_llm(provider, temperature)
+
    with console.status(f"[bold green]Optimizing query for search: {query}"):
         optimize_search_query = optimize_search_query(query)
-        console.log(f"Optimized search query: [bold blue]{optimize_search_query}")
+    console.log(f"Optimized search query: [bold blue]{optimize_search_query}")
 
-    domain=arguments["--domain"]
-    max_pages=arguments["--max_pages"]
     with console.status(f"[bold green]Searching sources using the optimized query: {optimize_search_query}"):
         sources = get_sources(optimize_search_query, max_pages=max_pages, domain=domain)
     console.log(f"Found {len(sources)} sources {'on ' + domain if domain else ''}")
 
@@ -329,5 +250,8 @@ if __name__ == '__main__':
     response = answer_query_with_sources(query, relevant_docs)
 
     console.rule(f"[bold green]Response from {provider}")
-
+    if output == "text":
+        console.print(response.content)
+    else:
+        console.print(Markdown(response.content))
     console.rule("[bold green]")
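With the new option in place, a run might look like this (queries are illustrative):

    search_agent.py -o text "Marie Curie timeline"
    search_agent.py --output=markdown --max_pages=5 "Should I use Kubernetes?"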
|