CyranoB committed on
Commit
7e9684b
1 Parent(s): 9c3709d

Added output format option

Browse files
Files changed (2) hide show
  1. messages.py +91 -0
  2. search_agent.py +17 -93
messages.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from langchain.schema import SystemMessage, HumanMessage
3
+
4
def get_optimized_search_messages(query):
    """Build the chat messages that ask the LLM to rewrite a question as a keyword search query.

    Args:
        query: The user's natural-language question.

    Returns:
        A two-element list ``[SystemMessage, HumanMessage]`` suitable for a
        LangChain chat model's ``invoke``.
    """
    messages = [
        SystemMessage(
            content="""
                You are a search query optimizer specialist.
                Rewrite the user's question using only the most important keywords. Remove extra words.
                Tips:
                Identify the key concepts in the question
                Remove filler words like "how to", "what is", "I want to"
                Remove style such as "in the style of", "engaging", "short", "long"
                Remove length instructions (example: essay, article, letter, blog, post, blogpost, etc)
                Keep it short, around 3-7 words total
                Put the most important keywords first
                Remove formatting instructions
                Remove style instructions (example: in the style of, engaging, short, long)
                Remove length instructions (example: essay, article, letter, etc)
                Example:
                    Question: How do I bake chocolate chip cookies from scratch?
                    Search query: chocolate chip cookies recipe from scratch
                Example:
                    Question: I would like you to show me a time line of Marie Curie life. Show results as a markdown table
                    Search query: Marie Curie timeline
                Example:
                    Question: I would like you to write a long article on nato vs russia. Use know geopolical frameworks.
                    Search query: geopolitics nato russia
                Example:
                    Question: Write a engaging linkedin post about Andrew Ng
                    Search query: Andrew Ng
                Example:
                    Question: Write a short artible about the solar system in the style of Carl Sagan
                    Search query: solar system
                Example:
                    Question: Should I use Kubernetes? Answer in the style of Gilfoyde from the TV show Silicon Valley
                    Search query: Kubernetes decision
                Example:
                    Question: biography of napoleon. include a table with the major events.
                    Search query: napoleon biography events
            """
        ),
        HumanMessage(
            # "Question:" (singular) to stay consistent with every few-shot example above.
            content=f"""
                Question: {query}
                Search query:
            """
        ),
    ]
    return messages
51
+
52
def get_query_with_sources_messages(query, relevant_docs):
    """Build the chat messages that ask the LLM to answer a question from retrieved context.

    Args:
        query: The user's natural-language question.
        relevant_docs: JSON-serializable retrieved documents (each with
            ``page_content`` and ``metadata`` containing ``title``/``link``).

    Returns:
        A two-element list ``[SystemMessage, HumanMessage]`` suitable for a
        LangChain chat model's ``invoke``.
    """
    messages = [
        # NOTE: the system prompt must stay a plain (non-f) string — it contains
        # literal JSON braces that an f-string would try to interpolate.
        SystemMessage(
            content="""
                You are an expert research assistant.
                You are provided with a Context in JSON format and a Question.

                Use RAG to answer the Question, providing references and links to the Context material you retrieve and use in your answer:
                When generating your answer, follow these steps:
                - Retrieve the most relevant context material from your knowledge base to help answer the question
                - Cite the references you use by including the title, author, publication, and a link to each source
                - Synthesize the retrieved information into a clear, informative answer to the question
                - Format your answer in Markdown, using heading levels 2-3 as needed
                - Include a "References" section at the end with the full citations and link for each source you used

                Example of Context JSON entry:
                {
                    "page_content": "This provides access to material related to ...",
                    "metadata": {
                        "title": "Introduction - Marie Curie: Topics in Chronicling America",
                        "link": "https://guides.loc.gov/chronicling-america-marie-curie"
                    }
                }
            """
        ),
        HumanMessage(
            content=f"""
                Context information is below.
                Context:
                ---------------------
                {json.dumps(relevant_docs, indent=2, ensure_ascii=False)}
                ---------------------
                Question: {query}
                Answer:
            """
        ),
    ]
    return messages
search_agent.py CHANGED
@@ -6,6 +6,7 @@ Usage:
6
  [--provider=provider]
7
  [--temperature=temp]
8
  [--max_pages=num]
 
9
  SEARCH_QUERY
10
  search_agent.py --version
11
 
@@ -16,6 +17,7 @@ Options:
16
  -t temp --temperature=temp Set the temperature of the LLM [default: 0.0]
17
  -p provider --provider=provider Use a specific LLM (choices: bedrock,openai,groq) [default: openai]
18
  -m num --max_pages=num Max number of pages to retrieve [default: 10]
 
19
 
20
  """
21
 
@@ -63,52 +65,8 @@ def get_chat_llm(provider, temperature=0.0):
63
  return chat_llm
64
 
65
  def optimize_search_query(query):
66
- messages = [
67
- SystemMessage(
68
- content="""
69
- You are a serach query optimizer specialist.
70
- Rewrite the user's question using only the most important keywords. Remove extra words.
71
- Tips:
72
- Identify the key concepts in the question
73
- Remove filler words like "how to", "what is", "I want to"
74
- Removed style such as "in the style of", "engaging", "short", "long"
75
- Remove lenght instruction (example: essay, article, letter, blog, post, blogpost, etc)
76
- Keep it short, around 3-7 words total
77
- Put the most important keywords first
78
- Remove formatting instructions
79
- Remove style instructions (exmaple: in the style of, engaging, short, long)
80
- Remove lenght instruction (example: essay, article, letter, etc)
81
- Example:
82
- Question: How do I bake chocolate chip cookies from scratch?
83
- Search query: chocolate chip cookies recipe from scratch
84
- Example:
85
- Question: I would like you to show me a time line of Marie Curie life. Show results as a markdown table
86
- Search query: Marie Curie timeline
87
- Example:
88
- Question: I would like you to write a long article on nato vs russia. Use know geopolical frameworks.
89
- Search query: geopolitics nato russia
90
- Example:
91
- Question: Write a engaging linkedin post about Andrew Ng
92
- Search query: Andrew Ng
93
- Example:
94
- Question: Write a short artible about the solar system in the style of Carl Sagan
95
- Search query: solar system
96
- Example:
97
- Question: Should I use Kubernetes? Answer in the style of Gilfoyde from the TV show Silicon Valley
98
- Search query: Kubernetes decision
99
- Example:
100
- Question: biography of napoleon. include a table with the major events.
101
- Search query: napoleon biography events
102
- """
103
- ),
104
- HumanMessage(
105
- content=f"""
106
- Questions: {query}
107
- Search query:
108
- """
109
- ),
110
- ]
111
-
112
  response = chat.invoke(messages, config={"callbacks": callbacks})
113
  return response.content
114
 
@@ -238,45 +196,8 @@ def process_and_vectorize_content(
238
 
239
 
240
  def answer_query_with_sources(query, relevant_docs):
241
- messages = [
242
- SystemMessage(
243
- content="""
244
- You are an expert research assistant.
245
- You are provided with a Context in JSON format and a Question.
246
-
247
- Use RAG to answer the Question, providing references and links to the Context material you retrieve and use in your answer:
248
- When generating your answer, follow these steps:
249
- - Retrieve the most relevant context material from your knowledge base to help answer the question
250
- - Cite the references you use by including the title, author, publication, and a link to each source
251
- - Synthesize the retrieved information into a clear, informative answer to the question
252
- - Format your answer in Markdown, using heading levels 2-3 as needed
253
- - Include a "References" section at the end with the full citations and link for each source you used
254
-
255
-
256
- Example of Context JSON entry:
257
- {
258
- "page_content": "This provides access to material related to ...",
259
- "metadata": {
260
- "title": "Introduction - Marie Curie: Topics in Chronicling America",
261
- "link": "https://guides.loc.gov/chronicling-america-marie-curie"
262
- }
263
- }
264
-
265
- """
266
- ),
267
- HumanMessage(
268
- content= f"""
269
- Context information is below.
270
- Context:
271
- ---------------------
272
- {json.dumps(relevant_docs, indent=2, ensure_ascii=False)}
273
- ---------------------
274
- Question: {query}
275
- Answer:
276
- """
277
- ),
278
- ]
279
-
280
  response = chat.invoke(messages, config={"callbacks": callbacks})
281
  return response
282
 
@@ -296,20 +217,20 @@ if(os.getenv("LANGCHAIN_API_KEY")):
296
 
297
  if __name__ == '__main__':
298
  arguments = docopt(__doc__, version='Search Agent 0.1')
299
- #print(arguments)
300
-
301
 
302
  provider = arguments["--provider"]
303
  temperature = float(arguments["--temperature"])
304
- chat = get_chat_llm(provider, temperature)
 
 
305
  query = arguments["SEARCH_QUERY"]
306
-
 
 
307
  with console.status(f"[bold green]Optimizing query for search: {query}"):
308
  optimize_search_query = optimize_search_query(query)
309
- console.log(f"Optimized search query: [bold blue]{optimize_search_query}")
310
 
311
- domain=arguments["--domain"]
312
- max_pages=arguments["--max_pages"]
313
  with console.status(f"[bold green]Searching sources using the optimized query: {optimize_search_query}"):
314
  sources = get_sources(optimize_search_query, max_pages=max_pages, domain=domain)
315
  console.log(f"Found {len(sources)} sources {'on ' + domain if domain else ''}")
@@ -329,5 +250,8 @@ if __name__ == '__main__':
329
  respomse = answer_query_with_sources(query, relevant_docs)
330
 
331
  console.rule(f"[bold green]Response from {provider}")
332
- console.print(Markdown(respomse.content))
 
 
 
333
  console.rule("[bold green]")
 
6
  [--provider=provider]
7
  [--temperature=temp]
8
  [--max_pages=num]
9
+ [--output=text]
10
  SEARCH_QUERY
11
  search_agent.py --version
12
 
 
17
  -t temp --temperature=temp Set the temperature of the LLM [default: 0.0]
18
  -p provider --provider=provider Use a specific LLM (choices: bedrock,openai,groq) [default: openai]
19
  -m num --max_pages=num Max number of pages to retrieve [default: 10]
20
+ -o text --output=text Output format (choices: text, markdown) [default: markdown]
21
 
22
  """
23
 
 
65
  return chat_llm
66
 
67
def optimize_search_query(query):
    """Condense *query* into a short keyword search query via the LLM and return it as text."""
    # Imported here (not at module top) to keep the prompt module decoupled
    # from script start-up; presumably also avoids import-order issues — TODO confirm.
    from messages import get_optimized_search_messages

    response = chat.invoke(
        get_optimized_search_messages(query),
        config={"callbacks": callbacks},
    )
    return response.content
72
 
 
196
 
197
 
198
def answer_query_with_sources(query, relevant_docs):
    """Answer *query* from *relevant_docs* via the LLM; returns the raw chat response object."""
    from messages import get_query_with_sources_messages

    prompt_messages = get_query_with_sources_messages(query, relevant_docs)
    return chat.invoke(prompt_messages, config={"callbacks": callbacks})
203
 
 
217
 
218
  if __name__ == '__main__':
219
  arguments = docopt(__doc__, version='Search Agent 0.1')
 
 
220
 
221
  provider = arguments["--provider"]
222
  temperature = float(arguments["--temperature"])
223
+ domain=arguments["--domain"]
224
+ max_pages=arguments["--max_pages"]
225
+ output=arguments["--output"]
226
  query = arguments["SEARCH_QUERY"]
227
+
228
+ chat = get_chat_llm(provider, temperature)
229
+
230
  with console.status(f"[bold green]Optimizing query for search: {query}"):
231
  optimize_search_query = optimize_search_query(query)
232
+ console.log(f"Optimized search query: [bold blue]{optimize_search_query}")
233
 
 
 
234
  with console.status(f"[bold green]Searching sources using the optimized query: {optimize_search_query}"):
235
  sources = get_sources(optimize_search_query, max_pages=max_pages, domain=domain)
236
  console.log(f"Found {len(sources)} sources {'on ' + domain if domain else ''}")
 
250
  respomse = answer_query_with_sources(query, relevant_docs)
251
 
252
  console.rule(f"[bold green]Response from {provider}")
253
+ if output == "text":
254
+ console.print(respomse.content)
255
+ else:
256
+ console.print(Markdown(respomse.content))
257
  console.rule("[bold green]")