Dharma20 committed
Commit ad342cc · verified · 1 Parent(s): 1d76bdf

Update usecase_agent.py

Files changed (1)
  1. usecase_agent.py +597 -597
usecase_agent.py CHANGED
@@ -1,597 +1,597 @@
from setup import *
from typing import List, Optional
from typing_extensions import TypedDict
from pydantic import BaseModel, Field
from langgraph.graph import START, END, StateGraph
from langgraph.checkpoint.memory import MemorySaver
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, get_buffer_string
from langgraph.constants import Send
from operator import add
from langgraph.graph import MessagesState
from typing import Annotated
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings.jina import JinaEmbeddings
# from langchain_huggingface import HuggingFaceEmbeddings

class Analyst(BaseModel):
    affiliation: str = Field(
        description="Primary affiliation of the analyst.",
    )
    name: str = Field(
        description="Name of the analyst."
    )
    role: str = Field(
        description="Role of the analyst in the context of the topic.",
    )
    description: str = Field(
        description="Description of the analyst's focus, concerns, and motives.",
    )

    @property
    def persona(self) -> str:
        return f"Name: {self.name}\nRole: {self.role}\nAffiliation: {self.affiliation}\nDescription: {self.description}\n"


class Perspectives(BaseModel):
    analysts: List[Analyst] = Field(
        description="Comprehensive list of analysts with their roles and affiliations.",
    )


class GenerateAnalystsState(TypedDict):
    topic: str  # Research topic
    max_analysts: int  # Number of analysts to generate
    analysts: List[Analyst]  # Analysts generated for the topic


class InterviewState(MessagesState):
    max_num_turns: int  # Number of turns of conversation
    context: Annotated[list, add]  # Source docs
    analyst: Analyst  # Analyst asking questions
    interview: str  # Interview transcript
    sections: list  # Final key we duplicate in outer state for Send() API


class SearchQuery(BaseModel):
    search_query: str = Field(None, description="Search query for retrieval.")

def create_analysts(state: GenerateAnalystsState):
    """ Create analysts """

    topic = state['topic']
    max_analysts = state['max_analysts']

    # Enforce structured output
    structured_llm = llm.with_structured_output(Perspectives)

    analyst_instructions = """You are tasked with creating a set of AI analyst personas. Follow these instructions carefully:
1. First, review the research topic: {topic}
2. Create {max_analysts} analysts with the following roles:
    - Industry expert
    - GenAI expert
    - Business strategist
3. Determine the most interesting themes based upon documents and/or feedback above.
4. Pick the top {max_analysts} themes.
5. For each theme, create one analyst with ALL of the following required fields:
    - name: A fitting name for the analyst
    - role: Their specific role or title
    - affiliation: Their primary organization or institution
    - description: A detailed description of their focus areas, concerns, and motives
6. Ensure every analyst includes all four fields without exception.
Remember: Every analyst **MUST** have all four fields (name, role, affiliation, and description) properly defined. Incomplete personas are not acceptable."""

    # System message
    system_message = analyst_instructions.format(topic=topic, max_analysts=max_analysts)

    analysts = structured_llm.invoke([SystemMessage(content=system_message)] + [HumanMessage(content="Generate the set of analysts.")])

    # Write the list of analysts to state
    return {"analysts": analysts.analysts}

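# Illustrative call (assumes `llm` is provided by `setup`; the topic and count
# below are placeholders, not values used in this file):
#
#     analysts = create_analysts({"topic": "GenAI in logistics", "max_analysts": 3})["analysts"]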
def vectorstore_writing(doc_splits):
    """ Build the Chroma vectorstore and expose a module-level retriever """
    global retriever
    vectorstore = Chroma.from_documents(
        documents=doc_splits,
        collection_name="rag-chroma",
        embedding=JinaEmbeddings(model_name='jina-embeddings-v3'),
        persist_directory='./chroma_db'
    )
    retriever = vectorstore.as_retriever()

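# A minimal sketch (assumptions, not part of this file) of how `doc_splits`
# might be produced before calling vectorstore_writing():
#
#     from langchain_text_splitters import RecursiveCharacterTextSplitter
#     splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
#     doc_splits = splitter.split_documents(docs)  # `docs` loaded elsewhere
#     vectorstore_writing(doc_splits)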
def generate_question(state: InterviewState):
    """ Generate questions for the interview """

    # print('----STATE----', state)
    # Get the analyst persona
    analyst = state['analyst']
    messages = state['messages']
    context = state["context"]

    question_instructions = """You are an analyst tasked with interviewing an expert to learn about the use of Generative AI (GenAI) applications in a specific industry or company, if mentioned.

Your goal is to uncover interesting and specific insights related to the topic of Generative AI use cases.

Interesting: Insights that are surprising, non-obvious, or reveal unique applications of GenAI in the industry or company.
Specific: Insights that avoid generalities and include specific examples or case studies relevant to the company's offerings, strategic focus areas, or the industry's needs.

Focus Areas:
- Explore the company's key offerings and strategic focus areas (e.g., operations, supply chain, customer experience), if the company is named.
- Discuss industry-wide trends, innovations, and opportunities enabled by GenAI, such as improved operational efficiency, enhanced customer experiences, or streamlined supply chain processes.
- Gather details on the company's or industry's vision and products, focusing on how GenAI can be applied to enhance or transform their workflows.

Task:
Begin by introducing yourself with a name that fits your persona, then ask your question.

Continue asking follow-up questions to drill down into:
- Specific GenAI use cases within the company's domain or the industry.
- How these applications align with the company's or industry's strategic goals.
- Real-world examples or future opportunities for integrating GenAI into their processes.

Complete the interview by saying:
"Thank you so much for your help!"

Remember to stay in character throughout the conversation, reflecting your persona and the provided goals."""

    # Generate the question
    question = llm.invoke([SystemMessage(content=question_instructions)] + [HumanMessage(content="Generate the question.")])

    return {"messages": [question]}

def search_vectorstore(state: InterviewState):
    """ Retrieve docs from the vectorstore """

    # Search query writing
    search_instructions = SystemMessage(content="""You will be given a conversation between an analyst and an expert.

Your goal is to generate a well-structured query for use in retrieval and/or web-search related to the conversation.

First, analyze the full conversation.

Pay particular attention to the final question posed by the analyst.

Convert this final question into a well-structured web search query.""")

    # Search query
    structured_llm = llm.with_structured_output(SearchQuery)
    search_query = structured_llm.invoke([search_instructions] + state['messages'])

    # Search
    search_docs = retriever.invoke(input=search_query.search_query)

    # Format
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
            for doc in search_docs
        ]
    )

    return {"context": [formatted_search_docs]}

def generate_answer(state: InterviewState):
    """ Node to answer a question """

    # Get state
    analyst = state["analyst"]
    messages = state["messages"]
    context = state["context"]

    answer_instructions = """You are an expert being interviewed by an analyst.

Here is the analyst's area of focus: {goals}.

Your goal is to answer a question posed by the interviewer.

To answer the question, use this context:

{context}

When answering questions, follow these guidelines:

1. Use only the information provided in the context.

2. Do not introduce external information or make assumptions beyond what is explicitly stated in the context.

3. The context contains sources at the top of each individual document.

4. Include these sources in your answer next to any relevant statements. For example, for source # 1 use [1].

5. List your sources in order at the bottom of your answer. [1] Source 1, [2] Source 2, etc.

6. If the source is: <Document source="assistant/docs/llama3_1.pdf" page="7"/> then just list:

[1] assistant/docs/llama3_1.pdf, page 7

And skip the addition of the brackets as well as the Document source preamble in your citation."""

    # Answer question
    system_message = answer_instructions.format(goals=analyst.persona, context=context)
    answer = llm.invoke([SystemMessage(content=system_message)] + messages)

    # Name the message as coming from the expert
    answer.name = "expert"

    # Append it to state
    return {"messages": [answer]}

def save_interview(state: InterviewState):
    """ Save the interview transcript """

    # Get messages
    messages = state["messages"]

    # Convert interview to a string
    interview = get_buffer_string(messages)

    # Save to the interview key
    return {"interview": interview}

def route_messages(state: InterviewState,
                   name: str = "expert"):
    """ Route between question and answer """

    # Get messages
    messages = state["messages"]
    max_num_turns = state.get('max_num_turns', 2)

    # Check the number of expert answers
    num_responses = len(
        [m for m in messages if isinstance(m, AIMessage) and m.name == name]
    )

    # End if the expert has answered at least max_num_turns times
    if num_responses >= max_num_turns:
        return 'save_interview'

    # This router runs after each question-answer pair
    # Get the last question asked to check if it signals the end of the discussion
    last_question = messages[-2]

    if "Thank you so much for your help" in last_question.content:
        return 'save_interview'
    return "ask_question"

def write_section(state: InterviewState):
    """ Node to write a report section from the interview """

    # Get state
    interview = state["interview"]
    context = state["context"]
    analyst = state["analyst"]

    section_writer_instructions = """You are an expert technical writer.

Your task is to create a short, easily digestible section of a report based on a set of source documents.

1. Analyze the content of the source documents:
- The name of each source document is at the start of the document, with the <Document tag.

2. Create a report structure using markdown formatting:
- Use ## for the section title
- Use ### for sub-section headers

3. Write the report following this structure:
a. Title (## header)
b. Summary (### header)
c. Sources (### header)

4. Make your title engaging based upon the focus area of the analyst:
{focus}

5. For the summary section:
- Set up the summary with general background / context related to the focus area of the analyst
- Emphasize what is novel, interesting, or surprising about insights gathered from the interview
- Create a numbered list of source documents as you use them
- Do not mention the names of interviewers or experts
- Aim for approximately 400 words maximum
- Use numbered sources in your report (e.g., [1], [2]) based on information from source documents

6. In the Sources section:
- Include all sources used in your report
- Provide full links to relevant websites or specific document paths
- Separate each source by a newline. Use two spaces at the end of each line to create a newline in Markdown.
- It will look like:

### Sources
[1] Link or Document name
[2] Link or Document name

7. Be sure to combine sources. For example, this is not correct:

[3] https://ai.meta.com/blog/meta-llama-3-1/
[4] https://ai.meta.com/blog/meta-llama-3-1/

There should be no redundant sources. It should simply be:

[3] https://ai.meta.com/blog/meta-llama-3-1/

8. Final review:
- Ensure the report follows the required structure
- Include no preamble before the title of the report
- Check that all guidelines have been followed"""

    # Write the section using the source docs gathered during the interview (context)
    system_message = section_writer_instructions.format(focus=analyst.description)
    section = llm.invoke([SystemMessage(content=system_message)] + [HumanMessage(content=f"Use this source to write your section: {context}")])

    # Append it to state
    return {"sections": [section.content]}

# Add nodes and edges
interview_builder = StateGraph(InterviewState)
interview_builder.add_node("ask_question", generate_question)
interview_builder.add_node("search_rag", search_vectorstore)
interview_builder.add_node("answer_question", generate_answer)
interview_builder.add_node("save_interview", save_interview)
interview_builder.add_node("write_section", write_section)

# Flow
interview_builder.add_edge(START, "ask_question")
interview_builder.add_edge("ask_question", "search_rag")
interview_builder.add_edge("search_rag", "answer_question")
interview_builder.add_conditional_edges("answer_question", route_messages, ['ask_question', 'save_interview'])
interview_builder.add_edge("save_interview", "write_section")
interview_builder.add_edge("write_section", END)

# Interview
memory = MemorySaver()
interview_graph = interview_builder.compile(checkpointer=memory).with_config(run_name="Conduct Interviews")

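# A sketch of running the interview sub-graph on its own; the analyst object,
# seed message, and thread_id below are illustrative placeholders:
#
#     thread = {"configurable": {"thread_id": "interview-1"}}
#     interview_graph.invoke({"analyst": some_analyst,
#                             "messages": [HumanMessage(content="So you said you were writing an article on GenAI?")],
#                             "max_num_turns": 2}, thread)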
class ResearchGraphState(TypedDict):
    topic: str  # Research topic
    max_analysts: int  # Number of analysts
    analysts: List[Analyst]  # Analysts asking questions
    sections: Annotated[list, add]  # Send() API key
    introduction: str  # Introduction for the final report
    content: str  # Content for the final report
    conclusion: str  # Conclusion for the final report
    final_report: str  # Final report
    human_analyst_feedback: Optional[str]  # Human feedback

def initiate_all_interviews(state: ResearchGraphState):
    """ This is the "map" step where we run each interview sub-graph using the Send API """

    # Check for human feedback
    human_analyst_feedback = state.get('human_analyst_feedback')
    if human_analyst_feedback:
        # Return to create_analysts
        return "create_analysts"

    # Otherwise kick off interviews in parallel via the Send() API
    else:
        topic = state["topic"]
        return [Send("conduct_interview", {"analyst": analyst,
                                           "messages": [HumanMessage(
                                               content=f"So you said you were writing an article on {topic}?")],
                                           }) for analyst in state["analysts"]]

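# Fan-out illustration (assumed three analysts; the names are placeholders): the
# list comprehension above expands to one Send per analyst, e.g.
#
#     [Send("conduct_interview", {"analyst": industry_expert, "messages": [...]}),
#      Send("conduct_interview", {"analyst": genai_expert, "messages": [...]}),
#      Send("conduct_interview", {"analyst": business_strategist, "messages": [...]})]
#
# and LangGraph runs each "conduct_interview" sub-graph in parallel with its own input state.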
report_writer_instructions = '''You are a technical writer tasked with creating a report on the overall topic:

**{topic}**

Your team of analysts has conducted interviews and written memos based on their findings. Your task is to consolidate the insights from these memos into a cohesive and structured report, following this format:

-Think deeply and generate at least 2 use cases based on the memos.
+Think deeply and generate at least 12 use cases based on the memos.

### Format for Each Use Case
1. **Title Header:** Use a descriptive title for each use case, such as "## Use Case 1: AI-Powered Predictive Maintenance."
2. **Objective/Use Case:** Summarize the primary goal or application of AI for this use case in one or two sentences.
3. **AI Application:** Describe the specific AI technologies or methods used to achieve the objective.
4. **Cross-Functional Benefit:** Outline the key benefits across various functions, formatted as bullet points, specifying which department or area benefits from the AI use case.

### Example Format:

## Use Case 1: AI-Powered Predictive Maintenance
**Objective/Use Case:** Reduce equipment downtime and maintenance costs by predicting equipment failures before they occur.
**AI Application:** Implement machine learning algorithms that analyze real-time sensor data from machinery to predict potential failures and schedule maintenance proactively.
**Cross-Functional Benefit:**
- **Operations & Maintenance:** Minimizes unplanned downtime and extends equipment lifespan.
- **Finance:** Reduces maintenance costs and improves budgeting accuracy.
- **Supply Chain:** Optimizes spare parts inventory based on predictive insights.

## Use Case 2: Real-Time Quality Control with Computer Vision
**Objective/Use Case:** Enhance product quality by detecting defects in products during manufacturing.
**AI Application:** Deploy AI-powered computer vision systems on production lines to identify surface defects and inconsistencies in real time.
**Cross-Functional Benefit:**
- **Quality Assurance:** Improves defect detection accuracy and reduces scrap rates.
- **Production:** Enables immediate corrective actions, enhancing overall efficiency.
- **Customer Satisfaction:** Delivers higher-quality products, strengthening client relationships.

### Report Guidelines
1. Begin with the first use case title in the specified format.
2. Do not include any preamble or introductory text for the report.
3. Consolidate insights into distinct use cases, with a focus on clarity and relevance.
4. Preserve any citations included in the memos, formatted in brackets, e.g., [1], [2].
5. After detailing all use cases, include a **Sources** section with the title: `## Sources`.
6. Be sure to combine sources. For example, this is not correct:

[3] https://ai.meta.com/blog/meta-llama-3-1/
[4] https://ai.meta.com/blog/meta-llama-3-1/

There should be no redundant sources. It should simply be:
[3] https://ai.meta.com/blog/meta-llama-3-1/

### Your Inputs
You will be given a collection of memos from your analysts under `{context}`. Extract and distill insights into specific use cases, ensuring each use case adheres to the prescribed format.'''

def write_report(state: ResearchGraphState):
    # Full set of sections
    sections = state["sections"]
    topic = state["topic"]

    # Concat all sections together
    formatted_str_sections = "\n\n".join([f"{section}" for section in sections])

    # Summarize the sections into a final report
    system_message = report_writer_instructions.format(topic=topic, context=formatted_str_sections)
    report = llm.invoke([SystemMessage(content=system_message)] + [HumanMessage(content="Write a report based upon these memos.")])
    return {"content": report.content}

459
- def human_feedback(state: ResearchGraphState):
460
- """ No-op node that should be interrupted on """
461
- pass
462
-
463
-
464
-
def write_introduction(state: ResearchGraphState):
    # Full set of sections
    sections = state["sections"]
    topic = state["topic"]

    # Concat all sections together
    formatted_str_sections = "\n\n".join([f"{section}" for section in sections])

    intro_conclusion_instructions = """You are a technical writer finishing a report on {topic}.

You will be given all of the sections of the report.

Your job is to write a crisp and compelling introduction or conclusion section.

The user will instruct you whether to write the introduction or conclusion.

Include no preamble for either section.

Target around 100 words, crisply previewing (for the introduction) or recapping (for the conclusion) all of the sections of the report.

Use markdown formatting.

For your introduction, create a compelling title and use the # header for the title.

For your introduction, use ## Introduction as the section header.

For your conclusion, use ## Conclusion as the section header.

Here are the sections to reflect on for writing: {formatted_str_sections}"""

    # Write the introduction
    instructions = intro_conclusion_instructions.format(topic=topic, formatted_str_sections=formatted_str_sections)
    intro = llm.invoke([instructions] + [HumanMessage(content="Write the report introduction")])
    return {"introduction": intro.content}

def write_conclusion(state: ResearchGraphState):
    # Full set of sections
    sections = state["sections"]
    topic = state["topic"]

    # Concat all sections together
    formatted_str_sections = "\n\n".join([f"{section}" for section in sections])

    intro_conclusion_instructions = """You are a technical writer finishing a report on {topic}.

You will be given all of the sections of the report.

Your job is to write a crisp and compelling introduction or conclusion section.

The user will instruct you whether to write the introduction or conclusion.

Include no preamble for either section.

Target around 100 words, crisply previewing (for the introduction) or recapping (for the conclusion) all of the sections of the report.

Use markdown formatting.

For your introduction, create a compelling title and use the # header for the title.

For your introduction, use ## Introduction as the section header.

For your conclusion, use ## Conclusion as the section header.

Here are the sections to reflect on for writing: {formatted_str_sections}"""

    # Write the conclusion
    instructions = intro_conclusion_instructions.format(topic=topic, formatted_str_sections=formatted_str_sections)
    conclusion = llm.invoke([instructions] + [HumanMessage(content="Write the report conclusion")])
    return {"conclusion": conclusion.content}

def finalize_report(state: ResearchGraphState):
    """ This is the "reduce" step where we gather all the sections, combine them, and reflect on them to write the intro/conclusion """
    # Save full final report
    content = state["content"]
    if content.startswith("## Insights"):
        content = content.removeprefix("## Insights")
    if "## Sources" in content:
        try:
            content, sources = content.split("\n## Sources\n")
        except ValueError:
            sources = None
    else:
        sources = None

    final_report = state["introduction"] + "\n\n---\n\n" + content + "\n\n---\n\n" + state["conclusion"]
    if sources is not None:
        final_report += "\n\n## Sources\n" + sources
    return {"final_report": final_report}

def usecase_agent_func(topic, max_analysts):
    # Add nodes and edges
    builder = StateGraph(ResearchGraphState)
    builder.add_node("create_analysts", create_analysts)
    builder.add_node("human_feedback", human_feedback)
    builder.add_node("conduct_interview", interview_builder.compile())
    builder.add_node("write_report", write_report)
    builder.add_node("write_introduction", write_introduction)
    builder.add_node("write_conclusion", write_conclusion)
    builder.add_node("finalize_report", finalize_report)

    # Logic
    builder.add_edge(START, "create_analysts")
    builder.add_edge("create_analysts", "human_feedback")
    builder.add_conditional_edges("human_feedback", initiate_all_interviews, ["create_analysts", "conduct_interview"])
    builder.add_edge("conduct_interview", "write_report")
    builder.add_edge("conduct_interview", "write_introduction")
    builder.add_edge("conduct_interview", "write_conclusion")
    builder.add_edge(["write_conclusion", "write_report", "write_introduction"], "finalize_report")
    builder.add_edge("finalize_report", END)

    # Compile
    memory = MemorySaver()
    graph = builder.compile(checkpointer=memory)
    config = {"configurable": {"thread_id": "1"}}
    graph.invoke({"topic": topic,
                  "max_analysts": max_analysts,
                  'human_analyst_feedback': None},
                 config)
    final_state = graph.get_state(config)
    report = final_state.values.get('final_report')

    print('-----REPORT-----', report)

    return report
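# Example usage (illustrative; the topic string and analyst count are placeholders):
#
#     report = usecase_agent_func("GenAI use cases in retail supply chains", max_analysts=3)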
 