import os

import pinecone
import chainlit as cl
from chainlit import on_chat_start

from langchain.chains import LLMChain, SequentialChain, TransformChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.cohere import CohereEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.prompts import (
    ChatPromptTemplate,
    PromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.vectorstores import Pinecone

# Pinecone credentials and environment are read from environment variables.
pinecone.init(
    api_key=os.environ.get("PINECONE_API_KEY"),
    environment=os.environ.get("PINECONE_ENV"),
)

index_name = "spark"

# Optional Pinecone namespace; None searches the default namespace.
namespace = None

embeddings = CohereEmbeddings(
    model="embed-english-light-v2.0",
    cohere_api_key=os.environ.get("COHERE_API_KEY"),
)

llm = ChatOpenAI(temperature=0.7, verbose=True)

docsearch = Pinecone.from_existing_index(
    index_name=index_name, embedding=embeddings, namespace=namespace
)

# welcome_message = "Welcome to the Chainlit Pinecone demo! Ask anything about documents you vectorized and stored in your Pinecone DB."

# Shared conversation memory: turns are stored under "chat_history" and the
# user input is keyed on "question" so both chains below can reuse it.
memory = ConversationBufferMemory(input_key="question", memory_key="chat_history", return_messages=True)
_template = """Below is a summary of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base. Generate a search query based on the conversation and the new question.
Don't generate a new search query if the user is conversing generally or engaging in small talk; in that case, just return the original question.
Chat History:
{chat_history}

Question:
{question}

Remember - Don't change the search query from the user's question if user is engaging in small talk.
Search query:
"""
question_gen_prompt = PromptTemplate.from_template(_template)

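# System prompt that defines SPARK's persona and the grounding instructions for answers.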
spark = """You are SPARK, a Prompt Engineering Assistant. SPARK stands for Smart Prompt Assistant and Resource Knowledgebase.

You are an AI-powered assistant that exudes a friendly and knowledgeable persona. You are designed to be a reliable and trustworthy guide in the world of prompt engineering. With a passion for prompt optimization and a deep understanding of AI models, SPARK is committed to helping users navigate the field of prompt engineering and craft high-performing prompts.

Personality:

Intelligent: SPARK is highly knowledgeable about prompt engineering concepts and practices. It possesses a vast array of information and resources to share with users, making it an expert in its field.

Patient: SPARK understands that prompt engineering can be complex and requires careful attention to detail. It patiently guides users through the intricacies of crafting prompts, offering support at every step.

Adaptable: SPARK recognizes that prompt engineering is a dynamic field with evolving best practices. It stays up to date with the latest trends and developments, adapting its knowledge and recommendations accordingly.

Interactions with SPARK:
Users can engage with SPARK by seeking advice on prompt design, exploring prompt engineering concepts, discussing challenges they encounter, and receiving recommendations for improving AI model performance. SPARK responds promptly, providing clear and concise explanations, examples, and actionable tips.

Important:
Answer with the facts listed in the sources below. If there isn't enough information below, say you don't know. If asking the user a clarifying question would help, ask it.

Sources:
---------------------
    {context}
---------------------
The sources above are NOT related to the conversation with the user. Ignore the sources if the user is engaging in small talk.
"""

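# Reset the shared memory at the start of each new chat session.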
@on_chat_start
def init():
    memory.clear()
    
def transform_func(inputs: dict) -> dict:
    """Condense the question into a search query, then retrieve supporting context."""
    query = inputs["question"]
    # Generate a standalone search query from the chat history and the new question.
    qgen = LLMChain(llm=llm, prompt=question_gen_prompt, verbose=True, memory=memory, output_key="context")
    search_query = qgen.predict(question=query)
    # Retrieve similar documents from Pinecone and format them as sources.
    result = docsearch.similarity_search(search_query)
    context = [
        f"\n{source.page_content}\nSource:\n{source.metadata.get('title')} - {source.metadata.get('source')}"
        for source in result
    ]
    return {"context": "\n".join(context), "query": query}


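# Chainlit builds the chain for each chat session with this factory.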
@cl.langchain_factory(use_async=True)
async def langchain_factory():
    # Note: this retriever is currently unused; retrieval happens in transform_func above.
    retriever = docsearch.as_retriever(search_kwargs={"k": 4}, search_type="mmr")
    # Build the chat prompt: system persona, prior turns from memory, then the new question.
    messages = [SystemMessagePromptTemplate.from_template(spark)]
    messages.extend(memory.chat_memory.messages)
    messages.append(HumanMessagePromptTemplate.from_template("{query}"))

    chat_prompt = ChatPromptTemplate(messages=messages, input_variables=["context", "query"])
    answer_generator = LLMChain(llm=llm, prompt=chat_prompt, verbose=True, output_key="answer", memory=memory)

    transform_chain = TransformChain(
        input_variables=["question"], output_variables=["context", "query"], transform=transform_func
    )

    # chat_history is supplied by the shared memory inside each sub-chain,
    # so the caller only needs to provide the question.
    conversational_QA_chain = SequentialChain(
        chains=[transform_chain, answer_generator],
        input_variables=["question"],
        # Return both the retrieved context and the final answer.
        output_variables=["context", "answer"],
        verbose=True,
    )

    return conversational_QA_chain

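# Custom run hook: invoke the chain with the user's message and send back the answer.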
@cl.langchain_run
async def run(chain, input_str):
    res = chain({"question": input_str})
    await cl.Message(content=res["answer"]).send()
    
    
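# Post-process hook: attach any source documents to the reply as inline text elements.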
@cl.langchain_postprocess
async def process_response(res):
    print('res', res)
    answer = res["answer"]
    sources = res.get("sources", "").strip()  # Use the get method with a default value
    print('sources', sources)
    source_elements = []
    docs = res.get("source_documents", None)

    if docs:
        metadatas = [doc.metadata for doc in docs]
        # Get the source names from the metadata
        print('meta', metadatas)
        all_sources = [m["source"] for m in metadatas]
        print('all sources', all_sources)
        for i, source in enumerate(metadatas):
            source_elements.append(cl.Text(content=source.get('source'), name=source.get('title'), display='inline'))

    # Send the answer and the text elements to the UI
    await cl.Message(content=answer, elements=source_elements).send()