import os
from langchain.embeddings.cohere import CohereEmbeddings
from langchain.vectorstores import Pinecone
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
import pinecone
import chainlit as cl
from langchain.memory import ConversationBufferMemory
from langchain.prompts import (
    ChatPromptTemplate,
    PromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.chains import LLMChain, TransformChain, SequentialChain
from chainlit import on_message, on_chat_start

pinecone.init(
    api_key=os.environ.get("PINECONE_API_KEY"),
    environment=os.environ.get("PINECONE_ENV"),
)
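
# Connect to the existing Pinecone index using Cohere embeddings; ChatOpenAI generates the answers.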
index_name = "spark"
# Optional
namespace = None
embeddings = CohereEmbeddings(model='embed-english-light-v2.0',cohere_api_key=os.environ.get("COHERE_API_KEY"))
llm = ChatOpenAI(temperature=0.7, verbose=True)
docsearch = Pinecone.from_existing_index(
    index_name=index_name, embedding=embeddings, namespace=namespace
)
# welcome_message = "Welcome to the Chainlit Pinecone demo! Ask anything about documents you vectorized and stored in your Pinecone DB."
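# Single conversation buffer shared by the query-generation chain and the answer chain.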
memory = ConversationBufferMemory(
    llm=llm, input_key='question', memory_key='chat_history', return_messages=True
)
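
# Prompt for condensing the chat history and a follow-up question into a standalone search query.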
_template = """Below is a summary of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base. Generate a search query based on the conversation and the new question.
Don't generate a search query if the user is conversing generally or engaging in small talk; in that case, just return the original question.
Chat History:
{chat_history}
Question:
{question}
Remember - don't change the search query from the user's question if the user is engaging in small talk.
Search query:
"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
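
# System prompt defining the SPARK persona and how retrieved sources should be used.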
spark = """You are SPARK, a Prompt Engineering Assistant. SPARK stands for Smart Prompt Assistant and Resource Knowledgebase.
You are an AI-powered assistant that exudes a friendly and knowledgeable persona. You are designed to be a reliable and trustworthy guide in the
world of prompt engineering. With a passion for prompt optimization and a deep understanding of AI models, SPARK is committed to helping users navigate the field of prompt engineering and craft
high-performing prompts.
Personality:
Intelligent: SPARK is highly knowledgeable about prompt engineering concepts and practices. It possesses a vast array of information and resources to share with users, making it an expert in its field.
Patient: SPARK understands that prompt engineering can be complex and requires careful attention to detail. It patiently guides users through the intricacies of crafting prompts, offering support at every step.
Adaptable: SPARK recognizes that prompt engineering is a dynamic field with evolving best practices. It stays up to date with the latest trends and developments, adapting its knowledge and recommendations accordingly.
Interactions with SPARK:
Users can engage with SPARK by seeking advice on prompt design, exploring prompt engineering concepts, discussing challenges they encounter, and receiving recommendations for improving AI model performance. SPARK responds promptly, providing clear and concise explanations, examples, and actionable tips.
Important:
Answer with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. If asking a clarifying question to the user would help, ask the question.
Sources:
---------------------
{context}
---------------------
The sources above may not be related to the conversation with the user. Ignore the sources if the user is engaging in small talk.
"""
question_gen_prompt = PromptTemplate(template=_template, input_variables=["question", "chat_history"])
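
# Clear the shared memory whenever a new chat session starts.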
@on_chat_start
def init():
    memory.clear()
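
# TransformChain step: rewrite the incoming question into a search query and fetch matching context from Pinecone.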
def transform_func(inputs: dict) -> dict:
    query = inputs["question"]
    qgen = LLMChain(
        llm=llm, prompt=question_gen_prompt, verbose=True, memory=memory, output_key='context'
    )
    # Condense the chat history and the new question into a standalone search query.
    search_query = qgen.predict(question=query)
    # Retrieve the most relevant documents from Pinecone for that query.
    result = docsearch.similarity_search(search_query)
    context = [
        f"\n{source.page_content}\nSource:\n{source.metadata.get('title')} - {source.metadata.get('source')}"
        for source in result
    ]
    return {"context": '\n'.join(context), "query": query}
@on_message
@cl.langchain_factory(use_async=True)
async def langchain_factory():
    retriever = docsearch.as_retriever(search_kwargs={"k": 4}, search_type='mmr')
    # Assemble the chat prompt: SPARK system persona, prior conversation, then the new user message.
    messages = [SystemMessagePromptTemplate.from_template(spark)]
    messages.extend(memory.chat_memory.messages)
    messages.append(HumanMessagePromptTemplate.from_template("{query}"))
    chat_prompt = ChatPromptTemplate(messages=messages, input_variables=["context", "query"])
    answer_generator = LLMChain(
        llm=llm, prompt=chat_prompt, verbose=True, output_key='answer', memory=memory
    )
    transform_chain = TransformChain(
        input_variables=["question"], output_variables=["context", "query"], transform=transform_func
    )
    conversational_QA_chain = SequentialChain(
        chains=[transform_chain, answer_generator],
        input_variables=["chat_history", "question"],
        # Here we return multiple variables
        output_variables=["context", "answer"],
        verbose=True,
    )
    return conversational_QA_chain
@cl.langchain_run
async def run(chain, input_str):
    # Run the chain on the user's message and send the generated answer to the UI.
    res = chain._call({"question": input_str})
    await cl.Message(content=res["answer"]).send()
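
# Post-process the chain result and attach any retrieved source documents as inline text elements.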
@cl.langchain_postprocess
async def process_response(res):
    print('res', res)
    answer = res["answer"]
    sources = res.get("sources", "").strip()  # Use the get method with a default value
    print('sources', sources)
    source_elements = []
    docs = res.get("source_documents", None)
    if docs:
        metadatas = [doc.metadata for doc in docs]
        # Get the source names from the metadata
        print('meta', metadatas)
        all_sources = [m["source"] for m in metadatas]
        print('all sources', all_sources)
        for i, source in enumerate(metadatas):
            source_elements.append(
                cl.Text(content=source.get('source'), name=source.get('title'), display='inline')
            )
    # Send the answer and the text elements to the UI
    await cl.Message(content=answer, elements=source_elements).send()