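"""Chainlit front end for a RAG question-answering app.

Answers questions about two AI-governance documents (the White House
Blueprint for an AI Bill of Rights and the NIST Generative AI Profile),
using a fine-tuned Arctic embedding model for retrieval and gpt-4o-mini
for generation. Run locally with: chainlit run app.py
"""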
import chainlit as cl
import os
from classes.app_state import AppState
from classes.model_run_state import ModelRunState
from dotenv import load_dotenv
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
from operator import itemgetter
from utilities.doc_utilities import get_documents
from utilities.templates import get_qa_prompt
from utilities.vector_utilities import create_vector_store
document_urls = [
    "https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf",
    "https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf",
]
# Load environment variables from .env file
load_dotenv()
# Get the OpenAI API key from environment variables
openai_api_key = os.getenv("OPENAI_API_KEY")
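# Fail fast with a clear error rather than failing later inside the OpenAI client
# (assumes the key is supplied via the .env file or the host environment)
if not openai_api_key:
    raise ValueError("OPENAI_API_KEY is not set; add it to a .env file or the environment")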
# Setup our state and read the documents
app_state = AppState()
app_state.set_debug(False)
app_state.set_document_urls(document_urls)
get_documents(app_state)
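# get_documents (see utilities/doc_utilities) is expected to download and parse
# the two PDFs listed above into app_state for chunking below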
# Set up this model run
chainlit_state = ModelRunState()
chainlit_state.name = "Chainlit"
chainlit_state.qa_model_name = "gpt-4o-mini"
chainlit_state.qa_model = ChatOpenAI(model=chainlit_state.qa_model_name, openai_api_key=openai_api_key)
hf_username = "rchrdgwr"
hf_repo_name = "finetuned-arctic-model"
finetuned_model_name = f"{hf_username}/{hf_repo_name}"
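# Retrieval uses a fine-tuned Arctic embedding model pulled from the Hugging Face
# Hub; HuggingFaceEmbeddings downloads and caches it on first run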
chainlit_state.embedding_model_name = finetuned_model_name
chainlit_state.embedding_model = HuggingFaceEmbeddings(model_name=chainlit_state.embedding_model_name)
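# Chunking parameters: 1,000-character chunks with 100 characters of overlap so
# passages that straddle a chunk boundary are not lost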
chainlit_state.chunk_size = 1000
chainlit_state.chunk_overlap = 100
create_vector_store(app_state, chainlit_state)
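# create_vector_store (see utilities/vector_utilities) presumably chunks and embeds
# the documents and attaches a retriever to chainlit_state for use in the chain below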
chat_prompt = get_qa_prompt()
# Create the retrieval-augmented QA chain
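# The question is routed both to the retriever (to fetch context chunks) and
# straight through; the prompt is then filled and sent to the model, with the
# retrieved context carried alongside so sources can be reported with the answer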
retrieval_augmented_qa_chain = (
    {"context": itemgetter("question") | chainlit_state.retriever, "question": itemgetter("question")}
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {"response": chat_prompt | chainlit_state.qa_model, "context": itemgetter("context")}
)
opening_content = """
Welcome!
I am AI Mentor - your guide to understanding the evolving AI industry.
My goal is to help you learn how to think about building ethical and useful applications.
I can answer your questions on AI based on the following two documents:
- Blueprint for an AI Bill of Rights by the White House Office of Science and Technology Policy
- Artificial Intelligence Risk Management Framework: Generative Artificial Intelligence Profile
What would you like to learn about AI today?
"""
@cl.on_chat_start
async def on_chat_start():
    await cl.Message(content=opening_content).send()
@cl.on_message
async def main(message: cl.Message):
    MAX_PREVIEW_LENGTH = 100  # used only by the commented-out context preview below
    response = retrieval_augmented_qa_chain.invoke({"question": message.content})
    answer_content = response["response"].content
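    # The chain returns the complete answer at once, so streaming is simulated
    # here by emitting the text in fixed-size slices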
    msg = cl.Message(content="")
    for i in range(0, len(answer_content), 50):  # stream in 50-character slices
        chunk = answer_content[i:i + 50]
        await msg.stream_token(chunk)
    # Finalize the streamed message
    await msg.send()
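    # Report which chunks of which document were retrieved as context,
    # e.g. "Sources: BOR-12 RMF-3"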
    context_documents = response["context"]
    # num_contexts = len(context_documents)
    # context_msg = f"Number of found context: {num_contexts}"
    # await cl.Message(content=context_msg).send()
    chunk_string = "Sources:"
    for doc in context_documents:
        document_title = doc.metadata.get("source", "Unknown Document")
        chunk_number = doc.metadata.get("chunk_number", "Unknown Chunk")
        # Label each chunk by its source document: the AI Bill of Rights
        # blueprint (BOR) or the NIST Risk Management Framework profile (RMF)
        if "Blueprint-for-an-AI-Bill-of-Rights" in document_title:
            doc_string = "BOR"
        else:
            doc_string = "RMF"
        chunk_string = chunk_string + " " + doc_string + "-" + str(chunk_number)
        # document_context = doc.page_content.strip()
        # truncated_context = document_context[:MAX_PREVIEW_LENGTH] + ("..." if len(document_context) > MAX_PREVIEW_LENGTH else "")
        # print("----------------------------------------")
        # print(truncated_context)
        # await cl.Message(
        #     content=f"**{document_title} (Chunk: {chunk_number})**",
        #     elements=[
        #         cl.Text(content=truncated_context, display="inline")
        #     ]
        # ).send()
    await cl.Message(content=chunk_string).send()