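"""Chainlit front end for a retrieval-augmented QA assistant over two AI-policy PDFs.

The documents (the White House AI Bill of Rights blueprint and the NIST
Generative AI Profile) are chunked, embedded with a fine-tuned Arctic model
from the Hugging Face Hub, and stored in a vector store; gpt-4o-mini answers
questions over the retrieved context, and the source chunks are reported back
to the user.
"""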
import chainlit as cl
import os
from classes.app_state import AppState
from classes.model_run_state import ModelRunState
from dotenv import load_dotenv
from langchain.schema.runnable import RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain_community.embeddings import HuggingFaceEmbeddings
from operator import itemgetter
from utilities.doc_utilities import get_documents
from utilities.templates import get_qa_prompt
from utilities.vector_utilities import create_vector_store

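# The two source documents the assistant can answer questions about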
document_urls = [
    "https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf",
    "https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf",
]

# Load environment variables from .env file
load_dotenv()

# Get the OpenAI API key from environment variables
openai_api_key = os.getenv("OPENAI_API_KEY")

# Set up the app state and read the source documents
app_state = AppState()
app_state.set_debug(False)
app_state.set_document_urls(document_urls)

get_documents(app_state)

# Set up this model run
chainlit_state = ModelRunState()
chainlit_state.name = "Chainlit"

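# QA model: an OpenAI chat model generates answers over the retrieved context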
chainlit_state.qa_model_name = "gpt-4o-mini"
chainlit_state.qa_model = ChatOpenAI(model=chainlit_state.qa_model_name, openai_api_key=openai_api_key)

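# Embedding model: a fine-tuned Arctic embedding model published on the Hugging Face Hub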
hf_username = "rchrdgwr"
hf_repo_name = "finetuned-arctic-model"
finetuned_model_name = f"{hf_username}/{hf_repo_name}"

chainlit_state.embedding_model_name = finetuned_model_name
chainlit_state.embedding_model = HuggingFaceEmbeddings(model_name=chainlit_state.embedding_model_name)

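# Chunking parameters for create_vector_store, which splits the documents,
# builds the vector store, and attaches a retriever to chainlit_state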
chainlit_state.chunk_size = 1000
chainlit_state.chunk_overlap = 100
create_vector_store(app_state, chainlit_state)

chat_prompt = get_qa_prompt()

# Create the retrieval-augmented QA chain: the question is routed to the
# retriever to fetch context chunks, then context and question flow into
# the QA prompt and model; the raw context is carried through so sources
# can be reported alongside the response.
retrieval_augmented_qa_chain = (
    {"context": itemgetter("question") | chainlit_state.retriever, "question": itemgetter("question")}
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {"response": chat_prompt | chainlit_state.qa_model, "context": itemgetter("context")}
)

opening_content = """

Welcome!



I am AI Mentor - your guide to understanding the evolving AI industry.



My goal is to help you learn how to think about building ethical and useful applications.



I can answer your questions on AI based on the following 2 documents:

- Blueprint for an AI Bill of Rights by the Whitehouse Office of Science and Technology Policy

- Artificial Intelligence Risk Management Framework: Generative Artificial Intelligence Profile



What would you like to learn about AI today?

"""

@cl.on_chat_start
async def on_chat_start():
    # Greet the user when a new chat session starts
    await cl.Message(content=opening_content).send()


@cl.on_message
async def main(message):
    # Run the question through the RAG chain; the result carries both the
    # model response and the context chunks it was grounded on.
    response = retrieval_augmented_qa_chain.invoke({"question": message.content})
    answer_content = response["response"].content

    # Stream the answer back in fixed-size chunks so the UI renders it
    # progressively, then finalize the message.
    msg = cl.Message(content="")
    for i in range(0, len(answer_content), 50):  # adjust chunk size as desired
        await msg.stream_token(answer_content[i:i + 50])
    await msg.send()

    context_documents = response["context"]
    # num_contexts = len(context_documents)
    # context_msg = f"Number of found context: {num_contexts}"


    # await cl.Message(content=context_msg).send()
    chunk_string = "Sources: "
    for doc in context_documents:
        document_title = doc.metadata.get("source", "Unknown Document") 
        chunk_number = doc.metadata.get("chunk_number", "Unknown Chunk")
        if document_title == "":
            doc_string = "BOR"
        else:
            doc_string = "RMF" 
        chunk_string = chunk_string + " " + doc_string + "-" + str(chunk_number)

    await cl.Message(
        content=f"{chunk_string}",

    ).send()

    # Optional debug output (disabled): preview each retrieved chunk inline.
    # MAX_PREVIEW_LENGTH = 100
    # for doc in context_documents:
    #     document_context = doc.page_content.strip()
    #     truncated_context = document_context[:MAX_PREVIEW_LENGTH] + (
    #         "..." if len(document_context) > MAX_PREVIEW_LENGTH else ""
    #     )
    #     await cl.Message(
    #         content=f"**{doc.metadata.get('source', 'Unknown Document')} (Chunk: {doc.metadata.get('chunk_number', '?')})**",
    #         elements=[cl.Text(content=truncated_context, display="inline")],
    #     ).send()