jeevan committed
Commit ef80283
1 Parent(s): f4ae443

locally working

Files changed (3)
  1. .chainlit/config.toml +84 -0
  2. app.py +25 -51
  3. chainlit.md +14 -0
.chainlit/config.toml ADDED
@@ -0,0 +1,84 @@
+ [project]
+ # Whether to enable telemetry (default: true). No personal data is collected.
+ enable_telemetry = true
+
+ # List of environment variables to be provided by each user to use the app.
+ user_env = []
+
+ # Duration (in seconds) during which the session is saved when the connection is lost
+ session_timeout = 3600
+
+ # Enable third parties caching (e.g LangChain cache)
+ cache = false
+
+ # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
+ # follow_symlink = false
+
+ [features]
+ # Show the prompt playground
+ prompt_playground = true
+
+ # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
+ unsafe_allow_html = false
+
+ # Process and display mathematical expressions. This can clash with "$" characters in messages.
+ latex = false
+
+ # Authorize users to upload files with messages
+ multi_modal = true
+
+ # Allows user to use speech to text
+ [features.speech_to_text]
+ enabled = false
+ # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
+ # language = "en-US"
+
+ [UI]
+ # Name of the app and chatbot.
+ name = "Chatbot"
+
+ # Show the readme while the conversation is empty.
+ show_readme_as_default = true
+
+ # Description of the app and chatbot. This is used for HTML tags.
+ # description = ""
+
+ # Large size content are by default collapsed for a cleaner ui
+ default_collapse_content = true
+
+ # The default value for the expand messages settings.
+ default_expand_messages = false
+
+ # Hide the chain of thought details from the user in the UI.
+ hide_cot = false
+
+ # Link to your github repo. This will add a github button in the UI's header.
+ # github = ""
+
+ # Specify a CSS file that can be used to customize the user interface.
+ # The CSS file can be served from the public directory or via an external link.
+ # custom_css = "/public/test.css"
+
+ # Override default MUI light theme. (Check theme.ts)
+ [UI.theme.light]
+ #background = "#FAFAFA"
+ #paper = "#FFFFFF"
+
+ [UI.theme.light.primary]
+ #main = "#F80061"
+ #dark = "#980039"
+ #light = "#FFE7EB"
+
+ # Override default MUI dark theme. (Check theme.ts)
+ [UI.theme.dark]
+ #background = "#FAFAFA"
+ #paper = "#FFFFFF"
+
+ [UI.theme.dark.primary]
+ #main = "#F80061"
+ #dark = "#980039"
+ #light = "#FFE7EB"
+
+
+ [meta]
+ generated_by = "0.7.700"
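The new config is plain TOML, so it can be sanity-checked from Python. A minimal sketch (assumes Python 3.11+, where `tomllib` is in the standard library, and that it runs from the project root):

```python
# Minimal sketch: parse the generated Chainlit config and spot-check the
# settings this app depends on. Assumes Python 3.11+ and the project root
# as the working directory.
import tomllib

with open(".chainlit/config.toml", "rb") as f:
    config = tomllib.load(f)

print(config["project"]["session_timeout"])  # 3600
print(config["features"]["multi_modal"])     # True (file upload stays enabled)
print(config["UI"]["name"])                  # "Chatbot"
```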
app.py CHANGED
@@ -1,7 +1,3 @@
- ### Import Section ###
- """
- IMPORTS HERE
- """
  import os
  import uuid
  from dotenv import load_dotenv
@@ -9,32 +5,22 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter
  from langchain_community.document_loaders import PyMuPDFLoader
  from qdrant_client import QdrantClient
  from qdrant_client.http.models import Distance, VectorParams
- from langchain_openai.embeddings import OpenAIEmbeddings
  from langchain.storage import LocalFileStore
  from langchain_qdrant import QdrantVectorStore
  from langchain.embeddings import CacheBackedEmbeddings
- from langchain_core.prompts import ChatPromptTemplate
  from chainlit.types import AskFileResponse
- from langchain_core.globals import set_llm_cache
- from langchain_openai import ChatOpenAI
- from langchain_core.caches import InMemoryCache
  from operator import itemgetter
  from langchain_core.runnables.passthrough import RunnablePassthrough
  import chainlit as cl
  from langchain_core.runnables.config import RunnableConfig
- from langchain_community.llms import HuggingFaceEndpoint
+ from langchain_huggingface import HuggingFaceEndpoint
  from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings
  from langchain_core.prompts import PromptTemplate

- import numpy as np
- from numpy.linalg import norm
-
  load_dotenv()

- ### Global Section ###
- """
- GLOBAL CODE HERE
- """
+ YOUR_LLM_ENDPOINT_URL = os.environ["YOUR_LLM_ENDPOINT_URL"]
+ YOUR_EMBED_MODEL_URL = os.environ["YOUR_EMBED_MODEL_URL"]

  RAG_PROMPT_TEMPLATE = """\
  <|start_header_id|>system<|end_header_id|>
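The two new module-level lookups use `os.environ[...]`, which raises a bare `KeyError` when a variable is missing. A hedged sketch of a friendlier fail-fast check (only the variable names come from this diff; the check itself is illustrative):

```python
# Fail-fast sketch: verify both endpoint URLs are set before anything else
# touches them. The variable names are from the diff; the error handling is
# illustrative, not part of the commit.
import os
from dotenv import load_dotenv

load_dotenv()

for var in ("YOUR_LLM_ENDPOINT_URL", "YOUR_EMBED_MODEL_URL"):
    if not os.environ.get(var):
        raise RuntimeError(f"{var} is not set; add it to your environment or .env file.")
```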
@@ -49,10 +35,11 @@ Context:

  <|start_header_id|>assistant<|end_header_id|>
  """
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=100)
  hf_llm = HuggingFaceEndpoint(
-     endpoint_url=os.environ["YOUR_LLM_ENDPOINT_URL"],
-     max_new_tokens=512,
+     endpoint_url=f"{YOUR_LLM_ENDPOINT_URL}",
+     max_new_tokens=300,
      top_k=10,
      top_p=0.95,
      typical_p=0.95,
@@ -67,12 +54,6 @@ hf_embeddings = HuggingFaceEndpointEmbeddings(
  )

  rag_prompt = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
- rag_chain = rag_prompt | hf_llm
-
- def cosine_similarity(phrase_1, phrase_2):
-     vec_1 = hf_embeddings.embed_documents([phrase_1])[0]
-     vec2_2 = hf_embeddings.embed_documents([phrase_2])[0]
-     return np.dot(vec_1, vec2_2) / (norm(vec_1) * norm(vec2_2))

  def process_file(file: AskFileResponse):
      import tempfile
@@ -91,15 +72,11 @@ def process_file(file: AskFileResponse):
      return docs


- ### On Chat Start (Session Start) Section ###
  @cl.on_chat_start
  async def on_chat_start():
-     """ SESSION SPECIFIC CODE HERE """
      files = None

      while files == None:
-         # Async method: This allows the function to pause execution while waiting for the user to upload a file,
-         # without blocking the entire application. It improves responsiveness and scalability.
          files = await cl.AskFileMessage(
              content="Please upload a PDF file to begin!",
              accept=["application/pdf"],
@@ -115,38 +92,36 @@ async def on_chat_start():
      await msg.send()
      docs = process_file(file)

-     # Typical QDrant Client Set-up
+     # QDrant Client Set-up
      collection_name = f"pdf_to_parse_{uuid.uuid4()}"
      client = QdrantClient(":memory:")
      client.create_collection(
          collection_name=collection_name,
-         vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
+         vectors_config=VectorParams(size=768, distance=Distance.COSINE),
      )

      # Adding cache!
-     store = LocalFileStore("./cache/")
-     cached_embedder = CacheBackedEmbeddings.from_bytes_store(
-         hf_embeddings, store, namespace=hf_embeddings.model
-     )
+     # store = LocalFileStore("./cache/")
+     # cached_embedder = CacheBackedEmbeddings.from_bytes_store(
+     #     hf_embeddings, store, namespace=hf_embeddings.model
+     # )

      # Typical QDrant Vector Store Set-up
      vectorstore = QdrantVectorStore(
          client=client,
          collection_name=collection_name,
-         embedding=cached_embedder)
+         embedding=hf_embeddings)
+     retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3})

      for i in range(0, len(docs), 32):
          if i == 0:
-             vectorstore = docs.from_documents(docs[i:i+32], hf_embeddings)
+             retriever.add_documents(docs[i:i+32])
              continue
-         vectorstore.add_documents(docs[i:i+32])
+         retriever.add_documents(docs[i:i+32])

-     retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3})

      retrieval_augmented_qa_chain = (
-         {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
-         | RunnablePassthrough.assign(context=itemgetter("context"))
-         | rag_prompt | hf_llm
+         {"context": itemgetter("query") | retriever, "query": itemgetter("query")} | rag_prompt | hf_llm
      )

      # Let the user know that the system is ready
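The collection dimension drops from 1536 (the removed OpenAI embeddings) to 768 to match the Hugging Face embedding endpoint. A sketch of probing the endpoint instead of hard-coding the size (assumes `hf_embeddings`, `client`, and `collection_name` from app.py are already in scope):

```python
# Sketch: embed a probe string once and size the Qdrant collection from the
# result rather than hard-coding 768. Assumes app.py's hf_embeddings, client,
# and collection_name are in scope.
from qdrant_client.http.models import Distance, VectorParams

probe = hf_embeddings.embed_query("dimension probe")  # one round-trip to the endpoint
client.create_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(size=len(probe), distance=Distance.COSINE),
)
```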
@@ -166,19 +141,18 @@ def rename(orig_author: str):
  ### On Message Section ###
  @cl.on_message
  async def main(message: cl.Message):
-     """
-     MESSAGE CODE HERE
-     """
      runnable = cl.user_session.get("chain")

      msg = cl.Message(content="")

-     # Async method: Using astream allows for asynchronous streaming of the response,
-     # improving responsiveness and user experience by showing partial results as they become available.
      async for chunk in runnable.astream(
-         {"question": message.content},
+         {"query": message.content},
          config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
      ):
-         await msg.stream_token(chunk.content)
+         await msg.stream_token(chunk)
+
+     await msg.send()

-     await msg.send()
+ if __name__ == "__main__":
+     from chainlit.cli import run_chainlit
+     run_chainlit(__file__)
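The rewritten chain also renames its input key from `question` to `query` and drops the `RunnablePassthrough.assign` step. The dict-of-runnables routing it now relies on can be seen in isolation; in this toy sketch only `itemgetter` and `RunnableParallel` are real LangChain pieces, and `fake_retriever` is a hypothetical stand-in:

```python
# Toy sketch of the dict-of-runnables pattern used in
# retrieval_augmented_qa_chain. fake_retriever is a hypothetical stand-in for
# vectorstore.as_retriever(); the key routing is what matters here.
from operator import itemgetter
from langchain_core.runnables import RunnableLambda, RunnableParallel

fake_retriever = RunnableLambda(lambda q: [f"doc about {q}"])

branch = RunnableParallel(
    context=itemgetter("query") | fake_retriever,  # extract "query", feed it to retrieval
    query=itemgetter("query"),                     # pass the raw query through unchanged
)

print(branch.invoke({"query": "llamas"}))
# {'context': ['doc about llamas'], 'query': 'llamas'}
```

With the new `__main__` block, `python app.py` launches the server directly; the usual `chainlit run app.py` CLI still works as before.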
chainlit.md ADDED
@@ -0,0 +1,14 @@
+ # Welcome to Chainlit! 🚀🤖
+
+ Hi there, Developer! 👋 We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
+
+ ## Useful Links 🔗
+
+ - **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚
+ - **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! 💬
+
+ We can't wait to see what you create with Chainlit! Happy coding! 💻😊
+
+ ## Welcome screen
+
+ To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.