Update app.py
app.py CHANGED
@@ -26,9 +26,6 @@ from fastembed import SparseEmbedding, SparseTextEmbedding
 from unstructured.nlp.tokenize import download_nltk_packages
 from scipy.sparse import csr_matrix, save_npz, load_npz, vstack
 from langchain_experimental.text_splitter import SemanticChunker
-from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
-from mistral_common.protocol.instruct.request import ChatCompletionRequest
-from mistral_common.protocol.instruct.messages import SystemMessage, UserMessage
 from langchain_community.document_loaders import WikipediaLoader, WebBaseLoader
 from qdrant_client.models import (
     NamedSparseVector,
@@ -86,7 +83,7 @@ def query_hybrid_search(query: str, client: QdrantClient, collection_name: str,
         limit=3
     )
 
-def main(query: str, client: QdrantClient, collection_name: str, tokenizer: MistralTokenizer, llm: vllm.LLM, dense_model: OptimumEncoder, sparse_model: SparseTextEmbedding):
+def main(query: str, client: QdrantClient, collection_name: str, template, llm: vllm.LLM, dense_model: OptimumEncoder, sparse_model: SparseTextEmbedding):
     scored_points = query_hybrid_search(query, client, collection_name, dense_model, sparse_model).points
 
     docs = [(scored_point.payload['text'], scored_point.payload['metadata']) for scored_point in scored_points]
@@ -101,53 +98,37 @@ def main(query: str, client: QdrantClient, collection_name: str, tokenizer: MistralTokenizer, llm: vllm.LLM, dense_model: OptimumEncoder, sparse_model: SparseTextEmbedding):
         for key, value in metadata.items()
         if (value not in seen_values and not seen_values.add(value))
     )
-
-    tokenized = tokenizer.encode_chat_completion(
-        ChatCompletionRequest(
-            messages=[
-                SystemMessage(content="You are a helpful assistant."),
-                UserMessage(content=st.session_state.qa_prompt(query, context))
-            ]
-        )
-    )
-    print(f'Tokenized text: {tokenized.text}')
+
+    prompts = template.format(system='You are a helpful assistant.', user=st.session_state.qa_prompt(query, context))
+
     outputs = llm.generate(
-        prompts=
+        prompts=prompts,
         sampling_params=vllm.SamplingParams(
             temperature=0,
-            max_tokens=3000,
-            stop_token_ids=[tokenizer.instruct_tokenizer.tokenizer.eos_id]
+            max_tokens=3000
         )
     )
     print(f'TEXT: {outputs}')
 
     text = outputs[0].outputs[0].text
 
-
-
-
-
-
-
-
-
-
-            Let's think step-by-step.""")
-            ]
-        )
-    )
+    prompts_2 = template.format(system="""Act like a professional summary writer. You have been providing summarization services for various types of documents, including academic papers, legal texts, and business reports, for over 20 years.
+Your expertise includes extracting key points and important details concisely without adding unnecessary introductory phrases.""",
+    user=f"""Write a summary of the following text delimited by triple backquotes. Ensure the summary covers the key points of the text. Do not introduce the summary with sentences like "Here is the summary:" or similar. The summary should be detailed, precise, and directly convey the essential information from the text.
+
+```{text}```
+
+Let's think step-by-step.""")
+
     outputs_2 = llm.generate(
-        prompts=
+        prompts=prompts_2,
         sampling_params=vllm.SamplingParams(
             temperature=0.3,
-            max_tokens=3000,
-            stop_token_ids=[tokenizer.instruct_tokenizer.tokenizer.eos_id],
-            detokenize=False
+            max_tokens=3000
         )
     )
-    output = tokenizer.decode(outputs_2[0])
 
-    answer =
+    answer = outputs_2[0].outputs[0].text
     answer_with_metadatas = f"{answer}\n\n\nSource(s) :\n\n{result_metadatas}"
 
     print(f'OUTPUT: {output}')
@@ -162,7 +143,11 @@ def load_models_and_documents():
     with st.spinner('Load models...'):
        model_path = snapshot_download(repo_id="GameScribes/Mistral-v0.3-AWQ")
 
-
+        template = """[INST] <<SYS>>
+{system}
+<</SYS>>
+
+{user} [/INST]"""
 
         llm = vllm.LLM(
             model_path,
@@ -311,7 +296,7 @@ def load_models_and_documents():
         optimizer_config=OptimizersConfigDiff(indexing_threshold=20000)
     )
 
-    return client, collection_name, tokenizer, model, llm, dense_model, sparse_model
+    return client, collection_name, template, model, llm, dense_model, sparse_model
 
 def chunk_documents(texts: List[str], metadatas: List[dict], dense_model: OptimumEncoder, sparse_model: SparseTextEmbedding):
     text_splitter = SemanticChunker(
@@ -388,7 +373,7 @@ if __name__ == '__main__':
     if "tooltip" not in st.session_state:
         st.session_state.tooltip = 'The AI answer your questions only considering the documents provided'
 
-    client, collection_name, tokenizer, model, llm, dense_model, sparse_model = load_models_and_documents()
+    client, collection_name, template, model, llm, dense_model, sparse_model = load_models_and_documents()
 
     if 'df' not in st.session_state:
         st.session_state.df = pd.DataFrame([0])
@@ -481,7 +466,7 @@ if __name__ == '__main__':
         st.chat_message("user").markdown(prompt)
         st.session_state.messages.append({"role": "user", "content": prompt})
         print(f'PROMPT: {prompt}')
-        ai_response = main(prompt, client, collection_name, tokenizer, llm, dense_model, sparse_model)
+        ai_response = main(prompt, client, collection_name, template, llm, dense_model, sparse_model)
         with st.chat_message("assistant"):
             message_placeholder = st.empty()
             full_response = ""
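
The substantive change above is in main(): prompt construction moves from mistral_common's ChatCompletionRequest encoding to a plain str.format call on the [INST]/<<SYS>> template added in load_models_and_documents(). As a minimal sketch of what that template yields once formatted (the system and user strings below are illustrative placeholders, not values from the app):

template = """[INST] <<SYS>>
{system}
<</SYS>>

{user} [/INST]"""

# Illustrative values; in the app, `user` is st.session_state.qa_prompt(query, context).
prompt = template.format(
    system="You are a helpful assistant.",
    user="What do the provided documents say about hybrid search?",
)
print(prompt)
# [INST] <<SYS>>
# You are a helpful assistant.
# <</SYS>>
#
# What do the provided documents say about hybrid search? [/INST]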
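Taken together, main() now runs a two-pass pipeline: a deterministic answer pass (temperature=0) over the retrieved context, followed by a summarization pass (temperature=0.3) over that answer. Below is a condensed, self-contained sketch of that flow under stated assumptions: it uses vLLM's LLM/SamplingParams API as the diff does, but the engine arguments are trimmed and the prompts are shortened stand-ins for the app's qa_prompt helper and summary instructions.

import vllm

# Model repo as named in the diff; the app resolves it with snapshot_download
# first and passes additional engine arguments that are omitted in this sketch.
llm = vllm.LLM("GameScribes/Mistral-v0.3-AWQ")

template = """[INST] <<SYS>>
{system}
<</SYS>>

{user} [/INST]"""

def generate(prompt: str, temperature: float) -> str:
    # llm.generate returns one RequestOutput per prompt; each carries its completions.
    outputs = llm.generate(
        prompts=prompt,
        sampling_params=vllm.SamplingParams(temperature=temperature, max_tokens=3000),
    )
    return outputs[0].outputs[0].text

# Pass 1: answer the question against the retrieved context (in the app, the
# user turn is built by st.session_state.qa_prompt(query, context)).
answer = generate(
    template.format(system="You are a helpful assistant.",
                    user="Using the context below, answer the question. ..."),
    temperature=0,
)

# Pass 2: summarize the raw answer, mirroring the diff's prompts_2.
summary = generate(
    template.format(system="Act like a professional summary writer.",
                    user=f"Write a summary of the following text delimited by triple backquotes.\n\n```{answer}```"),
    temperature=0.3,
)
print(summary)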