Update app.py
Browse files
app.py
CHANGED
@@ -24,10 +24,9 @@ from unstructured.nlp.tokenize import download_nltk_packages
|
|
24 |
from huggingface_hub import snapshot_download, hf_hub_download
|
25 |
from scipy.sparse import csr_matrix, save_npz, load_npz, vstack
|
26 |
from langchain_experimental.text_splitter import SemanticChunker
|
27 |
-
from mistral_common.protocol.instruct.messages import UserMessage
|
28 |
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
|
29 |
from mistral_common.protocol.instruct.request import ChatCompletionRequest
|
30 |
-
from mistral_common
|
31 |
from langchain_community.document_loaders import WikipediaLoader, WebBaseLoader
|
32 |
from qdrant_client.models import (
|
33 |
NamedSparseVector,
|
@@ -85,7 +84,7 @@ def query_hybrid_search(query: str, client: QdrantClient, collection_name: str,
|
|
85 |
limit=3
|
86 |
)
|
87 |
|
88 |
-
def main(query: str, client: QdrantClient, collection_name: str, llm: Llama, dense_model: OptimumEncoder, sparse_model: SparseTextEmbedding):
|
89 |
scored_points = query_hybrid_search(query, client, collection_name, dense_model, sparse_model).points
|
90 |
|
91 |
docs = [(scored_point.payload['text'], scored_point.payload['metadata']) for scored_point in scored_points]
|
@@ -101,40 +100,51 @@ def main(query: str, client: QdrantClient, collection_name: str, llm: Llama, den
|
|
101 |
if (value not in seen_values and not seen_values.add(value))
|
102 |
)
|
103 |
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
|
|
|
|
|
|
|
|
|
|
117 |
|
118 |
-
text = response[0].outputs[0].text
|
119 |
print(f'TEXT: {response}')
|
|
|
120 |
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
"content": f"""Write a summary of the following text delimited by triple backquotes. Ensure the summary covers the key points of the text. Do not introduce the summary with sentences like "Here is the summary:" or similar. The summary should be detailed, precise, and directly convey the essential information from the text.
|
130 |
-
|
131 |
-
```{text}```
|
132 |
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
|
139 |
answer = output[0].outputs[0].text
|
140 |
answer_with_metadatas = f"{answer}\n\n\nSource(s) :\n\n{result_metadatas}"
|
@@ -482,7 +492,7 @@ if __name__ == '__main__':
|
|
482 |
outputs = llm.generate(
|
483 |
prompts=tokenized.text,
|
484 |
sampling_params=vllm.SamplingParams(
|
485 |
-
temperature=0.
|
486 |
max_tokens=20,
|
487 |
stop_token_ids=[tokenizer.instruct_tokenizer.tokenizer.eos_id],
|
488 |
detokenize=False
|
@@ -504,7 +514,7 @@ if __name__ == '__main__':
|
|
504 |
st.chat_message("user").markdown(prompt)
|
505 |
st.session_state.messages.append({"role": "user", "content": prompt})
|
506 |
print(f'PROMPT: {prompt}')
|
507 |
-
ai_response = main(prompt, client, collection_name, llm, dense_model, sparse_model)
|
508 |
with st.chat_message("assistant"):
|
509 |
message_placeholder = st.empty()
|
510 |
full_response = ""
|
|
|
24 |
from huggingface_hub import snapshot_download, hf_hub_download
|
25 |
from scipy.sparse import csr_matrix, save_npz, load_npz, vstack
|
26 |
from langchain_experimental.text_splitter import SemanticChunker
|
|
|
27 |
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
|
28 |
from mistral_common.protocol.instruct.request import ChatCompletionRequest
|
29 |
+
from mistral_common.protocol.instruct.messages import SystemMessage, UserMessage
|
30 |
from langchain_community.document_loaders import WikipediaLoader, WebBaseLoader
|
31 |
from qdrant_client.models import (
|
32 |
NamedSparseVector,
|
|
|
84 |
limit=3
|
85 |
)
|
86 |
|
87 |
+
def main(query: str, client: QdrantClient, collection_name: str, tokenizer: MistralTokenizer, llm: vllm.LLM, dense_model: OptimumEncoder, sparse_model: SparseTextEmbedding):
|
88 |
scored_points = query_hybrid_search(query, client, collection_name, dense_model, sparse_model).points
|
89 |
|
90 |
docs = [(scored_point.payload['text'], scored_point.payload['metadata']) for scored_point in scored_points]
|
|
|
100 |
if (value not in seen_values and not seen_values.add(value))
|
101 |
)
|
102 |
|
103 |
+
tokenized = tokenizer.encode_chat_completion(
|
104 |
+
ChatCompletionRequest(
|
105 |
+
messages=[
|
106 |
+
SystemMessage(content="You are a helpful assistant."),
|
107 |
+
UserMessage(content=st.session_state.qa_prompt(query, context))
|
108 |
+
]
|
109 |
+
)
|
110 |
+
)
|
111 |
+
outputs = llm.generate(
|
112 |
+
prompts=tokenized.text,
|
113 |
+
sampling_params=vllm.SamplingParams(
|
114 |
+
temperature=0,
|
115 |
+
max_tokens=3000,
|
116 |
+
stop_token_ids=[tokenizer.instruct_tokenizer.tokenizer.eos_id],
|
117 |
+
detokenize=False
|
118 |
+
)
|
119 |
+
)
|
120 |
+
response = tokenizer.decode(outputs)
|
121 |
|
|
|
122 |
print(f'TEXT: {response}')
|
123 |
+
text = response[0].outputs[0].text
|
124 |
|
125 |
+
tokenized_2 = tokenizer.encode_chat_completion(
|
126 |
+
ChatCompletionRequest(
|
127 |
+
messages=[
|
128 |
+
SystemMessage(content="""Act like a professional summary writer. You have been providing summarization services for various types of documents, including academic papers, legal texts, and business reports, for over 20 years.
|
129 |
+
Your expertise includes extracting key points and important details concisely without adding unnecessary introductory phrases."""),
|
130 |
+
UserMessage(content=f"""Write a summary of the following text delimited by triple backquotes. Ensure the summary covers the key points of the text. Do not introduce the summary with sentences like "Here is the summary:" or similar. The summary should be detailed, precise, and directly convey the essential information from the text.
|
131 |
+
|
132 |
+
```{text}```
|
|
|
|
|
|
|
133 |
|
134 |
+
Let's think step-by-step.""")
|
135 |
+
]
|
136 |
+
)
|
137 |
+
)
|
138 |
+
outputs_2 = llm.generate(
|
139 |
+
prompts=tokenized_2.text,
|
140 |
+
sampling_params=vllm.SamplingParams(
|
141 |
+
temperature=0.3,
|
142 |
+
max_tokens=3000,
|
143 |
+
stop_token_ids=[tokenizer.instruct_tokenizer.tokenizer.eos_id],
|
144 |
+
detokenize=False
|
145 |
+
)
|
146 |
+
)
|
147 |
+
output = tokenizer.decode(outputs_2)
|
148 |
|
149 |
answer = output[0].outputs[0].text
|
150 |
answer_with_metadatas = f"{answer}\n\n\nSource(s) :\n\n{result_metadatas}"
|
|
|
492 |
outputs = llm.generate(
|
493 |
prompts=tokenized.text,
|
494 |
sampling_params=vllm.SamplingParams(
|
495 |
+
temperature=0.3,
|
496 |
max_tokens=20,
|
497 |
stop_token_ids=[tokenizer.instruct_tokenizer.tokenizer.eos_id],
|
498 |
detokenize=False
|
|
|
514 |
st.chat_message("user").markdown(prompt)
|
515 |
st.session_state.messages.append({"role": "user", "content": prompt})
|
516 |
print(f'PROMPT: {prompt}')
|
517 |
+
ai_response = main(prompt, client, collection_name, tokenizer, llm, dense_model, sparse_model)
|
518 |
with st.chat_message("assistant"):
|
519 |
message_placeholder = st.empty()
|
520 |
full_response = ""
|