devve1 committed
Commit 62ab310
1 Parent(s): 0c81177

Update app.py

Files changed (1): app.py (+25, -40)
app.py CHANGED
@@ -26,9 +26,6 @@ from fastembed import SparseEmbedding, SparseTextEmbedding
 from unstructured.nlp.tokenize import download_nltk_packages
 from scipy.sparse import csr_matrix, save_npz, load_npz, vstack
 from langchain_experimental.text_splitter import SemanticChunker
-from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
-from mistral_common.protocol.instruct.request import ChatCompletionRequest
-from mistral_common.protocol.instruct.messages import SystemMessage, UserMessage
 from langchain_community.document_loaders import WikipediaLoader, WebBaseLoader
 from qdrant_client.models import (
     NamedSparseVector,
@@ -86,7 +83,7 @@ def query_hybrid_search(query: str, client: QdrantClient, collection_name: str,
         limit=3
     )
 
-def main(query: str, client: QdrantClient, collection_name: str, tokenizer: MistralTokenizer, llm: vllm.LLM, dense_model: OptimumEncoder, sparse_model: SparseTextEmbedding):
+def main(query: str, client: QdrantClient, collection_name: str, template, llm: vllm.LLM, dense_model: OptimumEncoder, sparse_model: SparseTextEmbedding):
     scored_points = query_hybrid_search(query, client, collection_name, dense_model, sparse_model).points
 
     docs = [(scored_point.payload['text'], scored_point.payload['metadata']) for scored_point in scored_points]
@@ -101,53 +98,37 @@ def main(query: str, client: QdrantClient, collection_name: str, tokenizer: Mist
         for key, value in metadata.items()
         if (value not in seen_values and not seen_values.add(value))
     )
-
-    tokenized = tokenizer.encode_chat_completion(
-        ChatCompletionRequest(
-            messages=[
-                SystemMessage(content="You are a helpful assistant."),
-                UserMessage(content=st.session_state.qa_prompt(query, context))
-            ]
-        )
-    )
-    print(f'Tokenized text: {tokenized.text}')
+
+    prompts = template.format(system='You are a helpful assistant.', user=st.session_state.qa_prompt(query, context))
+
     outputs = llm.generate(
-        prompts=tokenized.text,
+        prompts=prompts,
         sampling_params=vllm.SamplingParams(
             temperature=0,
-            max_tokens=3000,
-            stop_token_ids=[tokenizer.instruct_tokenizer.tokenizer.eos_id]
+            max_tokens=3000
         )
     )
     print(f'TEXT: {outputs}')
 
     text = outputs[0].outputs[0].text
 
-    tokenized_2 = tokenizer.encode_chat_completion(
-        ChatCompletionRequest(
-            messages=[
-                SystemMessage(content="""Act like a professional summary writer. You have been providing summarization services for various types of documents, including academic papers, legal texts, and business reports, for over 20 years.
-Your expertise includes extracting key points and important details concisely without adding unnecessary introductory phrases."""),
-                UserMessage(content=f"""Write a summary of the following text delimited by triple backquotes. Ensure the summary covers the key points of the text. Do not introduce the summary with sentences like "Here is the summary:" or similar. The summary should be detailed, precise, and directly convey the essential information from the text.
-
-```{text}```
-
-Let's think step-by-step.""")
-            ]
-        )
-    )
+    prompts_2 = template.format(system="""Act like a professional summary writer. You have been providing summarization services for various types of documents, including academic papers, legal texts, and business reports, for over 20 years.
+Your expertise includes extracting key points and important details concisely without adding unnecessary introductory phrases.""",
+        user=f"""Write a summary of the following text delimited by triple backquotes. Ensure the summary covers the key points of the text. Do not introduce the summary with sentences like "Here is the summary:" or similar. The summary should be detailed, precise, and directly convey the essential information from the text.
+
+```{text}```
+
+Let's think step-by-step.""")
+
     outputs_2 = llm.generate(
-        prompts=tokenized_2.text,
+        prompts=prompts_2,
         sampling_params=vllm.SamplingParams(
             temperature=0.3,
-            max_tokens=3000,
-            stop_token_ids=[tokenizer.instruct_tokenizer.tokenizer.eos_id],
-            detokenize=False
+            max_tokens=3000
         )
     )
-    output = tokenizer.decode(outputs_2[0])
 
-    answer = output[0].outputs[0].text
+    answer = outputs_2[0].outputs[0].text
     answer_with_metadatas = f"{answer}\n\n\nSource(s) :\n\n{result_metadatas}"
 
     print(f'OUTPUT: {output}')
@@ -162,7 +143,11 @@ def load_models_and_documents():
     with st.spinner('Load models...'):
         model_path = snapshot_download(repo_id="GameScribes/Mistral-v0.3-AWQ")
 
-        tokenizer = MistralTokenizer.from_file(f"{model_path}/tokenizer.model.v3")
+        template = """[INST] <<SYS>>
+{system}
+<</SYS>>
+
+{user} [/INST]"""
 
         llm = vllm.LLM(
             model_path,
@@ -311,7 +296,7 @@ def load_models_and_documents():
         optimizer_config=OptimizersConfigDiff(indexing_threshold=20000)
     )
 
-    return client, collection_name, tokenizer, model, llm, dense_model, sparse_model
+    return client, collection_name, template, model, llm, dense_model, sparse_model
 
 def chunk_documents(texts: List[str], metadatas: List[dict], dense_model: OptimumEncoder, sparse_model: SparseTextEmbedding):
     text_splitter = SemanticChunker(
@@ -388,7 +373,7 @@ if __name__ == '__main__':
     if "tooltip" not in st.session_state:
         st.session_state.tooltip = 'The AI answer your questions only considering the documents provided'
 
-    client, collection_name, tokenizer, model, llm, dense_model, sparse_model = load_models_and_documents()
+    client, collection_name, template, model, llm, dense_model, sparse_model = load_models_and_documents()
 
     if 'df' not in st.session_state:
         st.session_state.df = pd.DataFrame([0])
@@ -481,7 +466,7 @@ if __name__ == '__main__':
         st.chat_message("user").markdown(prompt)
         st.session_state.messages.append({"role": "user", "content": prompt})
         print(f'PROMPT: {prompt}')
-        ai_response = main(prompt, client, collection_name, tokenizer, llm, dense_model, sparse_model)
+        ai_response = main(prompt, client, collection_name, template, llm, dense_model, sparse_model)
         with st.chat_message("assistant"):
             message_placeholder = st.empty()
             full_response = ""