devve1 committed on
Commit 3eae162
1 Parent(s): 5b4fa5e

Update app.py

Files changed (1)
  1. app.py +45 -35
app.py CHANGED
@@ -24,10 +24,9 @@ from unstructured.nlp.tokenize import download_nltk_packages
  from huggingface_hub import snapshot_download, hf_hub_download
  from scipy.sparse import csr_matrix, save_npz, load_npz, vstack
  from langchain_experimental.text_splitter import SemanticChunker
- from mistral_common.protocol.instruct.messages import UserMessage
  from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
  from mistral_common.protocol.instruct.request import ChatCompletionRequest
- from mistral_common
+ from mistral_common.protocol.instruct.messages import SystemMessage, UserMessage
  from langchain_community.document_loaders import WikipediaLoader, WebBaseLoader
  from qdrant_client.models import (
      NamedSparseVector,
@@ -85,7 +84,7 @@ def query_hybrid_search(query: str, client: QdrantClient, collection_name: str,
          limit=3
      )
  
- def main(query: str, client: QdrantClient, collection_name: str, llm: Llama, dense_model: OptimumEncoder, sparse_model: SparseTextEmbedding):
+ def main(query: str, client: QdrantClient, collection_name: str, tokenizer: MistralTokenizer, llm: vllm.LLM, dense_model: OptimumEncoder, sparse_model: SparseTextEmbedding):
      scored_points = query_hybrid_search(query, client, collection_name, dense_model, sparse_model).points
  
      docs = [(scored_point.payload['text'], scored_point.payload['metadata']) for scored_point in scored_points]
@@ -101,40 +100,51 @@ def main(query: str, client: QdrantClient, collection_name: str, llm: Llama, den
          if (value not in seen_values and not seen_values.add(value))
      )
  
-     sampling_params_1 = vllm.SamplingParams(temperature=0, max_tokens=3000)
-     prompt_1 = [
-         {
-             "role": "system",
-             "content": "You are a helpful assistant."
-         },
-         {
-             "role": "user",
-             "content": st.session_state.qa_prompt(query, context)
-         }
-     ]
-     inputs_1 = tokenizer.apply_chat_template(prompt_1, tokenize=False, add_generation_prompt=True)
-     response = llm.generate(prompts=inputs_1, sampling_params=sampling_params_1)
+     tokenized = tokenizer.encode_chat_completion(
+         ChatCompletionRequest(
+             messages=[
+                 SystemMessage(content="You are a helpful assistant."),
+                 UserMessage(content=st.session_state.qa_prompt(query, context))
+             ]
+         )
+     )
+     outputs = llm.generate(
+         prompts=tokenized.text,
+         sampling_params=vllm.SamplingParams(
+             temperature=0,
+             max_tokens=3000,
+             stop_token_ids=[tokenizer.instruct_tokenizer.tokenizer.eos_id],
+             detokenize=False
+         )
+     )
+     response = tokenizer.decode(outputs)
  
-     text = response[0].outputs[0].text
      print(f'TEXT: {response}')
+     text = response[0].outputs[0].text
  
-     sampling_params_2 = vllm.SamplingParams(temperature=0.75, max_tokens=3000)
-     prompt_2 = [
-         {"role": "system",
-          "content": """Act like a professional summary writer. You have been providing summarization services for various types of documents, including academic papers, legal texts, and business reports, for over 20 years.
-          Your expertise includes extracting key points and important details concisely without adding unnecessary introductory phrases."""
-         },
-         {
-             "role": "user",
-             "content": f"""Write a summary of the following text delimited by triple backquotes. Ensure the summary covers the key points of the text. Do not introduce the summary with sentences like "Here is the summary:" or similar. The summary should be detailed, precise, and directly convey the essential information from the text.
- 
-             ```{text}```
+     tokenized_2 = tokenizer.encode_chat_completion(
+         ChatCompletionRequest(
+             messages=[
+                 SystemMessage(content="""Act like a professional summary writer. You have been providing summarization services for various types of documents, including academic papers, legal texts, and business reports, for over 20 years.
+                 Your expertise includes extracting key points and important details concisely without adding unnecessary introductory phrases."""),
+                 UserMessage(content=f"""Write a summary of the following text delimited by triple backquotes. Ensure the summary covers the key points of the text. Do not introduce the summary with sentences like "Here is the summary:" or similar. The summary should be detailed, precise, and directly convey the essential information from the text.
+ 
+                 ```{text}```
  
-             Let's think step-by-step."""
-         }
-     ]
-     inputs_2 = tokenizer.apply_chat_template(prompt_2, tokenize=False, add_generation_prompt=True)
-     output = llm.generate(prompts=inputs_2, sampling_params=sampling_params_2)
+                 Let's think step-by-step.""")
+             ]
+         )
+     )
+     outputs_2 = llm.generate(
+         prompts=tokenized_2.text,
+         sampling_params=vllm.SamplingParams(
+             temperature=0.3,
+             max_tokens=3000,
+             stop_token_ids=[tokenizer.instruct_tokenizer.tokenizer.eos_id],
+             detokenize=False
+         )
+     )
+     output = tokenizer.decode(outputs_2)
  
      answer = output[0].outputs[0].text
      answer_with_metadatas = f"{answer}\n\n\nSource(s) :\n\n{result_metadatas}"
@@ -482,7 +492,7 @@ if __name__ == '__main__':
      outputs = llm.generate(
          prompts=tokenized.text,
          sampling_params=vllm.SamplingParams(
-             temperature=0.75,
+             temperature=0.3,
              max_tokens=20,
              stop_token_ids=[tokenizer.instruct_tokenizer.tokenizer.eos_id],
              detokenize=False
@@ -504,7 +514,7 @@ if __name__ == '__main__':
      st.chat_message("user").markdown(prompt)
      st.session_state.messages.append({"role": "user", "content": prompt})
      print(f'PROMPT: {prompt}')
-     ai_response = main(prompt, client, collection_name, llm, dense_model, sparse_model)
+     ai_response = main(prompt, client, collection_name, tokenizer, llm, dense_model, sparse_model)
      with st.chat_message("assistant"):
          message_placeholder = st.empty()
          full_response = ""
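
For readers following the API switch in this commit (from `tokenizer.apply_chat_template` to mistral_common's `encode_chat_completion`), the sketch below shows the adopted request flow in isolation. It is illustrative only: the tokenizer version, model id, and example question are assumptions and are not taken from app.py, and `detokenize=False` is omitted here so the answer can be read directly from vLLM's output rather than decoded with the Mistral tokenizer as app.py does.

```python
# Illustrative sketch only (not part of the commit): build a chat request with
# mistral_common, render it to an instruct-formatted prompt, and generate with vLLM.
import vllm
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.protocol.instruct.messages import SystemMessage, UserMessage
from mistral_common.protocol.instruct.request import ChatCompletionRequest

tokenizer = MistralTokenizer.v3()  # assumption: v3 instruct tokenizer matching the served model
llm = vllm.LLM(model="mistralai/Mistral-7B-Instruct-v0.3")  # assumption: model id

# Render the chat messages into the model's instruct prompt format.
tokenized = tokenizer.encode_chat_completion(
    ChatCompletionRequest(
        messages=[
            SystemMessage(content="You are a helpful assistant."),
            UserMessage(content="What does hybrid search add to a RAG pipeline?"),  # example question
        ]
    )
)

# Generate from the rendered prompt text; stop on the Mistral EOS token as app.py does.
outputs = llm.generate(
    prompts=tokenized.text,
    sampling_params=vllm.SamplingParams(
        temperature=0,
        max_tokens=3000,
        stop_token_ids=[tokenizer.instruct_tokenizer.tokenizer.eos_id],
    ),
)
print(outputs[0].outputs[0].text)
```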