devve1 committed on
Commit 3eae162
1 Parent(s): 5b4fa5e

Update app.py

Files changed (1)
  1. app.py +45 -35
app.py CHANGED
@@ -24,10 +24,9 @@ from unstructured.nlp.tokenize import download_nltk_packages
  from huggingface_hub import snapshot_download, hf_hub_download
  from scipy.sparse import csr_matrix, save_npz, load_npz, vstack
  from langchain_experimental.text_splitter import SemanticChunker
- from mistral_common.protocol.instruct.messages import UserMessage
  from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
  from mistral_common.protocol.instruct.request import ChatCompletionRequest
- from mistral_common
+ from mistral_common.protocol.instruct.messages import SystemMessage, UserMessage
  from langchain_community.document_loaders import WikipediaLoader, WebBaseLoader
  from qdrant_client.models import (
      NamedSparseVector,
@@ -85,7 +84,7 @@ def query_hybrid_search(query: str, client: QdrantClient, collection_name: str,
          limit=3
      )
  
- def main(query: str, client: QdrantClient, collection_name: str, llm: Llama, dense_model: OptimumEncoder, sparse_model: SparseTextEmbedding):
+ def main(query: str, client: QdrantClient, collection_name: str, tokenizer: MistralTokenizer, llm: vllm.LLM, dense_model: OptimumEncoder, sparse_model: SparseTextEmbedding):
      scored_points = query_hybrid_search(query, client, collection_name, dense_model, sparse_model).points
  
      docs = [(scored_point.payload['text'], scored_point.payload['metadata']) for scored_point in scored_points]
@@ -101,40 +100,51 @@ def main(query: str, client: QdrantClient, collection_name: str, llm: Llama, den
          if (value not in seen_values and not seen_values.add(value))
      )
  
-     sampling_params_1 = vllm.SamplingParams(temperature=0, max_tokens=3000)
-     prompt_1 = [
-         {
-             "role": "system",
-             "content": "You are a helpful assistant."
-         },
-         {
-             "role": "user",
-             "content": st.session_state.qa_prompt(query, context)
-         }
-     ]
-     inputs_1 = tokenizer.apply_chat_template(prompt_1, tokenize=False, add_generation_prompt=True)
-     response = llm.generate(prompts=inputs_1, sampling_params=sampling_params_1)
+     tokenized = tokenizer.encode_chat_completion(
+         ChatCompletionRequest(
+             messages=[
+                 SystemMessage(content="You are a helpful assistant."),
+                 UserMessage(content=st.session_state.qa_prompt(query, context))
+             ]
+         )
+     )
+     outputs = llm.generate(
+         prompts=tokenized.text,
+         sampling_params=vllm.SamplingParams(
+             temperature=0,
+             max_tokens=3000,
+             stop_token_ids=[tokenizer.instruct_tokenizer.tokenizer.eos_id],
+             detokenize=False
+         )
+     )
+     response = tokenizer.decode(outputs)
  
-     text = response[0].outputs[0].text
      print(f'TEXT: {response}')
+     text = response[0].outputs[0].text
  
-     sampling_params_2 = vllm.SamplingParams(temperature=0.75, max_tokens=3000)
-     prompt_2 = [
-         {"role": "system",
-          "content": """Act like a professional summary writer. You have been providing summarization services for various types of documents, including academic papers, legal texts, and business reports, for over 20 years.
-          Your expertise includes extracting key points and important details concisely without adding unnecessary introductory phrases."""
-         },
-         {
-             "role": "user",
-             "content": f"""Write a summary of the following text delimited by triple backquotes. Ensure the summary covers the key points of the text. Do not introduce the summary with sentences like "Here is the summary:" or similar. The summary should be detailed, precise, and directly convey the essential information from the text.
- 
-             ```{text}```
+     tokenized_2 = tokenizer.encode_chat_completion(
+         ChatCompletionRequest(
+             messages=[
+                 SystemMessage(content="""Act like a professional summary writer. You have been providing summarization services for various types of documents, including academic papers, legal texts, and business reports, for over 20 years.
+                 Your expertise includes extracting key points and important details concisely without adding unnecessary introductory phrases."""),
+                 UserMessage(content=f"""Write a summary of the following text delimited by triple backquotes. Ensure the summary covers the key points of the text. Do not introduce the summary with sentences like "Here is the summary:" or similar. The summary should be detailed, precise, and directly convey the essential information from the text.
+ 
+                 ```{text}```
  
-             Let's think step-by-step."""
-         }
-     ]
-     inputs_2 = tokenizer.apply_chat_template(prompt_2, tokenize=False, add_generation_prompt=True)
-     output = llm.generate(prompts=inputs_2, sampling_params=sampling_params_2)
+                 Let's think step-by-step.""")
+             ]
+         )
+     )
+     outputs_2 = llm.generate(
+         prompts=tokenized_2.text,
+         sampling_params=vllm.SamplingParams(
+             temperature=0.3,
+             max_tokens=3000,
+             stop_token_ids=[tokenizer.instruct_tokenizer.tokenizer.eos_id],
+             detokenize=False
+         )
+     )
+     output = tokenizer.decode(outputs_2)
  
      answer = output[0].outputs[0].text
      answer_with_metadatas = f"{answer}\n\n\nSource(s) :\n\n{result_metadatas}"
@@ -482,7 +492,7 @@ if __name__ == '__main__':
      outputs = llm.generate(
          prompts=tokenized.text,
          sampling_params=vllm.SamplingParams(
-             temperature=0.75,
+             temperature=0.3,
              max_tokens=20,
              stop_token_ids=[tokenizer.instruct_tokenizer.tokenizer.eos_id],
              detokenize=False
@@ -504,7 +514,7 @@ if __name__ == '__main__':
      st.chat_message("user").markdown(prompt)
      st.session_state.messages.append({"role": "user", "content": prompt})
      print(f'PROMPT: {prompt}')
-     ai_response = main(prompt, client, collection_name, llm, dense_model, sparse_model)
+     ai_response = main(prompt, client, collection_name, tokenizer, llm, dense_model, sparse_model)
      with st.chat_message("assistant"):
          message_placeholder = st.empty()
          full_response = ""
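
For readers following the API switch in this commit (from `tokenizer.apply_chat_template` to mistral_common's `encode_chat_completion`), the sketch below shows the adopted request flow in isolation. It is illustrative only: the tokenizer version, model id, and example question are assumptions and are not taken from app.py, and `detokenize=False` is omitted here so the answer can be read directly from vLLM's output rather than decoded with the Mistral tokenizer as app.py does.

```python
# Illustrative sketch only (not part of the commit): build a chat request with
# mistral_common, render it to an instruct-formatted prompt, and generate with vLLM.
import vllm
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.protocol.instruct.messages import SystemMessage, UserMessage
from mistral_common.protocol.instruct.request import ChatCompletionRequest

tokenizer = MistralTokenizer.v3()  # assumption: v3 instruct tokenizer matching the served model
llm = vllm.LLM(model="mistralai/Mistral-7B-Instruct-v0.3")  # assumption: model id

# Render the chat messages into the model's instruct prompt format.
tokenized = tokenizer.encode_chat_completion(
    ChatCompletionRequest(
        messages=[
            SystemMessage(content="You are a helpful assistant."),
            UserMessage(content="What does hybrid search add to a RAG pipeline?"),  # example question
        ]
    )
)

# Generate from the rendered prompt text; stop on the Mistral EOS token as app.py does.
outputs = llm.generate(
    prompts=tokenized.text,
    sampling_params=vllm.SamplingParams(
        temperature=0,
        max_tokens=3000,
        stop_token_ids=[tokenizer.instruct_tokenizer.tokenizer.eos_id],
    ),
)
print(outputs[0].outputs[0].text)
```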