devve1 committed
Commit 62ab310
1 Parent(s): 0c81177

Update app.py

Files changed (1): app.py (+25, -40)
app.py CHANGED
@@ -26,9 +26,6 @@ from fastembed import SparseEmbedding, SparseTextEmbedding
 from unstructured.nlp.tokenize import download_nltk_packages
 from scipy.sparse import csr_matrix, save_npz, load_npz, vstack
 from langchain_experimental.text_splitter import SemanticChunker
-from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
-from mistral_common.protocol.instruct.request import ChatCompletionRequest
-from mistral_common.protocol.instruct.messages import SystemMessage, UserMessage
 from langchain_community.document_loaders import WikipediaLoader, WebBaseLoader
 from qdrant_client.models import (
     NamedSparseVector,
@@ -86,7 +83,7 @@ def query_hybrid_search(query: str, client: QdrantClient, collection_name: str,
         limit=3
     )
 
-def main(query: str, client: QdrantClient, collection_name: str, tokenizer: MistralTokenizer, llm: vllm.LLM, dense_model: OptimumEncoder, sparse_model: SparseTextEmbedding):
+def main(query: str, client: QdrantClient, collection_name: str, template, llm: vllm.LLM, dense_model: OptimumEncoder, sparse_model: SparseTextEmbedding):
     scored_points = query_hybrid_search(query, client, collection_name, dense_model, sparse_model).points
 
     docs = [(scored_point.payload['text'], scored_point.payload['metadata']) for scored_point in scored_points]
@@ -101,53 +98,37 @@ def main(query: str, client: QdrantClient, collection_name: str, tokenizer: Mist
         for key, value in metadata.items()
         if (value not in seen_values and not seen_values.add(value))
     )
-
-    tokenized = tokenizer.encode_chat_completion(
-        ChatCompletionRequest(
-            messages=[
-                SystemMessage(content="You are a helpful assistant."),
-                UserMessage(content=st.session_state.qa_prompt(query, context))
-            ]
-        )
-    )
-    print(f'Tokenized text: {tokenized.text}')
+
+    prompts = template.format(system='You are a helpful assistant.', user=st.session_state.qa_prompt(query, context))
+
     outputs = llm.generate(
-        prompts=tokenized.text,
+        prompts=prompts,
         sampling_params=vllm.SamplingParams(
             temperature=0,
-            max_tokens=3000,
-            stop_token_ids=[tokenizer.instruct_tokenizer.tokenizer.eos_id]
+            max_tokens=3000
         )
     )
     print(f'TEXT: {outputs}')
 
     text = outputs[0].outputs[0].text
 
-    tokenized_2 = tokenizer.encode_chat_completion(
-        ChatCompletionRequest(
-            messages=[
-                SystemMessage(content="""Act like a professional summary writer. You have been providing summarization services for various types of documents, including academic papers, legal texts, and business reports, for over 20 years.
-Your expertise includes extracting key points and important details concisely without adding unnecessary introductory phrases."""),
-                UserMessage(content=f"""Write a summary of the following text delimited by triple backquotes. Ensure the summary covers the key points of the text. Do not introduce the summary with sentences like "Here is the summary:" or similar. The summary should be detailed, precise, and directly convey the essential information from the text.
-
-```{text}```
-
-Let's think step-by-step.""")
-            ]
-        )
-    )
+    prompts_2 = template.format(system="""Act like a professional summary writer. You have been providing summarization services for various types of documents, including academic papers, legal texts, and business reports, for over 20 years.
+Your expertise includes extracting key points and important details concisely without adding unnecessary introductory phrases.""",
+        user=f"""Write a summary of the following text delimited by triple backquotes. Ensure the summary covers the key points of the text. Do not introduce the summary with sentences like "Here is the summary:" or similar. The summary should be detailed, precise, and directly convey the essential information from the text.
+
+```{text}```
+
+Let's think step-by-step.""")
+
     outputs_2 = llm.generate(
-        prompts=tokenized_2.text,
+        prompts=prompts_2,
         sampling_params=vllm.SamplingParams(
             temperature=0.3,
-            max_tokens=3000,
-            stop_token_ids=[tokenizer.instruct_tokenizer.tokenizer.eos_id],
-            detokenize=False
+            max_tokens=3000
         )
     )
-    output = tokenizer.decode(outputs_2[0])
 
-    answer = output[0].outputs[0].text
+    answer = outputs_2[0].outputs[0].text
     answer_with_metadatas = f"{answer}\n\n\nSource(s) :\n\n{result_metadatas}"
 
     print(f'OUTPUT: {output}')
@@ -162,7 +143,11 @@ def load_models_and_documents():
     with st.spinner('Load models...'):
         model_path = snapshot_download(repo_id="GameScribes/Mistral-v0.3-AWQ")
 
-        tokenizer = MistralTokenizer.from_file(f"{model_path}/tokenizer.model.v3")
+        template = """[INST] <<SYS>>
+{system}
+<</SYS>>
+
+{user} [/INST]"""
 
         llm = vllm.LLM(
             model_path,
@@ -311,7 +296,7 @@ def load_models_and_documents():
         optimizer_config=OptimizersConfigDiff(indexing_threshold=20000)
     )
 
-    return client, collection_name, tokenizer, model, llm, dense_model, sparse_model
+    return client, collection_name, template, model, llm, dense_model, sparse_model
 
 def chunk_documents(texts: List[str], metadatas: List[dict], dense_model: OptimumEncoder, sparse_model: SparseTextEmbedding):
     text_splitter = SemanticChunker(
@@ -388,7 +373,7 @@ if __name__ == '__main__':
     if "tooltip" not in st.session_state:
         st.session_state.tooltip = 'The AI answer your questions only considering the documents provided'
 
-    client, collection_name, tokenizer, model, llm, dense_model, sparse_model = load_models_and_documents()
+    client, collection_name, template, model, llm, dense_model, sparse_model = load_models_and_documents()
 
     if 'df' not in st.session_state:
         st.session_state.df = pd.DataFrame([0])
@@ -481,7 +466,7 @@ if __name__ == '__main__':
         st.chat_message("user").markdown(prompt)
         st.session_state.messages.append({"role": "user", "content": prompt})
         print(f'PROMPT: {prompt}')
-        ai_response = main(prompt, client, collection_name, tokenizer, llm, dense_model, sparse_model)
+        ai_response = main(prompt, client, collection_name, template, llm, dense_model, sparse_model)
         with st.chat_message("assistant"):
             message_placeholder = st.empty()
             full_response = ""