devve1 committed
Commit 0886785
Parent: 346e521

Update app.py

Files changed (1):
  app.py  +5 -4
app.py CHANGED
@@ -109,7 +109,7 @@ def main(query: str, client: QdrantClient, collection_name: str, tokenizer: Auto
         {"role": "system", "content": 'You are a helpful assistant.'},
         {"role": "user", "content": st.session_state.toggle_docs['qa_prompt'].format(**args)}
     ]
-    prompts = tokenizer.apply_chat_template(messages, tokenize=False)
+    prompts = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
     outputs = llm.generate(
         prompts=prompts,
@@ -133,7 +133,7 @@ def main(query: str, client: QdrantClient, collection_name: str, tokenizer: Auto
         Let's think step-by-step."""
         }
     ]
-    prompts_2 = tokenizer.apply_chat_template(messages_2, tokenize=False)
+    prompts_2 = tokenizer.apply_chat_template(messages_2, tokenize=False, add_generation_prompt=True)
 
     outputs_2 = llm.generate(
         prompts=prompts_2,
@@ -163,7 +163,8 @@ def load_models_and_documents():
         filename='Mistral-Nemo-Instruct-2407-Q5_K_M.gguf'
     )
     tokenizer_path = snapshot_download(
-        'mistralai/Mistral-Nemo-Instruct-2407',
+        'mistralai/Mistral-Nemo-Instruct-2407',
+        token=os.getenv('AccessMistralNemo'),
         ignore_patterns=['*.safetensors',
                          'model.safetensors.index.json',
                          'params.json',
@@ -549,7 +550,7 @@ if __name__ == '__main__':
         Question : {st.session_state.user_input}"""
         }
     ]
-    prompts = tokenizer.apply_chat_template(messages, tokenize=False)
+    prompts = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     outputs = llm.generate(
         prompts=prompts,
         sampling_params=vllm.SamplingParams(
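
Three of the four hunks make the same fix: without add_generation_prompt=True, tokenizer.apply_chat_template renders the conversation but does not append the template's assistant cue, so the model may continue the user's turn instead of starting its reply. The rendered string is still passed untokenized to llm.generate, which is why tokenize=False is kept. A minimal sketch of the difference, assuming the gated tokenizer is already downloaded (the query string is only an illustration, and templates that define no generation prompt yield identical output for both calls):

from transformers import AutoTokenizer

# Assumes the gated repo was fetched with a valid token, as in the diff above.
tokenizer = AutoTokenizer.from_pretrained('mistralai/Mistral-Nemo-Instruct-2407')

messages = [
    {"role": "system", "content": 'You are a helpful assistant.'},
    {"role": "user", "content": 'What is retrieval-augmented generation?'},  # hypothetical query
]

plain = tokenizer.apply_chat_template(messages, tokenize=False)
cued = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# If the chat template defines a generation prompt, `cued` ends with the
# tokens that open the assistant turn; `plain` stops after the user message.
print(cued[len(plain):])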
 
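The load_models_and_documents hunk addresses a different problem: mistralai/Mistral-Nemo-Instruct-2407 is a gated repository, so an anonymous snapshot_download is rejected and the call now authenticates with a token read from the AccessMistralNemo secret. A sketch of the same pattern with an explicit failure message, assuming AccessMistralNemo is set in the environment (the name comes from this app; any variable holding a token that has accepted the model's license would do):

import os
from huggingface_hub import snapshot_download

# 'AccessMistralNemo' is the secret name used in this commit; it must hold a
# Hugging Face token whose account has accepted the Mistral-Nemo license.
token = os.getenv('AccessMistralNemo')
if token is None:
    raise RuntimeError('AccessMistralNemo is not set; provide a Hugging Face '
                       'token with access to the gated repository.')

tokenizer_path = snapshot_download(
    'mistralai/Mistral-Nemo-Instruct-2407',
    token=token,
    # Skip the multi-GB weights: only tokenizer/config files are needed here,
    # since the quantized GGUF weights are downloaded separately.
    ignore_patterns=['*.safetensors',
                     'model.safetensors.index.json',
                     'params.json'],
)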