Spaces:
Running
on
T4
Running
on
T4
Update app.py
Browse files
app.py
CHANGED
@@ -109,7 +109,7 @@ def main(query: str, client: QdrantClient, collection_name: str, tokenizer: Auto
|
|
109 |
{"role": "system", "content": 'You are a helpful assistant.'},
|
110 |
{"role": "user", "content": st.session_state.toggle_docs['qa_prompt'].format(**args)}
|
111 |
]
|
112 |
-
prompts = tokenizer.apply_chat_template(messages, tokenize=False)
|
113 |
|
114 |
outputs = llm.generate(
|
115 |
prompts=prompts,
|
@@ -133,7 +133,7 @@ def main(query: str, client: QdrantClient, collection_name: str, tokenizer: Auto
|
|
133 |
Let's think step-by-step."""
|
134 |
}
|
135 |
]
|
136 |
-
prompts_2 = tokenizer.apply_chat_template(messages_2, tokenize=False)
|
137 |
|
138 |
outputs_2 = llm.generate(
|
139 |
prompts=prompts_2,
|
@@ -163,7 +163,8 @@ def load_models_and_documents():
|
|
163 |
filename='Mistral-Nemo-Instruct-2407-Q5_K_M.gguf'
|
164 |
)
|
165 |
tokenizer_path = snapshot_download(
|
166 |
-
'mistralai/Mistral-Nemo-Instruct-2407',
|
|
|
167 |
ignore_patterns=['*.safetensors',
|
168 |
'model.safetensors.index.json',
|
169 |
'params.json',
|
@@ -549,7 +550,7 @@ if __name__ == '__main__':
|
|
549 |
Question : {st.session_state.user_input}"""
|
550 |
}
|
551 |
]
|
552 |
-
prompts = tokenizer.apply_chat_template(messages, tokenize=False)
|
553 |
outputs = llm.generate(
|
554 |
prompts=prompts,
|
555 |
sampling_params=vllm.SamplingParams(
|
|
|
109 |
{"role": "system", "content": 'You are a helpful assistant.'},
|
110 |
{"role": "user", "content": st.session_state.toggle_docs['qa_prompt'].format(**args)}
|
111 |
]
|
112 |
+
prompts = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
113 |
|
114 |
outputs = llm.generate(
|
115 |
prompts=prompts,
|
|
|
133 |
Let's think step-by-step."""
|
134 |
}
|
135 |
]
|
136 |
+
prompts_2 = tokenizer.apply_chat_template(messages_2, tokenize=False, add_generation_prompt=True)
|
137 |
|
138 |
outputs_2 = llm.generate(
|
139 |
prompts=prompts_2,
|
|
|
163 |
filename='Mistral-Nemo-Instruct-2407-Q5_K_M.gguf'
|
164 |
)
|
165 |
tokenizer_path = snapshot_download(
|
166 |
+
'mistralai/Mistral-Nemo-Instruct-2407',
|
167 |
+
token=os.getenv('AccessMistralNemo'),
|
168 |
ignore_patterns=['*.safetensors',
|
169 |
'model.safetensors.index.json',
|
170 |
'params.json',
|
|
|
550 |
Question : {st.session_state.user_input}"""
|
551 |
}
|
552 |
]
|
553 |
+
prompts = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
554 |
outputs = llm.generate(
|
555 |
prompts=prompts,
|
556 |
sampling_params=vllm.SamplingParams(
|