devve1 committed
Commit 0886785
Parent: 346e521

Update app.py

Files changed (1):
  app.py  +5 -4
app.py CHANGED
@@ -109,7 +109,7 @@ def main(query: str, client: QdrantClient, collection_name: str, tokenizer: Auto
         {"role": "system", "content": 'You are a helpful assistant.'},
         {"role": "user", "content": st.session_state.toggle_docs['qa_prompt'].format(**args)}
     ]
-    prompts = tokenizer.apply_chat_template(messages, tokenize=False)
+    prompts = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
     outputs = llm.generate(
         prompts=prompts,
@@ -133,7 +133,7 @@ def main(query: str, client: QdrantClient, collection_name: str, tokenizer: Auto
         Let's think step-by-step."""
         }
     ]
-    prompts_2 = tokenizer.apply_chat_template(messages_2, tokenize=False)
+    prompts_2 = tokenizer.apply_chat_template(messages_2, tokenize=False, add_generation_prompt=True)
 
     outputs_2 = llm.generate(
         prompts=prompts_2,
@@ -163,7 +163,8 @@ def load_models_and_documents():
         filename='Mistral-Nemo-Instruct-2407-Q5_K_M.gguf'
     )
     tokenizer_path = snapshot_download(
-        'mistralai/Mistral-Nemo-Instruct-2407',
+        'mistralai/Mistral-Nemo-Instruct-2407',
+        token=os.getenv('AccessMistralNemo'),
         ignore_patterns=['*.safetensors',
                          'model.safetensors.index.json',
                          'params.json',
@@ -549,7 +550,7 @@ if __name__ == '__main__':
         Question : {st.session_state.user_input}"""
         }
     ]
-    prompts = tokenizer.apply_chat_template(messages, tokenize=False)
+    prompts = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     outputs = llm.generate(
         prompts=prompts,
         sampling_params=vllm.SamplingParams(
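
Three of the four hunks make the same fix: without add_generation_prompt=True, tokenizer.apply_chat_template renders the conversation but does not append the template's assistant cue, so the model may continue the user's turn instead of starting its reply. The rendered string is still passed untokenized to llm.generate, which is why tokenize=False is kept. A minimal sketch of the difference, assuming the gated tokenizer is already downloaded (the query string is only an illustration, and templates that define no generation prompt yield identical output for both calls):

from transformers import AutoTokenizer

# Assumes the gated repo was fetched with a valid token, as in the diff above.
tokenizer = AutoTokenizer.from_pretrained('mistralai/Mistral-Nemo-Instruct-2407')

messages = [
    {"role": "system", "content": 'You are a helpful assistant.'},
    {"role": "user", "content": 'What is retrieval-augmented generation?'},  # hypothetical query
]

plain = tokenizer.apply_chat_template(messages, tokenize=False)
cued = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# If the chat template defines a generation prompt, `cued` ends with the
# tokens that open the assistant turn; `plain` stops after the user message.
print(cued[len(plain):])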
 
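The load_models_and_documents hunk addresses a different problem: mistralai/Mistral-Nemo-Instruct-2407 is a gated repository, so an anonymous snapshot_download is rejected and the call now authenticates with a token read from the AccessMistralNemo secret. A sketch of the same pattern with an explicit failure message, assuming AccessMistralNemo is set in the environment (the name comes from this app; any variable holding a token that has accepted the model's license would do):

import os
from huggingface_hub import snapshot_download

# 'AccessMistralNemo' is the secret name used in this commit; it must hold a
# Hugging Face token whose account has accepted the Mistral-Nemo license.
token = os.getenv('AccessMistralNemo')
if token is None:
    raise RuntimeError('AccessMistralNemo is not set; provide a Hugging Face '
                       'token with access to the gated repository.')

tokenizer_path = snapshot_download(
    'mistralai/Mistral-Nemo-Instruct-2407',
    token=token,
    # Skip the multi-GB weights: only tokenizer/config files are needed here,
    # since the quantized GGUF weights are downloaded separately.
    ignore_patterns=['*.safetensors',
                     'model.safetensors.index.json',
                     'params.json'],
)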