Spaces:
Starting
on
T4
Starting
on
T4
Update app.py
Browse files
app.py
CHANGED
@@ -112,7 +112,7 @@ def main(query: str, client: QdrantClient, collection_name: str, llm: Llama, den
|
|
112 |
response = llm.generate(prompts=inputs_1, sampling_params=sampling_params_1)
|
113 |
|
114 |
text = response[0].outputs[0].text
|
115 |
-
print(f'TEXT: {
|
116 |
|
117 |
sampling_params_2 = vllm.SamplingParams(temperature=0.75, max_tokens=3000, stop_token_ids=stop_token_ids)
|
118 |
prompt_2 = [
|
@@ -420,8 +420,7 @@ if __name__ == '__main__':
|
|
420 |
if st.session_state.chat_id not in conversations.keys():
|
421 |
stop_token_ids = [151329, 151336, 151338]
|
422 |
sampling_params = vllm.SamplingParams(temperature=0.75, max_tokens=35, stop_token_ids=stop_token_ids)
|
423 |
-
|
424 |
-
prompt = [{"role": "user", "content": str}]
|
425 |
inputs = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
|
426 |
outputs = llm.generate(prompts=inputs, sampling_params=sampling_params)
|
427 |
|
|
|
112 |
response = llm.generate(prompts=inputs_1, sampling_params=sampling_params_1)
|
113 |
|
114 |
text = response[0].outputs[0].text
|
115 |
+
print(f'TEXT: {response}')
|
116 |
|
117 |
sampling_params_2 = vllm.SamplingParams(temperature=0.75, max_tokens=3000, stop_token_ids=stop_token_ids)
|
118 |
prompt_2 = [
|
|
|
420 |
if st.session_state.chat_id not in conversations.keys():
|
421 |
stop_token_ids = [151329, 151336, 151338]
|
422 |
sampling_params = vllm.SamplingParams(temperature=0.75, max_tokens=35, stop_token_ids=stop_token_ids)
|
423 |
+
prompt = [{"role": "user", "content": f"{prompt}\nExplain the above in one sentence:"}]
|
|
|
424 |
inputs = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
|
425 |
outputs = llm.generate(prompts=inputs, sampling_params=sampling_params)
|
426 |
|