Update app.py
Browse files
app.py
CHANGED
@@ -110,19 +110,18 @@ def main(query: str, client: QdrantClient, collection_name: str, tokenizer: Mist
|
|
110 |
]
|
111 |
)
|
112 |
)
|
|
|
113 |
outputs = llm.generate(
|
114 |
prompts=tokenized.text,
|
115 |
sampling_params=vllm.SamplingParams(
|
116 |
temperature=0,
|
117 |
max_tokens=3000,
|
118 |
-
stop_token_ids=[tokenizer.instruct_tokenizer.tokenizer.eos_id],
|
119 |
-
detokenize=False
|
120 |
)
|
121 |
)
|
122 |
-
print(f'TEXT: {outputs
|
123 |
-
response = tokenizer.decode(outputs[0])
|
124 |
|
125 |
-
text =
|
126 |
|
127 |
tokenized_2 = tokenizer.encode_chat_completion(
|
128 |
ChatCompletionRequest(
|
|
|
110 |
]
|
111 |
)
|
112 |
)
|
113 |
+
print(f'Tokenized text: {tokenized.text}')
|
114 |
outputs = llm.generate(
|
115 |
prompts=tokenized.text,
|
116 |
sampling_params=vllm.SamplingParams(
|
117 |
temperature=0,
|
118 |
max_tokens=3000,
|
119 |
+
stop_token_ids=[tokenizer.instruct_tokenizer.tokenizer.eos_id]
|
|
|
120 |
)
|
121 |
)
|
122 |
+
print(f'TEXT: {outputs}')
|
|
|
123 |
|
124 |
+
text = outputs[0].outputs[0].text
|
125 |
|
126 |
tokenized_2 = tokenizer.encode_chat_completion(
|
127 |
ChatCompletionRequest(
|