Spaces:
Running
on
T4
Running
on
T4
Update app.py
Browse files
app.py
CHANGED
@@ -150,17 +150,19 @@ def main(query: str, client: QdrantClient, collection_name: str, llm, dense_mode
|
|
150 |
|
151 |
combined_docs = "\n".join(docs)
|
152 |
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
|
|
|
|
164 |
|
165 |
text = response["choices"][0]["text"]
|
166 |
print(f'TEXT: {text}')
|
@@ -171,7 +173,18 @@ def main(query: str, client: QdrantClient, collection_name: str, llm, dense_mode
|
|
171 |
A :
|
172 |
"""
|
173 |
|
174 |
-
output = llm(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
print(f'OUTPUT: {output}')
|
176 |
return output
|
177 |
|
@@ -193,6 +206,7 @@ def load_models_and_documents():
|
|
193 |
repo_id="MaziyarPanahi/Llama-3-8B-Instruct-32k-v0.1-GGUF",
|
194 |
filename="*Q8_0.gguf",
|
195 |
verbose=False,
|
|
|
196 |
n_ctx=16000,
|
197 |
n_gpu_layers=32
|
198 |
)
|
|
|
150 |
|
151 |
combined_docs = "\n".join(docs)
|
152 |
|
153 |
+
response = llm.create_chat_completion(
|
154 |
+
messages = [
|
155 |
+
{"role": "system", "content": f"""Use the following pieces of context to answer the user question.
|
156 |
+
If you don't know the answer, just say that you don't know, don't try to make up an answer.
|
157 |
+
Use three sentences maximum and keep the answer as concise as possible.
|
158 |
+
|
159 |
+
{combined_docs}"""
|
160 |
+
},
|
161 |
+
{
|
162 |
+
"role": "user",
|
163 |
+
"content": f"Question: {query}"
|
164 |
+
}
|
165 |
+
], stop=["</s>"], temperature=0.7)
|
166 |
|
167 |
text = response["choices"][0]["text"]
|
168 |
print(f'TEXT: {text}')
|
|
|
173 |
A :
|
174 |
"""
|
175 |
|
176 |
+
output = llm.create_chat_completion(
|
177 |
+
messages = [
|
178 |
+
{"role": "system", "content": """You are an assistant that provides summaries of texts.
|
179 |
+
Your task is to create a summary that includes the main points and any important details.
|
180 |
+
Present your response in bullet points."""
|
181 |
+
},
|
182 |
+
{
|
183 |
+
"role": "user",
|
184 |
+
"content": f"""Write a summary of the following text delimited by triple backquotes. Ensure the summary covers the key points of the text.
|
185 |
+
```{text}```"""
|
186 |
+
}
|
187 |
+
], stop=["</s>"], temperature=0.7, max_tokens=3000)['choices'][0]['text']
|
188 |
print(f'OUTPUT: {output}')
|
189 |
return output
|
190 |
|
|
|
206 |
repo_id="MaziyarPanahi/Llama-3-8B-Instruct-32k-v0.1-GGUF",
|
207 |
filename="*Q8_0.gguf",
|
208 |
verbose=False,
|
209 |
+
chat_format="chatml",
|
210 |
n_ctx=16000,
|
211 |
n_gpu_layers=32
|
212 |
)
|