Update app.py
Browse files
app.py
CHANGED
@@ -28,6 +28,7 @@ from qdrant_client.models import (
|
|
28 |
SearchRequest,
|
29 |
ScoredPoint,
|
30 |
)
|
|
|
31 |
|
32 |
MAP_PROMPT = """
|
33 |
You will be given a single passage of a book. This section will be enclosed in triple backticks (```)
|
@@ -155,38 +156,28 @@ def main(query: str, client: QdrantClient, collection_name: str, llm, dense_mode
|
|
155 |
|
156 |
combined_docs = "\n".join(docs)
|
157 |
|
158 |
-
template = """Use the following pieces of context to answer the question at the end.
|
159 |
If you don't know the answer, just say that you don't know, don't try to make up an answer.
|
160 |
Use three sentences maximum and keep the answer as concise as possible.
|
161 |
-
Always say "thanks for asking!" at the end of the answer.
|
162 |
|
163 |
-
{
|
164 |
|
165 |
-
Question: {
|
166 |
|
167 |
-
|
168 |
-
|
169 |
-
rag_prompt = PromptTemplate.from_template(template)
|
170 |
|
171 |
-
|
172 |
-
|
173 |
-
output = chain.invoke(query)
|
174 |
-
print('THHFGHFGBFGBFB')
|
175 |
|
176 |
-
|
|
|
|
|
177 |
Return your response in bullet points which covers the key points of the text.
|
178 |
```{text}```
|
179 |
-
|
180 |
"""
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
prompt=prompt
|
185 |
-
)
|
186 |
-
|
187 |
-
summary = summary_chain.invoke(output['output_text'])
|
188 |
-
print('ADSDGFGVHBTFEFSDGFES')
|
189 |
-
return summary['output_text']
|
190 |
|
191 |
@st.cache_resource
|
192 |
def load_models_and_documents():
|
@@ -202,17 +193,12 @@ def load_models_and_documents():
|
|
202 |
}
|
203 |
|
204 |
with st.spinner('Load models...'):
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
n_ctx=32000,
|
212 |
-
max_tokens=2000,
|
213 |
-
n_gpu_layers=32,
|
214 |
-
n_batch=256,
|
215 |
-
f16_kv=True
|
216 |
)
|
217 |
|
218 |
provider = ['CPUExecutionProvider']
|
|
|
28 |
SearchRequest,
|
29 |
ScoredPoint,
|
30 |
)
|
31 |
+
from llama_cpp import Llama
|
32 |
|
33 |
MAP_PROMPT = """
|
34 |
You will be given a single passage of a book. This section will be enclosed in triple backticks (```)
|
|
|
156 |
|
157 |
combined_docs = "\n".join(docs)
|
158 |
|
159 |
+
template = f"""Q: Use the following pieces of context to answer the question at the end.
|
160 |
If you don't know the answer, just say that you don't know, don't try to make up an answer.
|
161 |
Use three sentences maximum and keep the answer as concise as possible.
|
|
|
162 |
|
163 |
+
{combined_docs}
|
164 |
|
165 |
+
Question: {query}
|
166 |
|
167 |
+
A: """
|
|
|
|
|
168 |
|
169 |
+
response = llm(template, stop=["Q:", "\n"], temperature=0.7)
|
|
|
|
|
|
|
170 |
|
171 |
+
text = response["choices"][0]["text"]
|
172 |
+
|
173 |
+
prompt = f"""Q: Write a summary of the following text delimited by triple backquotes that includes the main points and any important details.
|
174 |
Return your response in bullet points which covers the key points of the text.
|
175 |
```{text}```
|
176 |
+
A :
|
177 |
"""
|
178 |
+
|
179 |
+
output = llm(prompt, stop=["Q:", "\n"], temperature=0.7, max_tokens)
|
180 |
+
return output["choices"][0]["text"]
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
|
182 |
@st.cache_resource
|
183 |
def load_models_and_documents():
|
|
|
193 |
}
|
194 |
|
195 |
with st.spinner('Load models...'):
|
196 |
+
llm = Llama.from_pretrained(
|
197 |
+
repo_id="MaziyarPanahi/Llama-3-8B-Instruct-32k-v0.1-GGUF",
|
198 |
+
filename="*Q8_0.gguf",
|
199 |
+
verbose=False,
|
200 |
+
n_ctx=16000,
|
201 |
+
n_gpu_layers=32
|
|
|
|
|
|
|
|
|
|
|
202 |
)
|
203 |
|
204 |
provider = ['CPUExecutionProvider']
|