Update app.py
Browse files
app.py
CHANGED
@@ -19,7 +19,6 @@ from pydantic import BaseModel
|
|
19 |
from streamlit_pills import pills
|
20 |
from dense_embed import embed_text
|
21 |
from ppt_chunker import ppt_chunker
|
22 |
-
from huggingface_hub import snapshot_download
|
23 |
from streamlit_navigation_bar import st_navbar
|
24 |
from vllm.sampling_params import SamplingParams
|
25 |
from outlines.fsm.json_schema import build_regex_from_schema
|
@@ -51,7 +50,6 @@ from scipy.sparse import (
|
|
51 |
)
|
52 |
from prompts import (
|
53 |
outlines,
|
54 |
-
transform_query,
|
55 |
build_prompt_conv,
|
56 |
route_llm,
|
57 |
open_query_prompt,
|
@@ -123,7 +121,7 @@ def generate_answer(query: str,
|
|
123 |
|
124 |
answer = f"{text}\n\n\nSource :\n\n{metadata}"
|
125 |
else:
|
126 |
-
dense_embeddings, tokens_count = asyncio.run(embed_text(dense_model[0],
|
127 |
|
128 |
scored_points = query_hybrid_search(query, client, collection_name, dense_embeddings, sparse_embeddings).points
|
129 |
|
@@ -217,11 +215,11 @@ def load_models_and_documents():
|
|
217 |
dense_model = AsyncEngineArray.from_args(
|
218 |
[
|
219 |
EngineArgs(
|
220 |
-
model_name_or_path='
|
221 |
-
engine='
|
222 |
device='cpu',
|
223 |
embedding_dtype='float32',
|
224 |
-
dtype='
|
225 |
pooling_method='cls',
|
226 |
lengths_via_tokenize=True
|
227 |
)
|
|
|
19 |
from streamlit_pills import pills
|
20 |
from dense_embed import embed_text
|
21 |
from ppt_chunker import ppt_chunker
|
|
|
22 |
from streamlit_navigation_bar import st_navbar
|
23 |
from vllm.sampling_params import SamplingParams
|
24 |
from outlines.fsm.json_schema import build_regex_from_schema
|
|
|
50 |
)
|
51 |
from prompts import (
|
52 |
outlines,
|
|
|
53 |
build_prompt_conv,
|
54 |
route_llm,
|
55 |
open_query_prompt,
|
|
|
121 |
|
122 |
answer = f"{text}\n\n\nSource :\n\n{metadata}"
|
123 |
else:
|
124 |
+
dense_embeddings, tokens_count = asyncio.run(embed_text(dense_model[0], query))
|
125 |
|
126 |
scored_points = query_hybrid_search(query, client, collection_name, dense_embeddings, sparse_embeddings).points
|
127 |
|
|
|
215 |
dense_model = AsyncEngineArray.from_args(
|
216 |
[
|
217 |
EngineArgs(
|
218 |
+
model_name_or_path='BAAI/bge-m3',
|
219 |
+
engine='torch',
|
220 |
device='cpu',
|
221 |
embedding_dtype='float32',
|
222 |
+
dtype='float16',
|
223 |
pooling_method='cls',
|
224 |
lengths_via_tokenize=True
|
225 |
)
|