devve1 committed on
Commit
b2c5207
1 Parent(s): e44be63

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -6
app.py CHANGED
@@ -19,7 +19,6 @@ from pydantic import BaseModel
19
  from streamlit_pills import pills
20
  from dense_embed import embed_text
21
  from ppt_chunker import ppt_chunker
22
- from huggingface_hub import snapshot_download
23
  from streamlit_navigation_bar import st_navbar
24
  from vllm.sampling_params import SamplingParams
25
  from outlines.fsm.json_schema import build_regex_from_schema
@@ -51,7 +50,6 @@ from scipy.sparse import (
51
  )
52
  from prompts import (
53
  outlines,
54
- transform_query,
55
  build_prompt_conv,
56
  route_llm,
57
  open_query_prompt,
@@ -123,7 +121,7 @@ def generate_answer(query: str,
123
 
124
  answer = f"{text}\n\n\nSource :\n\n{metadata}"
125
  else:
126
- dense_embeddings, tokens_count = asyncio.run(embed_text(dense_model[0], transform_query(query)))
127
 
128
  scored_points = query_hybrid_search(query, client, collection_name, dense_embeddings, sparse_embeddings).points
129
 
@@ -217,11 +215,11 @@ def load_models_and_documents():
217
  dense_model = AsyncEngineArray.from_args(
218
  [
219
  EngineArgs(
220
- model_name_or_path='EmbeddedLLM/bge-m3-onnx-o2-cpu',
221
- engine='optimum',
222
  device='cpu',
223
  embedding_dtype='float32',
224
- dtype='float32',
225
  pooling_method='cls',
226
  lengths_via_tokenize=True
227
  )
 
19
  from streamlit_pills import pills
20
  from dense_embed import embed_text
21
  from ppt_chunker import ppt_chunker
 
22
  from streamlit_navigation_bar import st_navbar
23
  from vllm.sampling_params import SamplingParams
24
  from outlines.fsm.json_schema import build_regex_from_schema
 
50
  )
51
  from prompts import (
52
  outlines,
 
53
  build_prompt_conv,
54
  route_llm,
55
  open_query_prompt,
 
121
 
122
  answer = f"{text}\n\n\nSource :\n\n{metadata}"
123
  else:
124
+ dense_embeddings, tokens_count = asyncio.run(embed_text(dense_model[0], query))
125
 
126
  scored_points = query_hybrid_search(query, client, collection_name, dense_embeddings, sparse_embeddings).points
127
 
 
215
  dense_model = AsyncEngineArray.from_args(
216
  [
217
  EngineArgs(
218
+ model_name_or_path='BAAI/bge-m3',
219
+ engine='torch',
220
  device='cpu',
221
  embedding_dtype='float32',
222
+ dtype='float16',
223
  pooling_method='cls',
224
  lengths_via_tokenize=True
225
  )