devve1 committed
Commit a91bbdd
1 Parent(s): 7977ce2

Update app.py

Files changed (1): app.py +24 -25
app.py CHANGED
@@ -1,23 +1,23 @@
 import os
 import re
 import time
+import torch
 import msgpack
 import numpy as np
 import streamlit as st
 from numpy import ndarray
+from transformers import AutoModelForMaskedLM, AutoTokenizer
 from scipy.sparse import csr_matrix, save_npz, load_npz, vstack
 from qdrant_client import QdrantClient, models
-from fastembed.sparse.splade_pp import supported_splade_models
-from fastembed import SparseTextEmbedding, SparseEmbedding
-from fastembed_ext import FastEmbedEmbeddingsLc
 from langchain_community.llms.llamacpp import LlamaCpp
 from langchain_community.document_loaders.wikipedia import WikipediaLoader
 from langchain_community.document_loaders.unstructured import UnstructuredFileLoader
 from langchain_core.prompts import PromptTemplate
 from langchain.chains.summarize import load_summarize_chain
 from langchain_experimental.text_splitter import SemanticChunker
+from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_core.documents import Document
-from huggingface_hub import snapshot_download
+from huggingface_hub import hf_hub_download
 from qdrant_client.models import (
     NamedSparseVector,
     NamedVector,
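
Note on the import swap: the fastembed SPLADE wrapper (and the patched supported_splade_models registry removed further down) gives way to running the SPLADE model directly through transformers, with LangChain's HuggingFaceEmbeddings covering the dense side. A minimal sketch of how the new imports are exercised later in this commit; the AutoTokenizer line is an assumption, since only the AutoModelForMaskedLM call is visible in the diff:

    # Sketch only; mirrors the model setup added further down in this commit.
    from transformers import AutoModelForMaskedLM, AutoTokenizer
    from langchain_community.embeddings import HuggingFaceEmbeddings

    # SPLADE++ for sparse lexical vectors (tokenizer repo is assumed)
    tokenizer = AutoTokenizer.from_pretrained('prithivida/Splade_PP_en_v2')
    sparse_model = AutoModelForMaskedLM.from_pretrained('prithivida/Splade_PP_en_v2')

    # mxbai for dense semantic vectors
    dense_model = HuggingFaceEmbeddings(model_name='mixedbread-ai/mxbai-embed-large-v1')
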
@@ -63,7 +63,7 @@ def make_points(chunks: list[str], dense: list[ndarray], indices, values)-> list
         points.append(point)
     return points
 
-def search(client: QdrantClient, collection_name: str, dense: ndarray, sparse: list[SparseEmbedding]):
+def search(client: QdrantClient, collection_name: str, dense, indices, values):
     search_results = client.search_batch(
         collection_name,
         [
@@ -78,8 +78,8 @@ def search(client: QdrantClient, collection_name: str, dense: ndarray, sparse: list[SparseEmbedding]):
                 vector=NamedSparseVector(
                     name="text-sparse",
                     vector=SparseVector(
-                        indices=sparse[0].indices.tolist(),
-                        values=sparse[0].values.tolist(),
+                        indices=indices.tolist(),
+                        values=values.tolist(),
                     ),
                 ),
                 limit=10
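
For context, the two hunks above edit the body of search(), which issues one dense and one sparse request in a single search_batch call. A hedged reconstruction of the whole function from the visible fragments; the models.SearchRequest wrappers and the return value are assumptions, and the snippet relies on the imports at the top of app.py:

    # Hedged reconstruction; everything outside the visible fragments is assumed.
    def search(client: QdrantClient, collection_name: str, dense, indices, values):
        return client.search_batch(
            collection_name,
            [
                models.SearchRequest(
                    vector=NamedVector(name="text-dense", vector=dense),
                    limit=10,
                ),
                models.SearchRequest(
                    vector=NamedSparseVector(
                        name="text-sparse",
                        vector=SparseVector(
                            indices=indices.tolist(),
                            values=values.tolist(),
                        ),
                    ),
                    limit=10,
                ),
            ],
        )

One caveat: compute_sparse as rewritten below already returns plain Python lists, so the .tolist() calls in this hunk only succeed if the caller passes NumPy arrays or tensors; bare lists would raise AttributeError.
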
@@ -132,7 +132,7 @@ def main(query: str, client: QdrantClient, collection_name: str, llm, dense_model, sparse_model, sparse_tokenizer):
     # docs = docs.load()
 
 
-    dense_query = compute_dense(query, dense_model)
+    dense_query = compute_dense_query(query, dense_model)
     sparse_query = compute_sparse(query, sparse_model, sparse_tokenizer)
 
     search_results = search(
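
The arguments to search() fall just outside this hunk. A hypothetical continuation, assuming sparse_query is the (indices, values) pair returned by compute_sparse:

    # Hypothetical; not part of the diff. Arrays keep .tolist() in search() valid.
    indices, values = sparse_query
    search_results = search(client, collection_name, dense_query,
                            np.array(indices), np.array(values))
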
@@ -197,28 +197,22 @@ def compute_sparse(sentence, model, tokenizer):
     max_val, _ = torch.max(weighted_log, dim=1)
     vector = max_val.squeeze()
 
-    cols = vector.nonzero().numpy().flatten()
-    weights = vector.detach().numpy()[cols]
+    cols = vector.nonzero().squeeze().tolist()
+    weights = vector[cols].tolist()
 
     return cols, weights
 
-def compute_dense(sentence, model):
-
+def compute_dense_query(sentence, model):
+    return model.embed_query(f'Represent this sentence for searching relevant passages: {sentence}')
 
-def load_models_and_documents():
-    supported_splade_models[0] = {
-        "model": "prithivida/Splade_PP_en_v2",
-        "vocab_size": 30522,
-        "description": "Implementation of SPLADE++ Model for English v2",
-        "size_in_GB": 0.532,
-        "sources": {
-            "hf": "devve1/Splade_PP_en_v2_onnx"
-        },
-        "model_file": "model.onnx"
-    }
+def compute_dense_docs(docs, model):
+    return model.embed_documents(docs)
 
+def load_models_and_documents():
     with st.spinner('Load models...'):
-        model_path = snapshot_download(repo_id='Zoyd/NousResearch_Hermes-2-Theta-Llama-3-8B-6_5bpw_exl2')
+        model_path = hf_hub_download(repo_id='NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF',
+                                     filename='Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q8_0.gguf'
+                                     )
 
     llm = LlamaCpp(
         model_path=model_path,
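
Two notes on this hunk. First, the head of compute_sparse sits above the context lines; the visible tail matches standard SPLADE pooling, log(1 + ReLU(logits)) masked by attention and max-pooled over the sequence. A sketch of the full function under that assumption, for a single input string:

    # Hedged sketch; everything above the max-pooling lines is assumed.
    def compute_sparse(sentence, model, tokenizer):
        tokens = tokenizer(sentence, return_tensors='pt', padding=True, truncation=True)
        with torch.no_grad():
            logits = model(**tokens).logits          # (batch, seq_len, vocab_size)
        relu_log = torch.log1p(torch.relu(logits))   # SPLADE activation
        weighted_log = relu_log * tokens['attention_mask'].unsqueeze(-1)
        max_val, _ = torch.max(weighted_log, dim=1)  # max-pool over the sequence
        vector = max_val.squeeze()

        cols = vector.nonzero().squeeze().tolist()   # non-zero vocab ids
        weights = vector[cols].tolist()              # their activations
        return cols, weights

Second, the model swap is not just a repo rename: LlamaCpp loads GGUF files, so downloading a single Q8_0 GGUF via hf_hub_download works where the previous exl2 quantization snapshot (an exllamav2 format llama.cpp cannot read) did not.
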
@@ -233,6 +227,11 @@ def load_models_and_documents():
     reverse_voc = {v: k for k, v in tokenizer.vocab.items()}
     sparse_model = AutoModelForMaskedLM.from_pretrained('prithivida/Splade_PP_en_v2')
 
+    dense_model = HuggingFaceEmbeddings(model_name='mixedbread-ai/mxbai-embed-large-v1',
+                                        cache_folder=os.getenv('HF_HOME'),
+                                        model_kwargs={'truncate_dim':512}
+                                        )
+
     client = QdrantClient(path=os.getenv('HF_HOME'))
     collection_name = 'collection_demo'
 
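mxbai-embed-large-v1 is Matryoshka-trained, so truncate_dim=512 halves the native 1024-dim vectors with little quality loss; it also implies the collection stores 512-dim dense vectors. A hedged sketch of the collection setup this configuration implies, reusing the "text-dense"/"text-sparse" names from search(); the distance metric is an assumption:

    # Hedged sketch of the implied collection layout; not shown in the diff.
    if not client.collection_exists(collection_name):
        client.create_collection(
            collection_name,
            vectors_config={
                "text-dense": models.VectorParams(
                    size=512,                        # matches truncate_dim above
                    distance=models.Distance.COSINE  # assumed metric
                )
            },
            sparse_vectors_config={
                "text-sparse": models.SparseVectorParams()
            },
        )
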
@@ -344,7 +343,7 @@ def chunk_documents(docs, dense_model, sparse_model, sparse_tokenizer):
 
     documents = [doc.page_content for doc in text_splitter.transform_documents(list(docs))]
 
-    dense_embeddings = compute_dense(documents, dense_model)
+    dense_embeddings = compute_dense_docs(documents, dense_model)
     indices, values = compute_sparse(documents, sparse_model, sparse_tokenizer)
 
     return documents, dense_embeddings, indices, values
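
The csr_matrix/save_npz/vstack imports at the top suggest the sparse embeddings returned here are cached on disk as one CSR row per chunk. A hypothetical illustration (the file name and the one-list-per-chunk layout are assumptions; 30522 is the SPLADE vocab size from the removed fastembed config):

    # Hypothetical caching step; assumes indices/values hold one list per chunk.
    rows = [
        csr_matrix((vals, ([0] * len(idx), idx)), shape=(1, 30522))
        for idx, vals in zip(indices, values)
    ]
    save_npz('sparse_embeddings.npz', vstack(rows))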
 