# document-answering/vector_db.py
from langchain.schema import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores.faiss import FAISS


class VectorDB:
    """FAISS vector store backed by a HuggingFace sentence-transformer embedding model."""
    embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
    model_kwargs = {'device': 'cpu'}
    encode_kwargs = {'normalize_embeddings': False}
    local_folder = "db/faiss_db"
    is_load_local = False
    text_embeddings = None
    docs_db = None
    def __init__(self):
        # Build the embedding function, then initialise the FAISS store (local or empty).
        self.text_embeddings = self.init_text_embeddings(self.embedding_model, self.model_kwargs, self.encode_kwargs)
        self.docs_db = self.init_vector_db(self.local_folder, self.text_embeddings)
    def init_text_embeddings(self, embedding_model: str, model_kwargs: dict, encode_kwargs: dict):
        # Wrap the sentence-transformers model as a LangChain embedding function.
        return HuggingFaceEmbeddings(
            model_name=embedding_model,
            model_kwargs=model_kwargs,
            encode_kwargs=encode_kwargs
        )
    def init_vector_db(self, folder_path: str, text_embeddings: HuggingFaceEmbeddings):
        # Load a previously saved FAISS index if configured; otherwise start from an
        # empty index seeded with a single blank document.
        if self.is_load_local:
            try:
                return FAISS.load_local(folder_path=folder_path, embeddings=text_embeddings)
            except Exception as e:
                print(f"Failed to load local vector db, falling back to an empty index: {e}")
                return FAISS.from_documents([Document(page_content="")], embedding=text_embeddings)
        else:
            return FAISS.from_documents([Document(page_content="")], embedding=text_embeddings)
    def load_docs_into_vector_db(self, doc_chunks: list):
        # Index new document chunks, creating the store first if it does not exist yet.
        if len(doc_chunks) != 0:
            if self.docs_db is None:
                self.docs_db = FAISS.from_documents(doc_chunks, embedding=self.text_embeddings)
            else:
                self.docs_db.add_documents(doc_chunks)
    def save_vector_db(self):
        # Persist the FAISS index to disk, unless it was loaded from the local folder.
        if self.docs_db is not None and not self.is_load_local:
            self.docs_db.save_local(self.local_folder)
        else:
            print("No vector db to save.")