import os
import re
import time
import msgpack
import numpy as np
import streamlit as st
from io import StringIO
from numpy import ndarray
from langchain_core.documents.base import Document
from scipy.sparse import csr_matrix, save_npz, load_npz, vstack
from qdrant_client import QdrantClient, models
from langchain_community.document_loaders.wikipedia import WikipediaLoader
from langchain_experimental.text_splitter import SemanticChunker
from fastembed.sparse.splade_pp import supported_splade_models
from fastembed import SparseTextEmbedding, SparseEmbedding
from fastembed_ext import FastEmbedEmbeddingsLc
from huggingface_hub import hf_hub_download
from unstructured.partition.auto import partition
from qdrant_client.models import (
    NamedSparseVector,
    NamedVector,
    SparseVector,
    PointStruct,
    SearchRequest,
    ScoredPoint,
)
from llama_cpp import Llama
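
# Hybrid-search RAG demo: Wikipedia articles (and user-uploaded files) are
# semantically chunked, embedded with both a dense model and a sparse
# SPLADE++ model, stored in an in-memory Qdrant collection, retrieved with
# both vector types, fused via Reciprocal Rank Fusion, and passed as context
# to a local llama.cpp model behind a Streamlit chat UI.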

MAP_PROMPT = """
You will be given a single passage of a book. This passage will be enclosed in triple backticks (```)
Your goal is to give a summary of this passage so that a reader will have a full understanding of what happened.
Your response should be at least three paragraphs and fully encompass what is said in the passage.

```{text}```
FULL SUMMARY:
"""

COMBINE_PROMPT = """
You will be given a series of summaries from a book. The summaries will be enclosed in triple backticks (```)
Your goal is to give a verbose summary of what happened in the story.
The reader should be able to grasp what happened in the book.

```{text}```
VERBOSE SUMMARY:
"""
def make_points(chunks: list[str], dense: list[ndarray], sparse: list[SparseEmbedding]) -> list[PointStruct]:
    points = []
    for idx, (sparse_vector, chunk, dense_vector) in enumerate(zip(sparse, chunks, dense)):
        sparse_vec = SparseVector(indices=sparse_vector.indices.tolist(), values=sparse_vector.values.tolist())
        point = PointStruct(
            id=idx,
            vector={
                "text-sparse": sparse_vec,
                "text-dense": dense_vector,
            },
            payload={
                "text": chunk
            }
        )
        points.append(point)
    return points
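

# One round trip issues both queries: a dense request against "text-dense"
# and a sparse request against "text-sparse". search_batch returns the
# result lists in the same order as the requests, so index 0 is the dense
# result and index 1 the sparse result downstream.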
def search(client: QdrantClient, collection_name: str, dense, sparse):
    search_results = client.search_batch(
        collection_name,
        [
            SearchRequest(
                vector=NamedVector(
                    name="text-dense",
                    vector=dense,
                ),
                limit=10
            ),
            SearchRequest(
                vector=NamedSparseVector(
                    name="text-sparse",
                    vector=SparseVector(
                        indices=sparse[0].indices.tolist(),
                        values=sparse[0].values.tolist(),
                    ),
                ),
                limit=10
            ),
        ],
    )
    return search_results


def rank_list(search_result: list[ScoredPoint]):
    # Convert scored points into (id, 1-based rank) pairs for fusion.
    return [(point.id, rank + 1) for rank, point in enumerate(search_result)]
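

# Reciprocal Rank Fusion: score(item) = sum over lists of 1 / (alpha + rank).
# Worked example with alpha=60: ranked 1st by dense and 3rd by sparse gives
# 1/61 + 1/63 ≈ 0.03227, narrowly beating 2nd in both lists (2/62 ≈ 0.03226);
# an item missing from a list contributes only 1/(60 + default_rank).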
def rrf(rank_lists, alpha=60, default_rank=1000):
    """
    Optimized Reciprocal Rank Fusion (RRF) using NumPy for large rank lists.

    :param rank_lists: A list of rank lists. Each rank list should be a list of (item, rank) tuples.
    :param alpha: The parameter alpha used in the RRF formula. Default is 60.
    :param default_rank: The default rank assigned to items not present in a rank list. Default is 1000.
    :return: Sorted list of items based on their RRF scores.
    """
    all_items = set(item for rank_list in rank_lists for item, _ in rank_list)
    item_to_index = {item: idx for idx, item in enumerate(all_items)}
    rank_matrix = np.full((len(all_items), len(rank_lists)), default_rank)

    for list_idx, rank_list in enumerate(rank_lists):
        for item, rank in rank_list:
            rank_matrix[item_to_index[item], list_idx] = rank

    rrf_scores = np.sum(1.0 / (alpha + rank_matrix), axis=1)
    sorted_indices = np.argsort(-rrf_scores)
    items = list(item_to_index)  # insertion order matches the row indices
    sorted_items = [(items[idx], rrf_scores[idx]) for idx in sorted_indices]

    return sorted_items
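

# Query pipeline: embed the question both ways, run the batched hybrid
# search, fuse the two ranked lists with RRF, fetch the top chunks, then
# make two LLM calls: one to answer from the retrieved context and a
# second to condense that answer.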
def main(query: str, client: QdrantClient, collection_name: str, llm, dense_model, sparse_model):
    dense_query = list(dense_model.embed_query(query, 32))
    sparse_query = list(sparse_model.embed(query, 32))

    search_results = search(
        client,
        collection_name,
        dense_query,
        sparse_query
    )

    dense_rank_list, sparse_rank_list = rank_list(search_results[0]), rank_list(search_results[1])
    rrf_rank_list = rrf([dense_rank_list, sparse_rank_list])

    # NB: retrieve() may not return records in the requested id order, so the
    # [:3] slice below is not guaranteed to be exactly the top-3 fused hits.
    records_list = client.retrieve(
        collection_name,
        [item[0] for item in rrf_rank_list]
    )

    docs = [record.payload['text'] for record in records_list[:3]]

    combined_docs = "\n".join(docs)

    response = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": f"""Use the following pieces of context to answer the user question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.

{combined_docs}"""
            },
            {
                "role": "user",
                "content": f"Question: {query}"
            }
        ], stop=["</s>"], temperature=0.7)

    text = response["choices"][0]["message"]["content"]
    print(f'TEXT: {text}')

    output = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": """You are an assistant that provides summaries of texts.
Your task is to create a summary that includes the main points and any important details.
"""
            },
            {
                "role": "user",
                "content": f"""Write a summary of the following text delimited by triple backticks. Ensure the summary covers the key points of the text. Avoid introducing any sentences before the summary.
```{text}```"""
            }
        ], stop=["</s>"], temperature=0.7, max_tokens=3000)["choices"][0]["message"]["content"]
    print(f'OUTPUT: {output}')
    return output
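

# st.cache_resource makes Streamlit create these heavyweight objects (LLM,
# embedding models, Qdrant client, indexed corpus) once per process and
# reuse them across reruns, instead of reloading on every user interaction.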
@st.cache_resource
def load_models_and_documents():
    print('load')
    # Point fastembed's SPLADE++ registry entry at an ONNX export of
    # prithivida/Splade_PP_en_v2 so SparseTextEmbedding can load it.
    supported_splade_models[0] = {
        "model": "prithivida/Splade_PP_en_v2",
        "vocab_size": 30522,
        "description": "Implementation of SPLADE++ Model for English v2",
        "size_in_GB": 0.532,
        "sources": {
            "hf": "devve1/Splade_PP_en_v2_onnx"
        },
        "model_file": "model.onnx"
    }

    with st.spinner('Load models...'):
        llm = Llama.from_pretrained(
            repo_id="MaziyarPanahi/Llama-3-8B-Instruct-32k-v0.1-GGUF",
            filename="*Q8_0.gguf",
            verbose=False,
            chat_format="chatml",
            n_ctx=16000,
            n_gpu_layers=32
        )

        provider = ['CPUExecutionProvider']

        dense_model = FastEmbedEmbeddingsLc(
            model_name='mixedbread-ai/mxbai-embed-large-v1',
            providers=provider,
            cache_dir=os.getenv('HF_HOME'),
            batch_size=32
        )

        sparse_model = SparseTextEmbedding(
            'prithivida/Splade_PP_en_v2',
            cache_dir=os.getenv('HF_HOME'),
            providers=provider
        )

    client = QdrantClient(':memory:')
    collection_name = 'collection_demo'

    client.create_collection(
        collection_name,
        vectors_config={
            "text-dense": models.VectorParams(
                size=1024,  # mxbai-embed-large-v1 output dimension
                distance=models.Distance.COSINE,
                on_disk=False,
                # Binary quantization keeps a compact in-RAM copy of the
                # dense vectors for fast approximate scoring.
                quantization_config=models.BinaryQuantization(
                    binary=models.BinaryQuantizationConfig(
                        always_ram=True
                    )
                )
            )
        },
        sparse_vectors_config={
            "text-sparse": models.SparseVectorParams(
                index=models.SparseIndexParams(
                    on_disk=False
                )
            )
        },
        shard_number=2,
        # indexing_threshold=0 disables HNSW index building during the bulk
        # upload below; the threshold is restored after the upsert.
        optimizers_config=models.OptimizersConfigDiff(
            indexing_threshold=0
        ),
        hnsw_config=models.HnswConfigDiff(
            on_disk=False,
            m=16,
            ef_construct=100
        )
    )
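
    # Embedding cache layout: chunks go to msgpack, dense vectors to a
    # compressed .npz archive, and the SPLADE vectors to a single scipy CSR
    # matrix (one row per chunk), so later runs skip chunking and embedding.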
    with st.spinner('Parse and chunk documents...'):
        name = 'action_rpg'
        embeddings_path = os.path.join(os.getenv('HF_HOME'), 'embeddings')

        chunks_path = os.path.join(embeddings_path, name + '_chunks.msgpack')
        dense_path = os.path.join(embeddings_path, name + '_dense.npz')
        sparse_path = os.path.join(embeddings_path, name + '_sparse.npz')

        if not os.path.exists(embeddings_path):
            os.mkdir(embeddings_path)

            docs_1 = WikipediaLoader(query='Action-RPG').load()
            docs_2 = WikipediaLoader(query='Dark Souls').load()
            docs_3 = WikipediaLoader(query='Zelda II: The Adventure of Link').load()

            docs = docs_1 + docs_2 + docs_3
            chunks, dense_embeddings, sparse_embeddings = chunk_documents(docs, dense_model, sparse_model)
            with open(chunks_path, "wb") as outfile:
                packed = msgpack.packb(chunks, use_bin_type=True)
                outfile.write(packed)

            np.savez_compressed(dense_path, *dense_embeddings)

            # Stack each sparse embedding as one CSR row; the matrix width is
            # the largest token index seen across the corpus.
            max_index = max(np.max(embedding.indices) for embedding in sparse_embeddings)

            sparse_matrices = []
            for embedding in sparse_embeddings:
                data = embedding.values
                indices = embedding.indices
                indptr = np.array([0, len(data)])
                matrix = csr_matrix((data, indices, indptr), shape=(1, max_index + 1))
                sparse_matrices.append(matrix)

            combined_sparse_matrix = vstack(sparse_matrices)
            save_npz(sparse_path, combined_sparse_matrix)
        else:
            with open(chunks_path, "rb") as data_file:
                byte_data = data_file.read()

            chunks = msgpack.unpackb(byte_data, raw=False)

            dense_embeddings = list(np.load(dense_path).values())

            # Rebuild one SparseEmbedding per CSR row of the cached matrix.
            sparse_embeddings = []
            loaded_sparse_matrix = load_npz(sparse_path)

            for i in range(loaded_sparse_matrix.shape[0]):
                row = loaded_sparse_matrix.getrow(i)
                embedding = SparseEmbedding(values=row.data, indices=row.indices)
                sparse_embeddings.append(embedding)
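
    # Bulk-load all points while indexing is disabled (indexing_threshold=0
    # above), then restore the optimizer threshold so Qdrant builds the HNSW
    # index once over the full corpus instead of re-indexing during upload.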
    with st.spinner('Save documents...'):
        client.upsert(
            collection_name,
            make_points(
                chunks,
                dense_embeddings,
                sparse_embeddings
            )
        )
        client.update_collection(
            collection_name=collection_name,
            optimizer_config=models.OptimizersConfigDiff(indexing_threshold=20000)
        )

    return client, collection_name, llm, dense_model, sparse_model
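

# SemanticChunker splits on embedding-distance breakpoints: a new chunk
# starts where consecutive sentences' dense embeddings drift apart by more
# than one standard deviation, so chunk boundaries follow topic shifts
# rather than fixed character counts.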
def chunk_documents(docs, dense_model, sparse_model):
    text_splitter = SemanticChunker(
        dense_model,
        breakpoint_threshold_type='standard_deviation'
    )

    documents = [doc.page_content for doc in text_splitter.transform_documents(list(docs))]

    dense_embeddings = dense_model.embed_documents(documents, 32)
    sparse_embeddings = list(sparse_model.embed(documents, 32))

    return documents, dense_embeddings, sparse_embeddings
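

# Streamlit chat UI: the message history lives in st.session_state so it
# survives reruns; each submitted prompt is answered via main() and the
# reply is "streamed" word by word into a placeholder.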
if __name__ == '__main__':
    st.set_page_config(page_title="Video Game Assistant",
                       layout="wide"
                       )
    st.title("Video Game Assistant")

    client, collection_name, llm, dense_model, sparse_model = load_models_and_documents()

    if "messages" not in st.session_state:
        st.session_state.messages = []

    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if prompt := st.chat_input("Message Video Game Assistant"):
        st.chat_message("user").markdown(prompt)
        st.session_state.messages.append({"role": "user", "content": prompt})

        ai_response = main(prompt, client, collection_name, llm, dense_model, sparse_model)
        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            full_response = ""
            # Split on whitespace but keep the separators, so the text is
            # re-assembled verbatim while animating a typing effect.
            for chunk in re.split(r'(\s+)', ai_response):
                full_response += chunk
                time.sleep(0.01)
                message_placeholder.markdown(full_response + "▌")
            message_placeholder.markdown(full_response)
        st.session_state.messages.append({"role": "assistant", "content": full_response})
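
    # Sidebar ingestion: unstructured's partition() auto-detects the file
    # type and, with the hi_res strategy, uses the yolox layout model for
    # PDFs and images; parsed elements are chunked and embedded with the
    # same pipeline as the Wikipedia corpus, then upserted into Qdrant.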
    st.sidebar.title("Upload your file")
    uploaded_files = st.sidebar.file_uploader("Choose a file", accept_multiple_files=True, type=['docx', 'doc', 'odt', 'pptx', 'ppt', 'xlsx', 'csv', 'tsv', 'eml', 'msg', 'rtf', 'epub', 'html', 'xml', 'pdf', 'png', 'jpg', 'heic', 'txt'])
    print(f'uploaded-files : {uploaded_files}')
    for uploaded_file in uploaded_files:
        print('count')
        elements = partition(file=uploaded_file,
                             strategy='hi_res',
                             skip_infer_table_types=['png', 'pdf', 'jpg', 'xls', 'xlsx', 'heic'],
                             hi_res_model_name='yolox',
                             include_page_breaks=True
                             )
        # Skip elements without text (e.g. page breaks) before wrapping them
        # as langchain Documents.
        docs = [Document(elem.text) for elem in elements if elem.text]

        chunks, dense_embeddings, sparse_embeddings = chunk_documents(docs, dense_model, sparse_model)
        # NB: make_points restarts ids at 0, so this upsert overwrites the
        # points created at startup; offsetting the ids would preserve them.
        client.upsert(
            collection_name,
            make_points(
                chunks,
                dense_embeddings,
                sparse_embeddings
            )
        )