|
from typing import Any, Dict, List |
|
|
|
from fastrag.rankers import QuantizedBiEncoderRanker |
|
from fastrag.retrievers import QuantizedBiEncoderRetriever |
|
from haystack import Pipeline |
|
from haystack.document_stores import InMemoryDocumentStore |
|
from haystack.schema import Document |
|
|
|
|
|
class EndpointHandler:
    """Callable endpoint wrapping a retrieve-then-rank Haystack pipeline.

    On construction a small in-memory document store is filled with the
    sentences in ``EXAMPLES``, embedded with a quantized bi-encoder
    retriever, and wired into a two-node pipeline (retriever -> ranker).
    Calling the instance runs that pipeline for a single query.
    """

    # Demo corpus indexed at start-up.
    EXAMPLES = (
        "There is a blue house on Oxford Street.",
        "Paris is the capital of France.",
        "The first commit in fastRAG was in 2022",
    )

    def __init__(self, path=""):
        """Build the document store, the models and the pipeline.

        Args:
            path: Accepted for interface compatibility; this implementation
                does not read it.
        """
        document_store = InMemoryDocumentStore(
            use_gpu=False,
            use_bm25=False,
            embedding_dim=384,
            return_embedding=True,
        )

        # Each example gets its position in EXAMPLES as its document id.
        documents = [
            Document(content=text, id=index)
            for index, text in enumerate(self.EXAMPLES)
        ]
        document_store.write_documents(documents)

        model_id = "Intel/bge-small-en-v1.5-rag-int8-static"
        retriever = QuantizedBiEncoderRetriever(
            document_store=document_store,
            embedding_model=model_id,
        )
        # Embeddings must be computed after the documents are written so the
        # retriever has something to search over.
        document_store.update_embeddings(retriever=retriever)

        ranker = QuantizedBiEncoderRanker("Intel/bge-large-en-v1.5-rag-int8-static")

        self.pipe = Pipeline()
        self.pipe.add_node(component=retriever, name="retriever", inputs=["Query"])
        self.pipe.add_node(component=ranker, name="ranker", inputs=["retriever"])

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Run the pipeline for the query stored under ``data["inputs"]``.

        Args:
            data: Request payload; the query string is read from its
                ``"inputs"`` key. The payload is not modified.

        Returns:
            Whatever ``self.pipe.run(query=...)`` returns, unchanged.

        Raises:
            ValueError: If ``"inputs"`` is missing or is not a string.
        """
        # Read without popping so the caller's payload stays intact.
        query = data.get("inputs")
        if not isinstance(query, str):
            raise ValueError(
                'Request payload must contain a string query under the "inputs" key.'
            )
        return self.pipe.run(query=query)
|
|