|
--- |
|
language: |
|
- en |
|
license: mit |
|
--- |
|
|
|
This model was trained with [Neural-Cherche](https://github.com/raphaelsty/neural-cherche). See the [Neural-Cherche repository](https://github.com/raphaelsty/neural-cherche) for details on how to fine-tune it. Install the library with pip: |
|
|
|
|
|
```sh |
|
pip install neural-cherche |
|
``` |
|
|
|
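## Retriever |
|
The example below loads the model, encodes and indexes three sample documents, then encodes the queries and scores them against the indexed documents. |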
|
|
|
```python |
|
from neural_cherche import models, retrieve |
|
import torch |
|
|
|
device = "cuda" if torch.cuda.is_available() else "cpu" |
|
batch_size = 32 |
|
|
|
documents = [ |
|
{"id": 0, "document": "Food"}, |
|
{"id": 1, "document": "Sports"}, |
|
{"id": 2, "document": "Cinema"}, |
|
] |
|
|
|
queries = ["Food", "Sports", "Cinema"] |
|
|
|
model = models.SparseEmbed( |
|
model_name_or_path="raphaelsty/neural-cherche-sparse-embed", |
|
device=device, |
|
) |
|
|
|
retriever = retrieve.SparseEmbed( |
|
key="id", |
|
on=["document"], |
|
model=model, |
|
) |
|
|
|
documents_embeddings = retriever.encode_documents( |
|
documents=documents, |
|
batch_size=batch_size, |
|
) |
|
|
|
retriever = retriever.add( |
|
documents_embeddings=documents_embeddings, |
|
) |
|
|
|
queries_embeddings = retriever.encode_queries( |
|
queries=queries, |
|
batch_size=batch_size, |
|
) |
|
|
|
scores = retriever( |
|
queries_embeddings=queries_embeddings, |
|
batch_size=batch_size, |
|
k=100, |
|
) |
|
|
|
scores |
|
``` |
|
|