Spaces:
Sleeping
Sleeping
geekyrakshit
committed on
Commit
·
21537b7
1
Parent(s):
694a076
refactor: colpali retrieval
Browse files
docs/retreival/{multi_modal_retrieval.md → colpali.md}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
# Multi-Modal Retrieval
|
2 |
|
3 |
-
::: medrag_multi_modal.retrieval.multi_modal_retrieval
|
|
|
1 |
# Multi-Modal Retrieval
|
2 |
|
3 |
+
::: medrag_multi_modal.retrieval.colpali_retrieval
|
medrag_multi_modal/retrieval/{multi_modal_retrieval.py → colpali_retrieval.py}
RENAMED
@@ -9,9 +9,9 @@ from PIL import Image
|
|
9 |
from ..utils import get_wandb_artifact
|
10 |
|
11 |
|
12 |
-
class MultiModalRetriever(weave.Model):
|
13 |
"""
|
14 |
-
|
15 |
|
16 |
This class leverages the `byaldi.RAGMultiModalModel` to perform document retrieval tasks.
|
17 |
It can be initialized with a pre-trained model or from a specified W&B artifact. The class
|
@@ -20,10 +20,10 @@ class MultiModalRetriever(weave.Model):
|
|
20 |
!!! example "Indexing Data"
|
21 |
```python
|
22 |
import wandb
|
23 |
-
from medrag_multi_modal.retrieval import MultiModalRetriever
|
24 |
|
25 |
wandb.init(project="medrag-multi-modal", entity="ml-colabs", job_type="index")
|
26 |
-
retriever = MultiModalRetriever()
|
27 |
retriever.index(
|
28 |
data_artifact_name="ml-colabs/medrag-multi-modal/grays-anatomy-images:v1",
|
29 |
weave_dataset_name="grays-anatomy-images:v0",
|
@@ -36,10 +36,10 @@ class MultiModalRetriever(weave.Model):
|
|
36 |
import weave
|
37 |
|
38 |
import wandb
|
39 |
-
from medrag_multi_modal.retrieval import MultiModalRetriever
|
40 |
|
41 |
weave.init(project_name="ml-colabs/medrag-multi-modal")
|
42 |
-
retriever = MultiModalRetriever.from_artifact(
|
43 |
index_artifact_name="ml-colabs/medrag-multi-modal/grays-anatomy:v0",
|
44 |
metadata_dataset_name="grays-anatomy-images:v0",
|
45 |
data_artifact_name="ml-colabs/medrag-multi-modal/grays-anatomy-images:v1",
|
|
|
9 |
from ..utils import get_wandb_artifact
|
10 |
|
11 |
|
12 |
+
class CalPaliRetriever(weave.Model):
|
13 |
"""
|
14 |
+
CalPaliRetriever is a class that facilitates the retrieval of page images using ColPali.
|
15 |
|
16 |
This class leverages the `byaldi.RAGMultiModalModel` to perform document retrieval tasks.
|
17 |
It can be initialized with a pre-trained model or from a specified W&B artifact. The class
|
|
|
20 |
!!! example "Indexing Data"
|
21 |
```python
|
22 |
import wandb
|
23 |
+
from medrag_multi_modal.retrieval import CalPaliRetriever
|
24 |
|
25 |
wandb.init(project="medrag-multi-modal", entity="ml-colabs", job_type="index")
|
26 |
+
retriever = CalPaliRetriever()
|
27 |
retriever.index(
|
28 |
data_artifact_name="ml-colabs/medrag-multi-modal/grays-anatomy-images:v1",
|
29 |
weave_dataset_name="grays-anatomy-images:v0",
|
|
|
36 |
import weave
|
37 |
|
38 |
import wandb
|
39 |
+
from medrag_multi_modal.retrieval import CalPaliRetriever
|
40 |
|
41 |
weave.init(project_name="ml-colabs/medrag-multi-modal")
|
42 |
+
retriever = CalPaliRetriever.from_artifact(
|
43 |
index_artifact_name="ml-colabs/medrag-multi-modal/grays-anatomy:v0",
|
44 |
metadata_dataset_name="grays-anatomy-images:v0",
|
45 |
data_artifact_name="ml-colabs/medrag-multi-modal/grays-anatomy-images:v1",
|
mkdocs.yml
CHANGED
@@ -72,6 +72,6 @@ nav:
|
|
72 |
- Image Loader: 'document_loader/load_image.md'
|
73 |
- Chunking: 'chunking.md'
|
74 |
- Retrieval:
|
75 |
-
-
|
76 |
|
77 |
repo_url: https://github.com/soumik12345/medrag-multi-modal
|
|
|
72 |
- Image Loader: 'document_loader/load_image.md'
|
73 |
- Chunking: 'chunking.md'
|
74 |
- Retrieval:
|
75 |
+
- ColPali Retrieval: 'retreival/colpali.md'
|
76 |
|
77 |
repo_url: https://github.com/soumik12345/medrag-multi-modal
|
pyproject.toml
CHANGED
@@ -5,6 +5,7 @@ description = ""
|
|
5 |
readme = "README.md"
|
6 |
requires-python = ">=3.10"
|
7 |
dependencies = [
|
|
|
8 |
"Byaldi>=0.0.5",
|
9 |
"firerequests>=0.0.7",
|
10 |
"pdf2image>=1.17.0",
|
@@ -35,6 +36,7 @@ dependencies = [
|
|
35 |
|
36 |
[project.optional-dependencies]
|
37 |
core = [
|
|
|
38 |
"Byaldi>=0.0.5",
|
39 |
"firerequests>=0.0.7",
|
40 |
"marker-pdf>=0.2.17",
|
|
|
5 |
readme = "README.md"
|
6 |
requires-python = ">=3.10"
|
7 |
dependencies = [
|
8 |
+
"bm25s[full]>=0.2.2",
|
9 |
"Byaldi>=0.0.5",
|
10 |
"firerequests>=0.0.7",
|
11 |
"pdf2image>=1.17.0",
|
|
|
36 |
|
37 |
[project.optional-dependencies]
|
38 |
core = [
|
39 |
+
"bm25s[full]>=0.2.2",
|
40 |
"Byaldi>=0.0.5",
|
41 |
"firerequests>=0.0.7",
|
42 |
"marker-pdf>=0.2.17",
|