geekyrakshit committed on
Commit
7df75ff
·
1 Parent(s): a7ff122

add: MultiModalRetriever

Browse files
medrag_multi_modal/retrieval/__init__.py ADDED
File without changes
medrag_multi_modal/retrieval/multi_modal_retrieval.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import weave
2
+ from byaldi import RAGMultiModalModel
3
+ import wandb
4
+
5
+
6
class MultiModalRetriever(weave.Model):
    """Weave model that wraps a byaldi ``RAGMultiModalModel`` for indexing.

    The pretrained retrieval model named by ``model_name`` is loaded when the
    instance is constructed and kept on a private attribute.
    """

    # Identifier passed to ``RAGMultiModalModel.from_pretrained``.
    model_name: str
    # Loaded retrieval model; assigned in ``__init__`` after the base
    # initializer has run.
    _docs_retrieval_model: RAGMultiModalModel

    def __init__(self, model_name: str = "vidore/colpali-v1.2"):
        """Initialize the model fields and load the pretrained retriever.

        Args:
            model_name: Name of the pretrained model to load.
        """
        super().__init__(model_name=model_name)
        # Read the name back from the field set by the base initializer.
        self._docs_retrieval_model = RAGMultiModalModel.from_pretrained(
            self.model_name
        )

    def index(self, data_artifact_name: str, weave_dataset_name: str, index_name: str):
        """Download a W&B artifact and index its contents with the retriever.

        Args:
            data_artifact_name: Name of the W&B artifact to download.
            weave_dataset_name: Accepted but not used by this method.
            index_name: Name forwarded to the retrieval model's ``index`` call.
        """
        if wandb.run:
            # A run is active: fetch the artifact through the run-level API,
            # requesting it with the 'dataset' type.
            dataset_artifact = wandb.use_artifact(data_artifact_name, type='dataset')
        else:
            # No active run: look the artifact up through the public API client.
            dataset_artifact = wandb.Api().artifact(data_artifact_name)

        local_dir = dataset_artifact.download()
        self._docs_retrieval_model.index(input_path=local_dir, index_name=index_name)