# mColBERT / colbert / index_faiss.py
# vjeronymo2: Adding model and checkpoint (828992f)
import os
import random
import math
from colbert.utils.runs import Run
from colbert.utils.parser import Arguments
from colbert.indexing.faiss import index_faiss
from colbert.indexing.loaders import load_doclens
def main():
    """Command-line entry point: run Faiss indexing over an existing index.

    Parses the arguments, resolves the index directory from
    ``args.index_root`` and ``args.index_name``, sums the values returned by
    ``load_doclens`` to obtain the number of embeddings and, when
    ``--partitions`` was not supplied, chooses a default partition count
    before passing the arguments to ``index_faiss``.

    Raises:
        AssertionError: if ``--slices`` is below 1, ``--sample`` is not
            strictly between 0 and 1, or the index directory does not exist.
        ValueError: if a default for ``--partitions`` has to be computed but
            the index reports no embeddings.
    """
    # Fixed seed for the global RNG; presumably keeps any sampling done
    # downstream reproducible (the consumer is not visible in this file).
    random.seed(12345)

    parser = Arguments(description='Faiss indexing for end-to-end retrieval with ColBERT.')
    parser.add_index_use_input()

    parser.add_argument('--sample', dest='sample', default=None, type=float)
    parser.add_argument('--slices', dest='slices', default=1, type=int)

    args = parser.parse()
    assert args.slices >= 1
    assert args.sample is None or (0.0 < args.sample < 1.0), args.sample

    with Run.context():
        args.index_path = os.path.join(args.index_root, args.index_name)
        assert os.path.exists(args.index_path), args.index_path

        num_embeddings = sum(load_doclens(args.index_path))
        print("#> num_embeddings =", num_embeddings)

        if args.partitions is None:
            if num_embeddings <= 0:
                # Without this guard, log2(0) below fails with an opaque
                # "math domain error" that hides the real cause.
                raise ValueError(
                    "Cannot choose a default for --partitions: "
                    "no embeddings found in {}".format(args.index_path))

            # Smallest power of two that is >= 8 * sqrt(num_embeddings).
            args.partitions = 1 << math.ceil(math.log2(8 * math.sqrt(num_embeddings)))

            print('\n\n')
            Run.warn("You did not specify --partitions!")
            Run.warn("Default computation chooses", args.partitions,
                     "partitions (for {} embeddings)".format(num_embeddings))
            print('\n\n')

        index_faiss(args)
# Run the indexing entry point only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()