File size: 4,138 Bytes
ee8bf9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c044bff
 
 
 
ee8bf9a
 
c044bff
ee8bf9a
 
 
 
c044bff
ee8bf9a
 
 
 
 
 
c044bff
 
ee8bf9a
 
 
c044bff
 
 
ee8bf9a
 
c044bff
ee8bf9a
 
 
 
 
c044bff
ee8bf9a
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
{
    "config": {
        "query_token_id": "[unused0]",
        "doc_token_id": "[unused1]",
        "query_token": "[Q]",
        "doc_token": "[D]",
        "ncells": null,
        "centroid_score_threshold": null,
        "ndocs": null,
        "load_index_with_mmap": false,
        "index_path": null,
        "index_bsize": 32,
        "nbits": 8,
        "kmeans_niters": 20,
        "resume": false,
        "similarity": "cosine",
        "bsize": 64,
        "accumsteps": 1,
        "lr": 4e-6,
        "maxsteps": 15000,
        "save_every": null,
        "warmup": 15000,
        "warmup_bert": null,
        "relu": false,
        "nway": 4,
        "use_ib_negatives": true,
        "reranker": false,
        "distillation_alpha": 1.0,
        "ignore_scores": false,
        "model_name": "ak_colbertv2_250k_ar_norm",
        "query_maxlen": 32,
        "attend_to_mask_tokens": false,
        "interaction": "colbert",
        "dim": 128,
        "doc_maxlen": 256,
        "mask_punctuation": true,
        "checkpoint": "akhooli\/arabic-colbertv2-250k-norm",
        "triples": "examples_250k_no_latin.txt",
        "collection": [
            "list with 1615 elements starting with...",
            [
                "\u0628\u0633\u0645 \u0627\u0644\u0644\u0647 \u0627\u0644\u0631\u062d\u0645\u0646 \u0627\u0644\u0631\u062d\u064a\u0645.\n\u0623\u0644\u0645 \u062a\u0631 \u0643\u064a\u0641 \u0641\u0639\u0644 \u0631\u0628\u0643 \u0628\u0623\u0635\u062d\u0627\u0628 \u0627\u0644\u0641\u064a\u0644.\n\u0623\u0644\u0645 \u064a\u062c\u0639\u0644 \u0643\u064a\u062f\u0647\u0645 \u0641\u064a \u062a\u0636\u0644\u064a\u0644.\n\u0648\u0623\u0631\u0633\u0644 \u0639\u0644\u064a\u0647\u0645 \u0637\u064a\u0631\u0627 \u0623\u0628\u0627\u0628\u064a\u0644.\n\u062a\u0631\u0645\u064a\u0647\u0645 \u0628\u062d\u062c\u0627\u0631\u0629 \u0645\u0646 \u0633\u062c\u064a\u0644.\n\u0641\u062c\u0639\u0644\u0647\u0645 \u0643\u0639\u0635\u0641 \u0645\u0623\u0643\u0648\u0644.",
                "\u0628\u0633\u0645 \u0627\u0644\u0644\u0647 \u0627\u0644\u0631\u062d\u0645\u0646 \u0627\u0644\u0631\u062d\u064a\u0645.\n\u0625\u0646\u0627 \u0623\u0646\u0632\u0644\u0646\u0627\u0647 \u0641\u064a \u0644\u064a\u0644\u0629 \u0627\u0644\u0642\u062f\u0631.\n\u0648\u0645\u0627 \u0623\u062f\u0631\u0627\u0643 \u0645\u0627 \u0644\u064a\u0644\u0629 \u0627\u0644\u0642\u062f\u0631.\n\u0644\u064a\u0644\u0629 \u0627\u0644\u0642\u062f\u0631 \u062e\u064a\u0631 \u0645\u0646 \u0623\u0644\u0641 \u0634\u0647\u0631.\n\u062a\u0646\u0632\u0644 \u0627\u0644\u0645\u0644\u0627\u0626\u0643\u0629 \u0648\u0627\u0644\u0631\u0648\u062d \u0641\u064a\u0647\u0627 \u0628\u0625\u0630\u0646 \u0631\u0628\u0647\u0645 \u0645\u0646 \u0643\u0644 \u0623\u0645\u0631.\n\u0633\u0644\u0627\u0645 \u0647\u064a \u062d\u062a\u0649 \u0645\u0637\u0644\u0639 \u0627\u0644\u0641\u062c\u0631.",
                "\u0628\u0633\u0645 \u0627\u0644\u0644\u0647 \u0627\u0644\u0631\u062d\u0645\u0646 \u0627\u0644\u0631\u062d\u064a\u0645.\n\u064a\u0627 \u0623\u064a\u0647\u0627 \u0627\u0644\u0630\u064a\u0646 \u0622\u0645\u0646\u0648\u0627 \u0623\u0648\u0641\u0648\u0627 \u0628\u0627\u0644\u0639\u0642\u0648\u062f \u0623\u062d\u0644\u062a \u0644\u0643\u0645 \u0628\u0647\u064a\u0645\u0629 \u0627\u0644\u0623\u0646\u0639\u0627\u0645 \u0625\u0644\u0627 \u0645\u0627 \u064a\u062a\u0644\u0649 \u0639\u0644\u064a\u0643\u0645 \u063a\u064a\u0631 \u0645\u062d\u0644\u064a \u0627\u0644\u0635\u064a\u062f \u0648\u0623\u0646\u062a\u0645 \u062d\u0631\u0645 \u0625\u0646 \u0627\u0644\u0644\u0647 \u064a\u062d\u0643\u0645 \u0645\u0627 \u064a\u0631\u064a\u062f."
            ]
        ],
        "queries": "ar-queries-250k-no-latin-norm.tsv",
        "index_name": "ArColbertQuran",
        "overwrite": false,
        "root": ".ragatouille\/",
        "experiment": "colbert",
        "index_root": null,
        "name": "2024-08\/06\/15.39.12",
        "rank": 0,
        "nranks": 1,
        "amp": true,
        "gpus": 1,
        "avoid_fork_if_possible": false
    },
    "num_chunks": 1,
    "num_partitions": 4096,
    "num_embeddings_est": 124191.99924468994,
    "avg_doclen_est": 76.8990707397461
}