File size: 3,920 Bytes
ee8bf9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c044bff
 
 
 
ee8bf9a
 
c044bff
ee8bf9a
 
 
 
c044bff
ee8bf9a
 
 
 
 
 
c044bff
 
ee8bf9a
 
 
c044bff
 
 
ee8bf9a
 
c044bff
ee8bf9a
 
 
 
 
c044bff
ee8bf9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
{
  "config":{
    "query_token_id":"[unused0]",
    "doc_token_id":"[unused1]",
    "query_token":"[Q]",
    "doc_token":"[D]",
    "ncells":null,
    "centroid_score_threshold":null,
    "ndocs":null,
    "load_index_with_mmap":false,
    "index_path":null,
    "index_bsize":32,
    "nbits":8,
    "kmeans_niters":20,
    "resume":false,
    "similarity":"cosine",
    "bsize":64,
    "accumsteps":1,
    "lr":0.000004,
    "maxsteps":15000,
    "save_every":null,
    "warmup":15000,
    "warmup_bert":null,
    "relu":false,
    "nway":4,
    "use_ib_negatives":true,
    "reranker":false,
    "distillation_alpha":1.0,
    "ignore_scores":false,
    "model_name":"ak_colbertv2_250k_ar_norm",
    "query_maxlen":32,
    "attend_to_mask_tokens":false,
    "interaction":"colbert",
    "dim":128,
    "doc_maxlen":256,
    "mask_punctuation":true,
    "checkpoint":"akhooli/arabic-colbertv2-250k-norm",
    "triples":"examples_250k_no_latin.txt",
    "collection":[
      "list with 1615 elements starting with...",
      [
        "\u0628\u0633\u0645 \u0627\u0644\u0644\u0647 \u0627\u0644\u0631\u062d\u0645\u0646 \u0627\u0644\u0631\u062d\u064a\u0645.\n\u0623\u0644\u0645 \u062a\u0631 \u0643\u064a\u0641 \u0641\u0639\u0644 \u0631\u0628\u0643 \u0628\u0623\u0635\u062d\u0627\u0628 \u0627\u0644\u0641\u064a\u0644.\n\u0623\u0644\u0645 \u064a\u062c\u0639\u0644 \u0643\u064a\u062f\u0647\u0645 \u0641\u064a \u062a\u0636\u0644\u064a\u0644.\n\u0648\u0623\u0631\u0633\u0644 \u0639\u0644\u064a\u0647\u0645 \u0637\u064a\u0631\u0627 \u0623\u0628\u0627\u0628\u064a\u0644.\n\u062a\u0631\u0645\u064a\u0647\u0645 \u0628\u062d\u062c\u0627\u0631\u0629 \u0645\u0646 \u0633\u062c\u064a\u0644.\n\u0641\u062c\u0639\u0644\u0647\u0645 \u0643\u0639\u0635\u0641 \u0645\u0623\u0643\u0648\u0644.",
        "\u0628\u0633\u0645 \u0627\u0644\u0644\u0647 \u0627\u0644\u0631\u062d\u0645\u0646 \u0627\u0644\u0631\u062d\u064a\u0645.\n\u0625\u0646\u0627 \u0623\u0646\u0632\u0644\u0646\u0627\u0647 \u0641\u064a \u0644\u064a\u0644\u0629 \u0627\u0644\u0642\u062f\u0631.\n\u0648\u0645\u0627 \u0623\u062f\u0631\u0627\u0643 \u0645\u0627 \u0644\u064a\u0644\u0629 \u0627\u0644\u0642\u062f\u0631.\n\u0644\u064a\u0644\u0629 \u0627\u0644\u0642\u062f\u0631 \u062e\u064a\u0631 \u0645\u0646 \u0623\u0644\u0641 \u0634\u0647\u0631.\n\u062a\u0646\u0632\u0644 \u0627\u0644\u0645\u0644\u0627\u0626\u0643\u0629 \u0648\u0627\u0644\u0631\u0648\u062d \u0641\u064a\u0647\u0627 \u0628\u0625\u0630\u0646 \u0631\u0628\u0647\u0645 \u0645\u0646 \u0643\u0644 \u0623\u0645\u0631.\n\u0633\u0644\u0627\u0645 \u0647\u064a \u062d\u062a\u0649 \u0645\u0637\u0644\u0639 \u0627\u0644\u0641\u062c\u0631.",
        "\u0628\u0633\u0645 \u0627\u0644\u0644\u0647 \u0627\u0644\u0631\u062d\u0645\u0646 \u0627\u0644\u0631\u062d\u064a\u0645.\n\u064a\u0627 \u0623\u064a\u0647\u0627 \u0627\u0644\u0630\u064a\u0646 \u0622\u0645\u0646\u0648\u0627 \u0623\u0648\u0641\u0648\u0627 \u0628\u0627\u0644\u0639\u0642\u0648\u062f \u0623\u062d\u0644\u062a \u0644\u0643\u0645 \u0628\u0647\u064a\u0645\u0629 \u0627\u0644\u0623\u0646\u0639\u0627\u0645 \u0625\u0644\u0627 \u0645\u0627 \u064a\u062a\u0644\u0649 \u0639\u0644\u064a\u0643\u0645 \u063a\u064a\u0631 \u0645\u062d\u0644\u064a \u0627\u0644\u0635\u064a\u062f \u0648\u0623\u0646\u062a\u0645 \u062d\u0631\u0645 \u0625\u0646 \u0627\u0644\u0644\u0647 \u064a\u062d\u0643\u0645 \u0645\u0627 \u064a\u0631\u064a\u062f."
      ]
    ],
    "queries":"ar-queries-250k-no-latin-norm.tsv",
    "index_name":"ArColbertQuran",
    "overwrite":false,
    "root":".ragatouille/",
    "experiment":"colbert",
    "index_root":null,
    "name":"2024-08/06/15.39.12",
    "rank":0,
    "nranks":1,
    "amp":true,
    "gpus":1,
    "avoid_fork_if_possible":false
  },
  "num_chunks":1,
  "num_partitions":4096,
  "num_embeddings":124192,
  "avg_doclen":76.8990712074,
  "RAGatouille":{
    "index_config":{
      "index_type":"PLAID",
      "index_name":"ArColbertQuran"
    }
  }
}