TeamNL committed
Commit: 95130d6 (verified)
Parent: ab21638

Training in progress, epoch 1

model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bf328c8146b8eec9567a53abf08210210939e4a910f969fe7e04da38c8b013de
+oid sha256:e91955b5fae14befc72f0d3b8a69ad941d20ffbe853702b4ef7aadda7d1793f9
 size 672610316
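
Only the Git LFS pointer is shown: the sha256 oid changes with the new epoch-1 weights while the payload stays 672610316 bytes. A minimal Python sketch for checking a downloaded copy against the pointer's oid (the local path is an assumption):

    import hashlib

    def lfs_oid(path: str, chunk_size: int = 1 << 20) -> str:
        # Git LFS records the plain sha256 of the file contents as the oid.
        digest = hashlib.sha256()
        with open(path, "rb") as f:
            while block := f.read(chunk_size):
                digest.update(block)
        return digest.hexdigest()

    # Path is hypothetical; point it at the file fetched from this repo.
    assert lfs_oid("model.safetensors") == (
        "e91955b5fae14befc72f0d3b8a69ad941d20ffbe853702b4ef7aadda7d1793f9"
    )
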
runs/Aug05_08-39-47_0e3db811b885/events.out.tfevents.1722847189.0e3db811b885.1667.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df6aafe7d80a7f665ec95f2adfc8047f1a5bb33a3c08de21615c034c2a208fc0
+size 5714
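
The added file is a TensorBoard event log written during epoch 1. A sketch of reading it back, assuming the tensorboard package is installed; the scalar tag name is an assumption:

    from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

    # Path mirrors the repo layout; adjust to where the run was downloaded.
    acc = EventAccumulator("runs/Aug05_08-39-47_0e3db811b885")
    acc.Reload()
    print(acc.Tags()["scalars"])        # e.g. ['train/loss', ...] (assumed tag)
    for event in acc.Scalars("train/loss"):
        print(event.step, event.value)
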
special_tokens_map.json CHANGED
@@ -1,7 +1,37 @@
 {
-  "cls_token": "[CLS]",
-  "mask_token": "[MASK]",
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "unk_token": "[UNK]"
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
 }
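
The bare strings are replaced by the fuller serialization that newer tokenizer versions write, where each special token carries its added-token flags. A sketch of the equivalent object, assuming the tokenizers package:

    from tokenizers import AddedToken

    # Each expanded JSON entry maps onto AddedToken constructor arguments.
    cls_token = AddedToken(
        "[CLS]",
        lstrip=False,
        normalized=True,
        rstrip=False,
        single_word=False,
    )
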
tokenizer.json CHANGED
@@ -15,7 +15,7 @@
       "lstrip": false,
       "rstrip": false,
       "normalized": true,
-      "special": false
+      "special": true
     },
     {
       "id": 100,
@@ -24,7 +24,7 @@
       "lstrip": false,
       "rstrip": false,
       "normalized": true,
-      "special": false
+      "special": true
     },
     {
       "id": 101,
@@ -33,7 +33,7 @@
       "lstrip": false,
       "rstrip": false,
       "normalized": true,
-      "special": false
+      "special": true
     },
     {
       "id": 102,
@@ -42,7 +42,7 @@
       "lstrip": false,
       "rstrip": false,
       "normalized": true,
-      "special": false
+      "special": true
     },
     {
       "id": 103,
@@ -51,7 +51,7 @@
       "lstrip": false,
       "rstrip": false,
       "normalized": true,
-      "special": false
+      "special": true
     },
     {
       "id": 547,
tokenizer_config.json CHANGED
@@ -6,7 +6,7 @@
       "normalized": true,
       "rstrip": false,
       "single_word": false,
-      "special": false
+      "special": true
     },
     "100": {
       "content": "[UNK]",
@@ -14,7 +14,7 @@
       "normalized": true,
       "rstrip": false,
       "single_word": false,
-      "special": false
+      "special": true
     },
     "101": {
       "content": "[CLS]",
@@ -22,7 +22,7 @@
       "normalized": true,
       "rstrip": false,
       "single_word": false,
-      "special": false
+      "special": true
     },
     "102": {
       "content": "[SEP]",
@@ -30,7 +30,7 @@
       "normalized": true,
       "rstrip": false,
       "single_word": false,
-      "special": false
+      "special": true
     },
     "103": {
       "content": "[MASK]",
@@ -38,7 +38,7 @@
       "normalized": true,
       "rstrip": false,
       "single_word": false,
-      "special": false
+      "special": true
     },
     "547": {
       "content": "क",
@@ -8805,11 +8805,18 @@
   "cls_token": "[CLS]",
   "do_lower_case": true,
   "mask_token": "[MASK]",
+  "max_length": 512,
   "model_max_length": 512,
+  "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
   "sep_token": "[SEP]",
+  "stride": 0,
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "[UNK]"
 }
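
The seven new keys (max_length, pad_to_multiple_of, pad_token_type_id, padding_side, stride, truncation_side, truncation_strategy) are padding and truncation defaults that recent transformers versions persist when the tokenizer is saved. A sketch of the encode-time arguments they mirror; the repo id is a placeholder:

    from transformers import AutoTokenizer

    # Placeholder repo id; substitute the actual model repository.
    tok = AutoTokenizer.from_pretrained("TeamNL/placeholder-model")

    batch = tok(
        ["first sentence", "a second, slightly longer sentence"],
        max_length=512,              # "max_length": 512
        truncation="longest_first",  # "truncation_strategy": "longest_first"
        padding="max_length",        # pads on the "padding_side": "right"
        stride=0,                    # "stride": 0
        return_tensors="pt",
    )
    print(batch["input_ids"].shape)  # torch.Size([2, 512])
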
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:88b4e692f090b4198b3117a59a77b235641c68b552c20c3cd5d907a7dbc6cbda
+oid sha256:5ebc6d247033c5edc73a187d45e14eabab7695e9b1091be0bd01f8fd5695bc0c
 size 5112
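
training_args.bin is a pickled transformers.TrainingArguments object, so only its LFS pointer appears in the diff. A sketch of inspecting it locally, assuming torch and transformers are installed:

    import torch

    # Recent torch releases require weights_only=False to unpickle
    # arbitrary objects such as TrainingArguments.
    args = torch.load("training_args.bin", weights_only=False)
    print(args.num_train_epochs, args.learning_rate)
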