kimsan0622 committed on
Commit
f68803e
·
1 Parent(s): db26b82

Upload processor

Browse files
preprocessor_config.json CHANGED
@@ -3,13 +3,14 @@
3
  "AutoProcessor": "processing_veld.VELDProcessor"
4
  },
5
  "do_normalize": true,
 
6
  "do_resize": true,
7
- "feature_extractor_type": "ViTFeatureExtractor",
8
  "image_mean": [
9
  0.5,
10
  0.5,
11
  0.5
12
  ],
 
13
  "image_std": [
14
  0.5,
15
  0.5,
@@ -17,5 +18,9 @@
17
  ],
18
  "processor_class": "VELDProcessor",
19
  "resample": 2,
20
- "size": 384
 
 
 
 
21
  }
 
3
  "AutoProcessor": "processing_veld.VELDProcessor"
4
  },
5
  "do_normalize": true,
6
+ "do_rescale": true,
7
  "do_resize": true,
 
8
  "image_mean": [
9
  0.5,
10
  0.5,
11
  0.5
12
  ],
13
+ "image_processor_type": "ViTImageProcessor",
14
  "image_std": [
15
  0.5,
16
  0.5,
 
18
  ],
19
  "processor_class": "VELDProcessor",
20
  "resample": 2,
21
+ "rescale_factor": 0.00392156862745098,
22
+ "size": {
23
+ "height": 384,
24
+ "width": 384
25
+ }
26
  }
processing_veld.py CHANGED
@@ -24,16 +24,16 @@ class VELDProcessor(ProcessorMixin):
24
  r"""
25
  Constructs a VELD processor which wraps a vision feature extractor and a tokenizer into a single
26
  processor.
27
- [`VELDProcessor`] offers all the functionalities of [`AutoFeatureExtractor`] and
28
  [`AutoTokenizer`]. See the [`~VELDProcessor.__call__`] and
29
  [`~VELDProcessor.decode`] for more information.
30
  Args:
31
- feature_extractor ([`AutoFeatureExtractor`]):
32
  The feature extractor is a required input.
33
  tokenizer ([`PreTrainedTokenizer`]):
34
  The tokenizer is a required input.
35
  """
36
- feature_extractor_class = "AutoFeatureExtractor"
37
  tokenizer_class = "AutoTokenizer"
38
 
39
  def __init__(self, feature_extractor, tokenizer):
@@ -45,7 +45,7 @@ class VELDProcessor(ProcessorMixin):
45
  Main method to prepare for the model one or several sequence(s) and image(s). This method forwards the `text`
46
  and `kwargs` arguments to VisionTextDualEncoderTokenizer's [`~PreTrainedTokenizer.__call__`] if `text` is not
47
  `None` to encode the text. To prepare the image(s), this method forwards the `images` and `kwargs` arguments to
48
- AutoFeatureExtractor's [`~AutoFeatureExtractor.__call__`] if `images` is not `None`. Please refer to the
49
  docstring of the above two methods for more information.
50
  Args:
51
  text (`str`, `List[str]`, `List[List[str]]`):
 
24
  r"""
25
  Constructs a VELD processor which wraps a vision feature extractor and a tokenizer into a single
26
  processor.
27
+ [`VELDProcessor`] offers all the functionalities of [`AutoImageProcessor`] and
28
  [`AutoTokenizer`]. See the [`~VELDProcessor.__call__`] and
29
  [`~VELDProcessor.decode`] for more information.
30
  Args:
31
+ feature_extractor ([`AutoImageProcessor`]):
32
  The feature extractor is a required input.
33
  tokenizer ([`PreTrainedTokenizer`]):
34
  The tokenizer is a required input.
35
  """
36
+ feature_extractor_class = "AutoImageProcessor"
37
  tokenizer_class = "AutoTokenizer"
38
 
39
  def __init__(self, feature_extractor, tokenizer):
 
45
  Main method to prepare for the model one or several sequence(s) and image(s). This method forwards the `text`
46
  and `kwargs` arguments to VisionTextDualEncoderTokenizer's [`~PreTrainedTokenizer.__call__`] if `text` is not
47
  `None` to encode the text. To prepare the image(s), this method forwards the `images` and `kwargs` arguments to
48
+ AutoImageProcessor's [`~AutoImageProcessor.__call__`] if `images` is not `None`. Please refer to the
49
  docstring of the above two methods for more information.
50
  Args:
51
  text (`str`, `List[str]`, `List[List[str]]`):
special_tokens_map.json CHANGED
@@ -1,107 +1,51 @@
1
  {
2
- "additional_special_tokens": [
3
- "<extra_id_0>",
4
- "<extra_id_1>",
5
- "<extra_id_2>",
6
- "<extra_id_3>",
7
- "<extra_id_4>",
8
- "<extra_id_5>",
9
- "<extra_id_6>",
10
- "<extra_id_7>",
11
- "<extra_id_8>",
12
- "<extra_id_9>",
13
- "<extra_id_10>",
14
- "<extra_id_11>",
15
- "<extra_id_12>",
16
- "<extra_id_13>",
17
- "<extra_id_14>",
18
- "<extra_id_15>",
19
- "<extra_id_16>",
20
- "<extra_id_17>",
21
- "<extra_id_18>",
22
- "<extra_id_19>",
23
- "<extra_id_20>",
24
- "<extra_id_21>",
25
- "<extra_id_22>",
26
- "<extra_id_23>",
27
- "<extra_id_24>",
28
- "<extra_id_25>",
29
- "<extra_id_26>",
30
- "<extra_id_27>",
31
- "<extra_id_28>",
32
- "<extra_id_29>",
33
- "<extra_id_30>",
34
- "<extra_id_31>",
35
- "<extra_id_32>",
36
- "<extra_id_33>",
37
- "<extra_id_34>",
38
- "<extra_id_35>",
39
- "<extra_id_36>",
40
- "<extra_id_37>",
41
- "<extra_id_38>",
42
- "<extra_id_39>",
43
- "<extra_id_40>",
44
- "<extra_id_41>",
45
- "<extra_id_42>",
46
- "<extra_id_43>",
47
- "<extra_id_44>",
48
- "<extra_id_45>",
49
- "<extra_id_46>",
50
- "<extra_id_47>",
51
- "<extra_id_48>",
52
- "<extra_id_49>",
53
- "<extra_id_50>",
54
- "<extra_id_51>",
55
- "<extra_id_52>",
56
- "<extra_id_53>",
57
- "<extra_id_54>",
58
- "<extra_id_55>",
59
- "<extra_id_56>",
60
- "<extra_id_57>",
61
- "<extra_id_58>",
62
- "<extra_id_59>",
63
- "<extra_id_60>",
64
- "<extra_id_61>",
65
- "<extra_id_62>",
66
- "<extra_id_63>",
67
- "<extra_id_64>",
68
- "<extra_id_65>",
69
- "<extra_id_66>",
70
- "<extra_id_67>",
71
- "<extra_id_68>",
72
- "<extra_id_69>",
73
- "<extra_id_70>",
74
- "<extra_id_71>",
75
- "<extra_id_72>",
76
- "<extra_id_73>",
77
- "<extra_id_74>",
78
- "<extra_id_75>",
79
- "<extra_id_76>",
80
- "<extra_id_77>",
81
- "<extra_id_78>",
82
- "<extra_id_79>",
83
- "<extra_id_80>",
84
- "<extra_id_81>",
85
- "<extra_id_82>",
86
- "<extra_id_83>",
87
- "<extra_id_84>",
88
- "<extra_id_85>",
89
- "<extra_id_86>",
90
- "<extra_id_87>",
91
- "<extra_id_88>",
92
- "<extra_id_89>",
93
- "<extra_id_90>",
94
- "<extra_id_91>",
95
- "<extra_id_92>",
96
- "<extra_id_93>",
97
- "<extra_id_94>",
98
- "<extra_id_95>",
99
- "<extra_id_96>",
100
- "<extra_id_97>",
101
- "<extra_id_98>",
102
- "<extra_id_99>"
103
- ],
104
- "eos_token": "</s>",
105
- "pad_token": "<pad>",
106
- "unk_token": "<unk>"
107
  }
 
1
  {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "[CLS]",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "[MASK]",
25
+ "lstrip": true,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "[SEP]",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  }
spiece.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edc595fcd800672d6ef0e2aea5ba0f1dc8826471f6f40a87b70619dcc60ddd4a
3
- size 1466734
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4368e6b7901e8c54bd62326ca5a5063eba36c31d74995a599a5be77a0cd5cfd0
3
+ size 1592581
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,117 +1,67 @@
1
  {
2
- "additional_special_tokens": [
3
- "<extra_id_0>",
4
- "<extra_id_1>",
5
- "<extra_id_2>",
6
- "<extra_id_3>",
7
- "<extra_id_4>",
8
- "<extra_id_5>",
9
- "<extra_id_6>",
10
- "<extra_id_7>",
11
- "<extra_id_8>",
12
- "<extra_id_9>",
13
- "<extra_id_10>",
14
- "<extra_id_11>",
15
- "<extra_id_12>",
16
- "<extra_id_13>",
17
- "<extra_id_14>",
18
- "<extra_id_15>",
19
- "<extra_id_16>",
20
- "<extra_id_17>",
21
- "<extra_id_18>",
22
- "<extra_id_19>",
23
- "<extra_id_20>",
24
- "<extra_id_21>",
25
- "<extra_id_22>",
26
- "<extra_id_23>",
27
- "<extra_id_24>",
28
- "<extra_id_25>",
29
- "<extra_id_26>",
30
- "<extra_id_27>",
31
- "<extra_id_28>",
32
- "<extra_id_29>",
33
- "<extra_id_30>",
34
- "<extra_id_31>",
35
- "<extra_id_32>",
36
- "<extra_id_33>",
37
- "<extra_id_34>",
38
- "<extra_id_35>",
39
- "<extra_id_36>",
40
- "<extra_id_37>",
41
- "<extra_id_38>",
42
- "<extra_id_39>",
43
- "<extra_id_40>",
44
- "<extra_id_41>",
45
- "<extra_id_42>",
46
- "<extra_id_43>",
47
- "<extra_id_44>",
48
- "<extra_id_45>",
49
- "<extra_id_46>",
50
- "<extra_id_47>",
51
- "<extra_id_48>",
52
- "<extra_id_49>",
53
- "<extra_id_50>",
54
- "<extra_id_51>",
55
- "<extra_id_52>",
56
- "<extra_id_53>",
57
- "<extra_id_54>",
58
- "<extra_id_55>",
59
- "<extra_id_56>",
60
- "<extra_id_57>",
61
- "<extra_id_58>",
62
- "<extra_id_59>",
63
- "<extra_id_60>",
64
- "<extra_id_61>",
65
- "<extra_id_62>",
66
- "<extra_id_63>",
67
- "<extra_id_64>",
68
- "<extra_id_65>",
69
- "<extra_id_66>",
70
- "<extra_id_67>",
71
- "<extra_id_68>",
72
- "<extra_id_69>",
73
- "<extra_id_70>",
74
- "<extra_id_71>",
75
- "<extra_id_72>",
76
- "<extra_id_73>",
77
- "<extra_id_74>",
78
- "<extra_id_75>",
79
- "<extra_id_76>",
80
- "<extra_id_77>",
81
- "<extra_id_78>",
82
- "<extra_id_79>",
83
- "<extra_id_80>",
84
- "<extra_id_81>",
85
- "<extra_id_82>",
86
- "<extra_id_83>",
87
- "<extra_id_84>",
88
- "<extra_id_85>",
89
- "<extra_id_86>",
90
- "<extra_id_87>",
91
- "<extra_id_88>",
92
- "<extra_id_89>",
93
- "<extra_id_90>",
94
- "<extra_id_91>",
95
- "<extra_id_92>",
96
- "<extra_id_93>",
97
- "<extra_id_94>",
98
- "<extra_id_95>",
99
- "<extra_id_96>",
100
- "<extra_id_97>",
101
- "<extra_id_98>",
102
- "<extra_id_99>"
103
- ],
104
  "auto_map": {
105
  "AutoProcessor": "processing_veld.VELDProcessor"
106
  },
107
- "eos_token": "</s>",
108
- "extra_ids": 100,
109
- "model_max_length": 512,
110
- "name_or_path": "checkpoints/veld_e1_linear",
111
- "pad_token": "<pad>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  "processor_class": "VELDProcessor",
 
 
 
 
 
 
 
 
113
  "sp_model_kwargs": {},
114
- "special_tokens_map_file": null,
115
- "tokenizer_class": "T5Tokenizer",
116
- "unk_token": "<unk>"
 
 
 
 
 
 
 
117
  }
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "auto_map": {
3
  "AutoProcessor": "processing_veld.VELDProcessor"
4
  },
5
+ "bos_token": {
6
+ "__type": "AddedToken",
7
+ "content": "<s>",
8
+ "lstrip": false,
9
+ "normalized": true,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "cls_token": {
14
+ "__type": "AddedToken",
15
+ "content": "[CLS]",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "eos_token": {
22
+ "__type": "AddedToken",
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": true,
26
+ "rstrip": false,
27
+ "single_word": false
28
+ },
29
+ "mask_token": {
30
+ "__type": "AddedToken",
31
+ "content": "[MASK]",
32
+ "lstrip": true,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "model_max_length": 1000000000000000019884624838656,
38
+ "name_or_path": "veld_e0_linear",
39
+ "pad_token": {
40
+ "__type": "AddedToken",
41
+ "content": "<pad>",
42
+ "lstrip": false,
43
+ "normalized": true,
44
+ "rstrip": false,
45
+ "single_word": false
46
+ },
47
  "processor_class": "VELDProcessor",
48
+ "sep_token": {
49
+ "__type": "AddedToken",
50
+ "content": "[SEP]",
51
+ "lstrip": false,
52
+ "normalized": true,
53
+ "rstrip": false,
54
+ "single_word": false
55
+ },
56
  "sp_model_kwargs": {},
57
+ "special_tokens_map_file": "vocab/ko_en/spiece/ko20000vs64000_ext/special_tokens_map.json",
58
+ "tokenizer_class": "BigBirdTokenizer",
59
+ "unk_token": {
60
+ "__type": "AddedToken",
61
+ "content": "<unk>",
62
+ "lstrip": false,
63
+ "normalized": true,
64
+ "rstrip": false,
65
+ "single_word": false
66
+ }
67
  }