calisolo committed on
Commit
e2456c5
·
1 Parent(s): e617860

Upload 6 files

Browse files
Files changed (6) hide show
  1. .gitattributes +1 -8
  2. README.md +55 -6
  3. config.json +52 -0
  4. merges.txt +0 -0
  5. pytorch_model.bin +3 -0
  6. vocab.json +0 -0
.gitattributes CHANGED
@@ -2,27 +2,20 @@
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
  *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
  *.ftz filter=lfs diff=lfs merge=lfs -text
7
  *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
  *.model filter=lfs diff=lfs merge=lfs -text
13
  *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
17
  *.ot filter=lfs diff=lfs merge=lfs -text
18
  *.parquet filter=lfs diff=lfs merge=lfs -text
19
  *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
  *.pt filter=lfs diff=lfs merge=lfs -text
23
  *.pth filter=lfs diff=lfs merge=lfs -text
24
  *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
28
  *.tflite filter=lfs diff=lfs merge=lfs -text
@@ -30,5 +23,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
30
  *.wasm filter=lfs diff=lfs merge=lfs -text
31
  *.xz filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
- *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
  *.bz2 filter=lfs diff=lfs merge=lfs -text
 
5
  *.ftz filter=lfs diff=lfs merge=lfs -text
6
  *.gz filter=lfs diff=lfs merge=lfs -text
7
  *.h5 filter=lfs diff=lfs merge=lfs -text
8
  *.joblib filter=lfs diff=lfs merge=lfs -text
9
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
 
10
  *.model filter=lfs diff=lfs merge=lfs -text
11
  *.msgpack filter=lfs diff=lfs merge=lfs -text
 
 
12
  *.onnx filter=lfs diff=lfs merge=lfs -text
13
  *.ot filter=lfs diff=lfs merge=lfs -text
14
  *.parquet filter=lfs diff=lfs merge=lfs -text
15
  *.pb filter=lfs diff=lfs merge=lfs -text
 
 
16
  *.pt filter=lfs diff=lfs merge=lfs -text
17
  *.pth filter=lfs diff=lfs merge=lfs -text
18
  *.rar filter=lfs diff=lfs merge=lfs -text
 
19
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
20
  *.tar.* filter=lfs diff=lfs merge=lfs -text
21
  *.tflite filter=lfs diff=lfs merge=lfs -text
 
23
  *.wasm filter=lfs diff=lfs merge=lfs -text
24
  *.xz filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,61 @@
1
  ---
2
  license: apache-2.0
3
  ---
4
- OFA-huge checkpoint optimized for image-captioning
5
 
6
- It is transformed from fairseq to huggingface and no other processing has been performed.
 
7
 
8
- This checkpoint is not uploaded by the official OFA-sys, so i upload it for use.
9
 
10
- Thanks to OFA-sys
11
- (https://github.com/OFA-Sys/OFA)
12
- (https://huggingface.co/OFA-Sys/ofa-huge)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
  ---
 
4
 
5
+ # OFA-huge
6
+ This is the **huge** version of the OFA pretrained model. OFA is a unified multimodal pretrained model that unifies modalities (i.e., cross-modality, vision, language) and tasks (e.g., image generation, visual grounding, image captioning, image classification, text generation, etc.) into a simple sequence-to-sequence learning framework.
7
 
8
+ The directory includes 4 files: `config.json`, which contains the model configuration; `vocab.json` and `merges.txt` for our OFA tokenizer; and `pytorch_model.bin`, which contains the model weights. There is no need to worry about the mismatch between Fairseq and transformers, since we have already addressed the issue.
9
 
10
+ To use it in transformers, please refer to https://github.com/OFA-Sys/OFA/tree/feature/add_transformers. Install transformers and download the model as shown below.
11
+ ```
12
+ git clone --single-branch --branch feature/add_transformers https://github.com/OFA-Sys/OFA.git
13
+ pip install OFA/transformers/
14
+ git clone https://huggingface.co/OFA-Sys/OFA-huge
15
+ ```
16
+ Afterwards, set `ckpt_dir` to the path of the cloned OFA-huge directory, and prepare an image for the testing example below. Also, ensure that you have pillow and torchvision in your environment, and run `import torch` first, since the example calls `torch.tensor`.
17
+
18
+ ```
19
+ >>> from PIL import Image
20
+ >>> from torchvision import transforms
21
+ >>> from transformers import OFATokenizer, OFAModel
22
+ >>> from generate import sequence_generator
23
+
24
+ >>> mean, std = [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]
25
+ >>> resolution = 480
26
+ >>> patch_resize_transform = transforms.Compose([
27
+ lambda image: image.convert("RGB"),
28
+ transforms.Resize((resolution, resolution), interpolation=Image.BICUBIC),
29
+ transforms.ToTensor(),
30
+ transforms.Normalize(mean=mean, std=std)
31
+ ])
32
+
33
+
34
+ >>> tokenizer = OFATokenizer.from_pretrained(ckpt_dir)
35
+
36
+ >>> txt = " what does the image describe?"
37
+ >>> inputs = tokenizer([txt], return_tensors="pt").input_ids
38
+ >>> img = Image.open(path_to_image)
39
+ >>> patch_img = patch_resize_transform(img).unsqueeze(0)
40
+
41
+
42
+ >>> # using the generator of fairseq version
43
+ >>> model = OFAModel.from_pretrained(ckpt_dir, use_cache=True)
44
+ >>> generator = sequence_generator.SequenceGenerator(
45
+ tokenizer=tokenizer,
46
+ beam_size=5,
47
+ max_len_b=16,
48
+ min_len=0,
49
+ no_repeat_ngram_size=3,
50
+ )
51
+ >>> data = {}
52
+ >>> data["net_input"] = {"input_ids": inputs, 'patch_images': patch_img, 'patch_masks':torch.tensor([True])}
53
+ >>> gen_output = generator.generate([model], data)
54
+ >>> gen = [gen_output[i][0]["tokens"] for i in range(len(gen_output))]
55
+
56
+ >>> # using the generator of huggingface version
57
+ >>> model = OFAModel.from_pretrained(ckpt_dir, use_cache=False)
58
+ >>> gen = model.generate(inputs, patch_images=patch_img, num_beams=5, no_repeat_ngram_size=3)
59
+
60
+ >>> print(tokenizer.batch_decode(gen, skip_special_tokens=True))
61
+ ```
config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "activation_function": "gelu",
4
+ "add_type_embedding": true,
5
+ "architectures": [
6
+ "OFAModel"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "attn_scale_factor": 2.0,
10
+ "bos_token_id": 0,
11
+ "classifier_dropout": 0.0,
12
+ "code_image_size": 128,
13
+ "code_layernorm_embedding": true,
14
+ "d_model": 1280,
15
+ "decoder_attention_heads": 16,
16
+ "decoder_drop_path_rate": 0.0,
17
+ "decoder_ffn_dim": 5120,
18
+ "decoder_layerdrop": 0.0,
19
+ "decoder_layers": 12,
20
+ "decoder_normalize_before": true,
21
+ "decoder_start_token_id": 0,
22
+ "dropout": 0.1,
23
+ "encoder_attention_heads": 16,
24
+ "encoder_drop_path_rate": 0.0,
25
+ "encoder_ffn_dim": 5120,
26
+ "encoder_layerdrop": 0.0,
27
+ "encoder_layers": 24,
28
+ "encoder_normalize_before": true,
29
+ "entangle_position_embedding": false,
30
+ "eos_token_id": 2,
31
+ "forced_eos_token_id": 2,
32
+ "image_bucket_size": 42,
33
+ "init_std": 0.02,
34
+ "is_encoder_decoder": true,
35
+ "layernorm_embedding": true,
36
+ "max_position_embeddings": 1024,
37
+ "model_type": "ofa",
38
+ "normformer": true,
39
+ "num_hidden_layers": 24,
40
+ "pad_token_id": 1,
41
+ "patch_layernorm_embedding": true,
42
+ "resnet_drop_path_rate": 0.0,
43
+ "resnet_model_path": null,
44
+ "resnet_type": "resnet152",
45
+ "scale_embedding": false,
46
+ "share_decoder_input_output_embed": true,
47
+ "token_bucket_size": 256,
48
+ "torch_dtype": "float32",
49
+ "transformers_version": "4.15.0",
50
+ "use_cache": false,
51
+ "vocab_size": 59457
52
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dc6d4d382ab2405cc94b3d52ea6c6ce57da83c5f4590511f0bb2c4aa98e0e08
3
+ size 4394603544
vocab.json ADDED
The diff for this file is too large to render. See raw diff