rwightman HF staff committed on
Commit
f06ed88
·
verified ·
1 Parent(s): 4202148
Files changed (4) hide show
  1. README.md +22 -0
  2. config.json +33 -0
  3. model.safetensors +3 -0
  4. pytorch_model.bin +3 -0
README.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - timm
4
+ - transformers
5
+ - image-feature-extraction
6
+ - siglip
7
+ - siglip2
8
+ library_name: timm
9
+ license: apache-2.0
10
+ datasets:
11
+ - webli
12
+ ---
13
+ # Model card for vit_base_patch16_siglip_224.v2_webli
14
+
15
+ A SigLIP 2 ViT (image encoder only) for `timm`. Equivalent to the image tower from https://huggingface.co/timm/ViT-B-16-SigLIP2.
16
+
17
+
18
+ ## Model Details
19
+ - **Dataset:** webli
20
+ - **Papers:**
21
+ - SigLIP 2: Multilingual Vision-Language Encoders with Improved Semantic Understanding, Localization, and Dense Features: https://arxiv.org/abs/2502.14786
22
+ - Sigmoid Loss for Language Image Pre-Training: https://arxiv.org/abs/2303.15343
config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architecture": "vit_base_patch16_siglip_224",
3
+ "num_classes": 0,
4
+ "num_features": 768,
5
+ "global_pool": "map",
6
+ "pretrained_cfg": {
7
+ "tag": "v2_webli",
8
+ "custom_load": false,
9
+ "input_size": [
10
+ 3,
11
+ 224,
12
+ 224
13
+ ],
14
+ "fixed_input_size": true,
15
+ "interpolation": "bicubic",
16
+ "crop_pct": 0.9,
17
+ "crop_mode": "center",
18
+ "mean": [
19
+ 0.5,
20
+ 0.5,
21
+ 0.5
22
+ ],
23
+ "std": [
24
+ 0.5,
25
+ 0.5,
26
+ 0.5
27
+ ],
28
+ "num_classes": 0,
29
+ "pool_size": null,
30
+ "first_conv": "patch_embed.proj",
31
+ "classifier": "head"
32
+ }
33
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9106b0d8d9d02ea90fc3571fffd1557cf444736f695ee40b1e57c856bc3d9494
3
+ size 371551936
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5df7fa97ed317f26362d9499f38b971e117751362a3f6ee1b7f92552d7faf23
3
+ size 371597458