NagaSaiAbhinay
committed on
Upload folder using huggingface_hub
- .ipynb_checkpoints/config-checkpoint.py +42 -0
- .ipynb_checkpoints/model-checkpoint.py +16 -0
- __pycache__/config.cpython-311.pyc +0 -0
- __pycache__/model.cpython-311.pyc +0 -0
- config.json +6 -0
- config.py +42 -0
- model.py +16 -0
.ipynb_checkpoints/config-checkpoint.py
ADDED
@@ -0,0 +1,42 @@
+from transformers import PretrainedConfig
+
+class CSDConfig(PretrainedConfig):
+    model_type = "CSDModel"
+
+    def __init__(
+        self,
+        attention_dropout: float = 0.0,
+        dropout: float = 0.0,
+        hidden_act: str = "quick_gelu",
+        hidden_size: int = 1024,
+        image_size: int = 224,
+        initializer_factor: float = 1.0,
+        initializer_range: float = 0.02,
+        intermediate_size: int = 4096,
+        layer_norm_eps: float = 1e-05,
+        num_attention_heads: int = 16,
+        num_channels: int = 3,
+        num_hidden_layers: int = 24,
+        patch_size: int = 14,
+        projection_dim: int = 768,
+        style_projection_dim: int = 768,
+        content_projection_dim: int = 768,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.attention_dropout = attention_dropout
+        self.dropout = dropout
+        self.hidden_act = hidden_act
+        self.hidden_size = hidden_size
+        self.image_size = image_size
+        self.initializer_factor = initializer_factor
+        self.initializer_range = initializer_range
+        self.intermediate_size = intermediate_size
+        self.layer_norm_eps = layer_norm_eps
+        self.num_attention_heads = num_attention_heads
+        self.num_channels = num_channels
+        self.num_hidden_layers = num_hidden_layers
+        self.patch_size = patch_size
+        self.projection_dim = projection_dim
+        self.style_projection_dim = style_projection_dim
+        self.content_projection_dim = content_projection_dim
.ipynb_checkpoints/model-checkpoint.py
ADDED
@@ -0,0 +1,20 @@
+import torch.nn as nn
+from .config import CSDConfig
+from transformers import PreTrainedModel, CLIPVisionModel
+
+class CSDModel(PreTrainedModel):
+    config_class = CSDConfig
+
+    def __init__(self, config: CSDConfig):
+        super().__init__(config)
+        self.backbone = CLIPVisionModel(config)
+        self.out_style = nn.Linear(config.hidden_size, config.style_projection_dim, bias=False)
+        self.out_content = nn.Linear(config.hidden_size, config.content_projection_dim, bias=False)
+
+    def forward(self, pixel_values):
+        # The backbone returns a ModelOutput, not a raw tensor; project the
+        # pooled [CLS] features and return both embeddings.
+        features = self.backbone(pixel_values).pooler_output
+        style_embeds = self.out_style(features)
+        content_embeds = self.out_content(features)
+        return style_embeds, content_embeds
__pycache__/config.cpython-311.pyc
ADDED
Binary file (2.07 kB)
__pycache__/model.cpython-311.pyc
ADDED
Binary file (1.7 kB)
config.json
CHANGED
@@ -1,8 +1,13 @@
 {
+  "_name_or_path": "./",
   "architectures": [
     "CSDModel"
   ],
   "attention_dropout": 0.0,
+  "auto_map": {
+    "AutoConfig": "config.CSDConfig",
+    "AutoModel": "model.CSDModel"
+  },
   "content_projection_dim": 768,
   "dropout": 0.0,
   "hidden_act": "quick_gelu",
@@ -12,6 +17,7 @@
   "initializer_range": 0.02,
   "intermediate_size": 4096,
   "layer_norm_eps": 1e-05,
+  "model_type": "CSDModel",
   "num_attention_heads": 16,
   "num_channels": 3,
   "num_hidden_layers": 24,
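The `auto_map` entries added above are what let the generic Auto classes find the custom code shipped in this repo: with `trust_remote_code=True`, transformers imports `CSDConfig` from `config.py` and `CSDModel` from `model.py`. A minimal loading sketch follows; the repo id is a placeholder, since the commit does not show it:

```python
from transformers import AutoConfig, AutoModel

# "user/csd-repo" is a placeholder repo id. trust_remote_code=True opts in
# to executing config.py / model.py from the downloaded repo, resolved via
# the "auto_map" keys in config.json.
config = AutoConfig.from_pretrained("user/csd-repo", trust_remote_code=True)
model = AutoModel.from_pretrained("user/csd-repo", trust_remote_code=True)
```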
config.py
ADDED
@@ -0,0 +1,42 @@
+from transformers import PretrainedConfig
+
+class CSDConfig(PretrainedConfig):
+    model_type = "CSDModel"
+
+    def __init__(
+        self,
+        attention_dropout: float = 0.0,
+        dropout: float = 0.0,
+        hidden_act: str = "quick_gelu",
+        hidden_size: int = 1024,
+        image_size: int = 224,
+        initializer_factor: float = 1.0,
+        initializer_range: float = 0.02,
+        intermediate_size: int = 4096,
+        layer_norm_eps: float = 1e-05,
+        num_attention_heads: int = 16,
+        num_channels: int = 3,
+        num_hidden_layers: int = 24,
+        patch_size: int = 14,
+        projection_dim: int = 768,
+        style_projection_dim: int = 768,
+        content_projection_dim: int = 768,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.attention_dropout = attention_dropout
+        self.dropout = dropout
+        self.hidden_act = hidden_act
+        self.hidden_size = hidden_size
+        self.image_size = image_size
+        self.initializer_factor = initializer_factor
+        self.initializer_range = initializer_range
+        self.intermediate_size = intermediate_size
+        self.layer_norm_eps = layer_norm_eps
+        self.num_attention_heads = num_attention_heads
+        self.num_channels = num_channels
+        self.num_hidden_layers = num_hidden_layers
+        self.patch_size = patch_size
+        self.projection_dim = projection_dim
+        self.style_projection_dim = style_projection_dim
+        self.content_projection_dim = content_projection_dim
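Because `CSDConfig` subclasses `PretrainedConfig`, it serializes through the standard round-trip; the defaults above mirror a CLIP ViT-L/14 vision tower (24 layers, hidden size 1024, 14x14 patches on 224x224 inputs). A quick sketch, with an illustrative local path:

```python
from config import CSDConfig

config = CSDConfig()
config.save_pretrained("./csd")  # writes config.json with model_type "CSDModel"

reloaded = CSDConfig.from_pretrained("./csd")
assert reloaded.hidden_size == 1024
assert reloaded.style_projection_dim == 768
```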
model.py
ADDED
@@ -0,0 +1,20 @@
+import torch.nn as nn
+from .config import CSDConfig
+from transformers import PreTrainedModel, CLIPVisionModel
+
+class CSDModel(PreTrainedModel):
+    config_class = CSDConfig
+
+    def __init__(self, config: CSDConfig):
+        super().__init__(config)
+        self.backbone = CLIPVisionModel(config)
+        self.out_style = nn.Linear(config.hidden_size, config.style_projection_dim, bias=False)
+        self.out_content = nn.Linear(config.hidden_size, config.content_projection_dim, bias=False)
+
+    def forward(self, pixel_values):
+        # The backbone returns a ModelOutput, not a raw tensor; project the
+        # pooled [CLS] features and return both embeddings.
+        features = self.backbone(pixel_values).pooler_output
+        style_embeds = self.out_style(features)
+        content_embeds = self.out_content(features)
+        return style_embeds, content_embeds
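A random-weight forward pass is enough to smoke-test the model above. Note that `model.py` imports `config` relatively, so the two files must live in a package (the `csd` package name below is hypothetical) or be loaded through the `auto_map` route shown earlier:

```python
import torch
from csd.config import CSDConfig  # hypothetical package containing config.py/model.py
from csd.model import CSDModel

config = CSDConfig()
model = CSDModel(config).eval()  # randomly initialized, no pretrained weights

pixel_values = torch.randn(1, 3, 224, 224)  # one 224x224 RGB image
with torch.no_grad():
    style_embeds, content_embeds = model(pixel_values)

print(style_embeds.shape)    # torch.Size([1, 768])
print(content_embeds.shape)  # torch.Size([1, 768])
```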