Commit 9085c0d by esunn0412
Parent(s): 921a3d2

tei support

Files changed:
- config.json +14 -45
- conv.py +71 -0
- model.safetensors +2 -2
- original_config.json +51 -0
- original_model.safetensors +3 -0
- resolve/main/config.json +25 -0
config.json CHANGED
@@ -1,51 +1,20 @@
 {
-  "_name_or_path": "jina-embeddings-v3-xlm-large-pairs-vol5-lora",
-  "architectures": [
-    "XLMRobertaModel"
-  ],
-  "attention_probs_dropout_prob": 0.1,
-  "auto_map": {
-    "AutoConfig": "jinaai/xlm-roberta-flash-implementation--configuration_xlm_roberta.XLMRobertaFlashConfig",
-    "AutoModel": "jinaai/xlm-roberta-flash-implementation--modeling_lora.XLMRobertaLoRA",
-    "AutoModelForMaskedLM": "jinaai/xlm-roberta-flash-implementation--modeling_xlm_roberta.XLMRobertaForMaskedLM",
-    "AutoModelForPreTraining": "jinaai/xlm-roberta-flash-implementation--modeling_xlm_roberta.XLMRobertaForPreTraining"
-  },
-  "bos_token_id": 0,
-  "classifier_dropout": null,
-  "emb_pooler": null,
-  "eos_token_id": 2,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
+  "model_type": "new",
+  "vocab_size": 250002,
   "hidden_size": 1024,
-  "initializer_range": 0.02,
+  "num_hidden_layers": 24,
+  "num_attention_heads": 16,
   "intermediate_size": 4096,
-  "layer_norm_eps": 1e-05,
-  "load_trained_adapters": true,
-  "lora_adaptations": ["retrieval.query", "retrieval.passage", "separation", "classification", "text-matching"],
-  "lora_alpha": 1,
-  "lora_dropout_p": 0.0,
-  "lora_main_params_trainable": false,
-  "lora_rank": 4,
-  "matryoshka_dimensions": [32, 64, 128, 256, 512, 768, 1024],
+  "hidden_act": "gelu",
   "max_position_embeddings": 8194,
-  "num_attention_heads": 16,
-  "num_hidden_layers": 24,
-  "output_past": true,
-  "pad_token_id": 1,
-  "position_embedding_type": "rotary",
-  "rotary_emb_base": 20000.0,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.30.2",
-  "truncate_dim": null,
   "type_vocab_size": 1,
-  "use_cache": true,
-  "use_flash_attn": true,
-  "vocab_size": 250002,
-  "task_instructions": {
-    "retrieval.query": "Represent the query for retrieving evidence documents: ",
-    "retrieval.passage": "Represent the document for retrieval: ",
-    "separation": "",
-    "classification": "",
-    "text-matching": ""
-  }
+  "layer_norm_type": "layer_norm",
+  "layer_norm_eps": 1e-5,
+  "position_embedding_type": "rope",
+  "rope_theta": 10000.0,
+  "rope_scaling": null,
+  "logn_attention_scale": false,
+  "logn_attention_clip1": false,
+  "architectures": ["GTEModel"],
+  "model_max_length": 8194
 }
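The rewritten config.json replaces the XLM-RoBERTa/LoRA fields with a GTE-style "new" model-type layout (presumably what text-embeddings-inference expects, per the "tei support" commit message) while keeping the backbone dimensions unchanged. A minimal sanity-check sketch, assuming config.json and original_config.json from this commit sit in the working directory:

import json

# Sketch: confirm the TEI-style config keeps the same backbone dimensions
# as the original XLM-RoBERTa config saved alongside it in this commit.
with open("config.json") as f:
    new_cfg = json.load(f)
with open("original_config.json") as f:
    old_cfg = json.load(f)

shared_fields = [
    "hidden_size", "num_hidden_layers", "num_attention_heads",
    "intermediate_size", "vocab_size", "max_position_embeddings",
    "type_vocab_size",
]
for field in shared_fields:
    assert new_cfg[field] == old_cfg[field], f"mismatch on {field}"
print("backbone fields match:", {k: new_cfg[k] for k in shared_fields})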
conv.py ADDED
@@ -0,0 +1,71 @@
+from safetensors import safe_open
+from safetensors.torch import save_file
+import torch
+
+def rename_key(key):
+    parts = key.split('.')
+    if 'roberta' in parts:
+        parts.remove('roberta')
+    if 'parametrizations' in parts:
+        parts.remove('parametrizations')
+    if 'weight' in parts and 'original' in parts:
+        parts.remove('original')
+
+    if 'encoder.layers' in key:
+        parts[parts.index('layers')] = 'layer'
+
+    if 'mixer' in parts:
+        parts[parts.index('mixer')] = 'attention'
+    if 'Wqkv' in parts:
+        parts[parts.index('Wqkv')] = 'qkv_proj'
+    if 'out_proj' in parts:
+        parts[parts.index('out_proj')] = 'o_proj'
+    if 'mlp.fc1' in key:
+        parts[parts.index('fc1')] = 'up_proj'
+    if 'mlp.fc2' in key:
+        parts[parts.index('fc2')] = 'down_proj'
+    if 'emb_ln' in parts:
+        parts[parts.index('emb_ln')] = 'LayerNorm'
+        parts.insert(0, 'embeddings')
+    if 'norm1' in parts:
+        parts[parts.index('norm1')] = 'attn_ln'
+    if 'norm2' in parts:
+        parts[parts.index('norm2')] = 'mlp_ln'
+    if 'weight' in parts:
+        if parts[-2] in ['attn_ln', 'mlp_ln', 'LayerNorm']:
+            parts[-1] = 'gamma'
+    if 'bias' in parts:
+        if parts[-2] in ['attn_ln', 'mlp_ln', 'LayerNorm']:
+            parts[-1] = 'beta'
+
+    return '.'.join(parts)
+
+input_file = "original_model.safetensors"
+output_file = "model.safetensors"
+
+new_tensors = {}
+
+with safe_open(input_file, framework="pt", device="cpu") as f:
+    for key in f.keys():
+        if 'lora' not in key:
+            new_key = rename_key(key)
+            tensor = f.get_tensor(key)
+
+            if 'mlp.up_proj' in new_key:
+                # Create up_proj and up_gate_proj
+                new_tensors[new_key] = tensor
+                gate_key = new_key.replace('up_proj', 'up_gate_proj')
+                # Expand the tensor to match the expected shape
+                expanded_tensor = torch.cat([tensor] * 2, dim=0)
+                new_tensors[gate_key] = expanded_tensor
+            else:
+                new_tensors[new_key] = tensor
+
+save_file(new_tensors, output_file)
+print(f"Renamed tensors saved to {output_file}")
+
+# Inspect the renamed tensors
+with safe_open(output_file, framework="pt", device="cpu") as f:
+    print("\nRenamed tensors:")
+    for key in f.keys():
+        print(f"{key}: {f.get_tensor(key).shape}")
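conv.py above drops the LoRA adapter tensors and renames the remaining flash-attention XLM-RoBERTa weights to GTE-style names, duplicating each mlp.up_proj tensor into an up_gate_proj of twice the size. A minimal post-conversion check sketch, assuming the script has already written model.safetensors in the same directory:

from safetensors import safe_open

# Sketch: look for key fragments that rename_key should have removed or renamed,
# plus the LoRA keys that are skipped entirely.
leftover = ("lora", "roberta", "parametrizations", "mixer", "Wqkv", "emb_ln")

with safe_open("model.safetensors", framework="pt", device="cpu") as f:
    keys = list(f.keys())
    bad = [k for k in keys if any(frag in k for frag in leftover)]

print(f"{len(keys)} tensors, {len(bad)} with unconverted names")
for k in bad:
    print("  leftover:", k)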
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:dd20adef5134ed345186af0acc5066662700c49c5c328a5d358f60f176f274a3
+size 1521812032
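The updated Git LFS pointer above records the sha256 oid and byte size of the converted model.safetensors. Both fields can be recomputed locally from the converted file; a minimal sketch:

import hashlib
import os

# Sketch: recompute the LFS pointer fields (oid and size) for the converted checkpoint.
path = "model.safetensors"
sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

print("oid sha256:" + sha.hexdigest())
print("size", os.path.getsize(path))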
original_config.json ADDED
@@ -0,0 +1,51 @@
+{
+  "_name_or_path": "jina-embeddings-v3-xlm-large-pairs-vol5-lora",
+  "architectures": [
+    "XLMRobertaModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "auto_map": {
+    "AutoConfig": "jinaai/xlm-roberta-flash-implementation--configuration_xlm_roberta.XLMRobertaFlashConfig",
+    "AutoModel": "jinaai/xlm-roberta-flash-implementation--modeling_lora.XLMRobertaLoRA",
+    "AutoModelForMaskedLM": "jinaai/xlm-roberta-flash-implementation--modeling_xlm_roberta.XLMRobertaForMaskedLM",
+    "AutoModelForPreTraining": "jinaai/xlm-roberta-flash-implementation--modeling_xlm_roberta.XLMRobertaForPreTraining"
+  },
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "emb_pooler": null,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "load_trained_adapters": true,
+  "lora_adaptations": ["retrieval.query", "retrieval.passage", "separation", "classification", "text-matching"],
+  "lora_alpha": 1,
+  "lora_dropout_p": 0.0,
+  "lora_main_params_trainable": false,
+  "lora_rank": 4,
+  "matryoshka_dimensions": [32, 64, 128, 256, 512, 768, 1024],
+  "max_position_embeddings": 8194,
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "output_past": true,
+  "pad_token_id": 1,
+  "position_embedding_type": "rotary",
+  "rotary_emb_base": 20000.0,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.30.2",
+  "truncate_dim": null,
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "use_flash_attn": true,
+  "vocab_size": 250002,
+  "task_instructions": {
+    "retrieval.query": "Represent the query for retrieving evidence documents: ",
+    "retrieval.passage": "Represent the document for retrieval: ",
+    "separation": "",
+    "classification": "",
+    "text-matching": ""
+  }
+}
original_model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17ca06efd886a065d0081912b04c9e27ef5086a9dd09659cce32aa9c84587f23
+size 1144685320
resolve/main/config.json ADDED
@@ -0,0 +1,25 @@
+{
+  "_name_or_path": "jina-embeddings-v3-xlm-large-pairs-vol5-lora",
+  "architectures": [
+    "XLMRobertaModel"
+  ],
+  "model_type": "xlm-roberta",
+  "max_position_embeddings": 8194,
+  "pad_token_id": 1,
+  "attention_probs_dropout_prob": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "vocab_size": 250002,
+  "type_vocab_size": 1,
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "classifier_dropout": null,
+  "output_past": true,
+  "position_embedding_type": "absolute"
+}