Upload ONNX weights

#1
by Xenova HF staff - opened
config.json CHANGED
@@ -27,6 +27,12 @@
27
  "tie_word_embeddings": true,
28
  "torch_dtype": "bfloat16",
29
  "transformers_version": "4.42.3",
 
 
 
 
 
 
30
  "use_cache": true,
31
  "vocab_size": 49152
32
  }
 
27
  "tie_word_embeddings": true,
28
  "torch_dtype": "bfloat16",
29
  "transformers_version": "4.42.3",
30
+ "transformers.js_config": {
31
+ "kv_cache_dtype": {
32
+ "q4f16": "float16",
33
+ "fp16": "float16"
34
+ }
35
+ },
36
  "use_cache": true,
37
  "vocab_size": 49152
38
  }
onnx/model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:761b9a4d535baafef46c86546942dc6e14f869bdefb7c85f07ec390f2167faeb
3
+ size 653920749
onnx/model_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c2af0dc2059e925c9408e8a9aac1d901c4d8db9ce3a1c2ade10966b35d2c40c
3
+ size 327218892
onnx/model_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b32d1d5a31364b71e843a61493df65691c267e79890225788def79e2c16148c
3
+ size 199708970
onnx/model_q4f16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33cb1d22cd714e37c998deba351c9233f7f2dac5bb4a155bbe0811a6e6eb3553
3
+ size 133950950