svakhreev committed
Commit 336c052
1 Parent(s): 20f9874

Upload GPTRefactForCausalLM

Files changed (2)
  1. configuration_gpt_refact.py +11 -12
  2. pytorch_model.bin +1 -1
configuration_gpt_refact.py CHANGED
@@ -17,25 +17,24 @@ class GPTRefactConfig(PretrainedConfig):
 
     def __init__(
         self,
-        vocab_size=49216,
-        n_positions=1024,
-        n_embd=768,
-        n_layer=12,
-        n_head=12,
-        n_inner=None,
-        resid_pdrop=0.1,
-        embd_pdrop=0.1,
-        attn_pdrop=0.1,
+        vocab_size: int = 49216,
+        n_positions: int = 4096,
+        n_embd: int = 1024,
+        n_layer: int = 32,
+        n_head: int = 64,
+        max_position_embeddings: int = 4096,
+        multi_query: bool = True,
         layer_norm_epsilon=1e-5,
         initializer_range=0.02,
         scale_attn_weights=True,
         use_cache=True,
         bos_token_id=-1,
         eos_token_id=0,
-        max_position_embeddings: int = 4096,
-        multi_query: bool = True,
         attention_softmax_in_fp32=False,
         scale_attention_softmax_in_fp32=False,
+        resid_pdrop=0.1,
+        embd_pdrop=0.1,
+        attn_pdrop=0.1,
         **kwargs,
     ):
         self.vocab_size = vocab_size
@@ -43,7 +42,7 @@ class GPTRefactConfig(PretrainedConfig):
         self.n_embd = n_embd
         self.n_layer = n_layer
         self.n_head = n_head
-        self.n_inner = n_inner
+        self.n_inner = None
         self.resid_pdrop = resid_pdrop
         self.embd_pdrop = embd_pdrop
         self.attn_pdrop = attn_pdrop
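
In short, the commit replaces the GPT-2-small-style placeholder defaults (768-dim embeddings, 12 layers, 12 heads, 1024 positions) with the shipped checkpoint's actual architecture (1024-dim embeddings, 32 layers, 64 heads, multi-query attention, 4096-token context), moves the dropout defaults below the attention-softmax flags, and drops n_inner from the signature, hardcoding it to None. A minimal sketch of what all-defaults construction now yields; this is not part of the commit, and it assumes configuration_gpt_refact.py (plus its transformers dependency) is importable from the working directory:

    # Sketch only: assumes configuration_gpt_refact.py is on the import path.
    from configuration_gpt_refact import GPTRefactConfig

    config = GPTRefactConfig()  # all-defaults construction after this change
    print(config.vocab_size)    # 49216
    print(config.n_embd)        # 1024 (was 768)
    print(config.n_layer)       # 32   (was 12)
    print(config.n_head)        # 64   (was 12)
    print(config.n_inner)       # None (hardcoded; no longer an __init__ parameter)

When loading from the Hub, the usual route to this custom class is AutoModelForCausalLM.from_pretrained(..., trust_remote_code=True); in that case the values come from the repo's config.json rather than these defaults.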
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:81388e4a168bb437a7a09af6c8b6c2943990276ee62c2f449cd2bdff257e8860
+oid sha256:eb422076b1a52027d21772a7c4cbb7365078c2dd489384a97d6ce61c8b7b7204
 size 6343461637
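
pytorch_model.bin is stored through Git LFS, so the diff touches only the pointer file: the oid (the SHA-256 of the blob's contents) changes while the size stays at 6343461637 bytes, meaning the weights were re-uploaded with an identical byte length. A minimal sketch, not part of the commit, for checking a downloaded copy against the new pointer (the local file path is assumed):

    # Sketch only: verify a downloaded pytorch_model.bin against the LFS oid above.
    import hashlib

    def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
        """Stream the file in 1 MiB chunks to avoid loading ~6 GB into memory."""
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                h.update(chunk)
        return h.hexdigest()

    assert sha256_of("pytorch_model.bin") == (
        "eb422076b1a52027d21772a7c4cbb7365078c2dd489384a97d6ce61c8b7b7204"
    )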