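"""Configuration for a Llama-style SmolLM2 decoder.

The defaults below match the published SmolLM2-135M hyperparameters:
576-dim hidden states, 30 transformer layers, 9 attention heads grouped
over 3 key/value heads, and a 49,152-token vocabulary.
"""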
from dataclasses import dataclass
from typing import Optional

@dataclass
class SmolLM2Config:
    hidden_size: int = 576
    num_hidden_layers: int = 30
    num_attention_heads: int = 9   # query heads
    num_key_value_heads: int = 3   # KV heads for grouped-query attention (GQA)
    intermediate_size: int = 1536
    hidden_act: str = "silu"
    max_position_embeddings: int = 2048
    initializer_range: float = 0.041666666666666664  # 1 / sqrt(hidden_size)
    rms_norm_eps: float = 1.0e-5
    vocab_size: int = 49152
    rope_theta: float = 10000.0
    rope_interleaved: bool = False
    rope_scaling: Optional[dict] = None
    tie_word_embeddings: bool = True
    bos_token_id: int = 0
    eos_token_id: int = 0
    pad_token_id: Optional[int] = None
    is_llama_config: bool = True
    pretraining_tp: int = 1
    use_cache: bool = True
    
    def __post_init__(self):
        # Per-head dimension: 576 // 9 = 64.
        self.head_dim = self.hidden_size // self.num_attention_heads
        # In grouped-query attention, key/value heads share the query heads'
        # per-head dimension; the K/V projections are therefore
        # num_key_value_heads * head_dim = 192 wide.
        self.kv_head_dim = self.head_dim
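

# Minimal sanity-check sketch (illustrative usage, not part of the config API):
# instantiate the defaults and verify the derived attention geometry.
if __name__ == "__main__":
    config = SmolLM2Config()
    # 9 query heads * 64 dims per head == 576 hidden size.
    assert config.num_attention_heads * config.head_dim == config.hidden_size
    # Each KV head is shared by num_attention_heads // num_key_value_heads
    # query heads (3 in this config).
    print(f"head_dim={config.head_dim}, "
          f"gqa_group_size={config.num_attention_heads // config.num_key_value_heads}")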