By now, the model understands what is said to some extent.

### 8-bit training setup

```json
{
  "_name_or_path": "saltlux/Ko-Llama3-Luxia-8B",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128001,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 500000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.39.1",
  "use_cache": false,
  "vocab_size": 145792
}
```
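The block above is the model config for saltlux/Ko-Llama3-Luxia-8B. As a minimal sketch, assuming transformers with bitsandbytes and peft, the base model could be loaded for 8-bit training roughly like this (the exact loading code is not part of this README):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training

base_model = "saltlux/Ko-Llama3-Luxia-8B"

# Assumed 8-bit quantization via bitsandbytes; not confirmed by this repo.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,  # matches "torch_dtype": "float16" above
    device_map="auto",
)
model.config.use_cache = False  # matches "use_cache": false above
model = prepare_model_for_kbit_training(model)  # cast norms, enable input grads
```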
```python
batch_size = 16
num_epochs = 1
micro_batch = 1
gradient_accumulation_steps = batch_size  # micro_batch = 1, so the effective batch size is 16
```

### Training hyperparameters

```python
cutoff_len = model.config.hidden_size  # 4096 for this model
lr_scheduler = 'cosine'
warmup_ratio = 0.06  # warmup_steps = 100
learning_rate = 2e-4
optimizer = 'adamw_torch'
weight_decay = 0.01
max_grad_norm = 0.8  # if the model seems to overfit or drift in an odd direction, try lowering this value
```
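One way these values could be wired together is via transformers.TrainingArguments; a sketch under the assumption that the standard Trainer is used (fp16 and logging_steps are additions for illustration, not from this README):

```python
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./custom_LLM",
    num_train_epochs=num_epochs,                              # 1
    per_device_train_batch_size=micro_batch,                  # 1
    gradient_accumulation_steps=gradient_accumulation_steps,  # 16
    learning_rate=learning_rate,                              # 2e-4
    lr_scheduler_type=lr_scheduler,                           # 'cosine'
    warmup_ratio=warmup_ratio,                                # 0.06
    optim=optimizer,                                          # 'adamw_torch'
    weight_decay=weight_decay,                                # 0.01
    max_grad_norm=max_grad_norm,                              # 0.8
    fp16=True,         # assumed, to match the float16 dtype above
    logging_steps=10,  # assumed
)
```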
### LoRA config

```python
lora_r = 16
lora_alpha = 16
lora_dropout = 0.05
lora_target_modules = ["gate_proj", "down_proj", "up_proj"]
```
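These values map directly onto peft.LoraConfig; a sketch (bias and task_type are assumptions). Note that only the MLP projections are adapted here; the attention projections (q_proj/k_proj/v_proj/o_proj) are left frozen:

```python
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=lora_r,                            # 16
    lora_alpha=lora_alpha,               # 16
    lora_dropout=lora_dropout,           # 0.05
    target_modules=lora_target_modules,  # MLP projections only
    bias="none",                         # assumed
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
```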
### Settings for the inputs produced by the tokenizer

```python
train_on_inputs = False
add_eos_token = True

val_data = None  # no validation set is used
```
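train_on_inputs = False conventionally means the prompt portion contributes nothing to the loss, and add_eos_token = True appends an EOS token to each example. A sketch of a tokenize function under those assumptions, in the common alpaca-lora style (the prompt/completion split is hypothetical):

```python
def tokenize(prompt, completion):
    # Tokenize prompt + completion together, truncated to cutoff_len.
    full = tokenizer(prompt + completion, truncation=True,
                     max_length=cutoff_len, padding=False)

    # add_eos_token = True: append EOS if truncation left room for it.
    if add_eos_token and len(full["input_ids"]) < cutoff_len \
            and full["input_ids"][-1] != tokenizer.eos_token_id:
        full["input_ids"].append(tokenizer.eos_token_id)
        full["attention_mask"].append(1)

    labels = list(full["input_ids"])
    if not train_on_inputs:
        # Mask the prompt tokens with -100, which the cross-entropy loss ignores.
        prompt_len = len(tokenizer(prompt)["input_ids"])
        labels[:prompt_len] = [-100] * prompt_len
    full["labels"] = labels
    return full
```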
### Others

```python
resume_from_checkpoint = False  # !! To continue training from a checkpoint, set this to a path such as './custom_LLM/checkpoint-[xxx]'!
output_dir = './custom_LLM'
```
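A brief usage sketch for starting (or resuming) the run; the Trainer wiring and train_data are placeholders, not taken from this README:

```python
from transformers import Trainer, DataCollatorForSeq2Seq

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,  # placeholder; the dataset is not specified here
    eval_dataset=val_data,     # None, so no evaluation loop runs
    data_collator=DataCollatorForSeq2Seq(
        tokenizer, padding=True, pad_to_multiple_of=8, return_tensors="pt"
    ),
)

# 'False or None' -> None, so a fresh run starts; pass a path like
# './custom_LLM/checkpoint-[xxx]' to resume instead.
trainer.train(resume_from_checkpoint=resume_from_checkpoint or None)
model.save_pretrained(output_dir)  # saves the LoRA adapter weights
```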