Azazelle Blackroot committed on
Commit
5e565c1
·
verified ·
0 Parent(s):

Duplicate from Blackroot/Llama3-RP-Lora

Browse files

Co-authored-by: Coffee Vampire <[email protected]>

Files changed (4) hide show
  1. .gitattributes +35 -0
  2. README.md +40 -0
  3. adapter_config.json +34 -0
  4. adapter_model.safetensors +3 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: unlicense
3
+ ---
4
+ ## Overview
5
+
6
+ This is a test LoRA for Llama-3 8B or Llama-3 8B Instruct. The goal for this model was to bring back some of the expressive prose and writing style of the base model, as well as to shift away from the rather dry style of 8B Instruct.
7
+
8
+ ## Data Processing
9
+
10
+ ### Raw Data to Custom Data
11
+
12
+ - Started with ~40GB of raw data
13
+ - Aggressively selected for writing style
14
+ - Cleaned multiple times both automatically and by hand
15
+ - Final dataset size: 78.4MB
16
+ - No synthetic data present in the used dataset
17
+
18
+ ### Data Cleaning Code
19
+
20
+ Once the training is verified to be beneficial (and thus the cleaning is likely correct), the data cleaning code will be released.
21
+
22
+ ## Training Procedure
23
+
24
+ ### Training Framework
25
+
26
+ Training was done QLoRA-style via [Axolotl](https://github.com/OpenAccess-AI-Collective/axolotl). The full training script and the data processing scripts will likewise be released once the procedure is verified to benefit the model in a useful way.
27
+
28
+ ### Training Parameters
29
+
30
+ - Base Model: Llama 3 8B (non-instruct)
31
+ - r: 4
32
+ - alpha: 8
33
+ - dropout: 0
34
+ - warmup: 45 steps
35
+ - epochs: 2
36
+ - lr scheduler: constant with warmup
37
+ - optimizer: adamw (torch fused)
38
+ - weight decay: 0.1
39
+ - adam_b1: 0.9
40
+ - adam_b2: 0.999
adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "/home/blackroot/Desktop/unsloth/fp16/Meta-Llama-3-8B",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 8,
14
+ "lora_dropout": 0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 4,
20
+ "rank_pattern": {},
21
+ "revision": "unsloth",
22
+ "target_modules": [
23
+ "gate_proj",
24
+ "down_proj",
25
+ "k_proj",
26
+ "v_proj",
27
+ "q_proj",
28
+ "o_proj",
29
+ "up_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc9123b4951fa2d13868906842e25c1a1717a46563bdd9fd05db0b71c2cca9d4
3
+ size 42002136