Commit 1a27c9e by cwiz (parent: 9ff33bd)

Upload 13 files

README.md CHANGED
@@ -1,3 +1,7 @@
 ---
-license: apache-2.0
+library_name: peft
 ---
+
+# LLaMa-Saiga-7b-Gofman
+
+[llama-7b-saiga-merged](https://huggingface.co/cwiz/llama-7b-saiga-merged) trained on the [Igor Gofman](https://github.com/Shoe-Eye/gofman-digital-oracle) dataset.
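Since the card sets `library_name: peft`, this repository holds a LoRA adapter rather than full model weights. A minimal loading sketch, assuming the adapter is published under `cwiz/llama-saiga-7b-gofman` (the repo id is not stated in this commit):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "cwiz/llama-7b-saiga-merged"      # base model from adapter_config.json
adapter_id = "cwiz/llama-saiga-7b-gofman"   # assumed repo id for this adapter

tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(
    base_id, torch_dtype=torch.float16, device_map="auto"
)
# Attach the LoRA weights from this commit on top of the merged Saiga base.
model = PeftModel.from_pretrained(base_model, adapter_id)
model.eval()
```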
adapter_config.json ADDED
@@ -0,0 +1,17 @@
+{
+    "base_model_name_or_path": "cwiz/llama-7b-saiga-merged",
+    "bias": "none",
+    "fan_in_fan_out": false,
+    "inference_mode": true,
+    "init_lora_weights": true,
+    "layers_pattern": null,
+    "layers_to_transform": null,
+    "lora_alpha": 64,
+    "lora_dropout": 0.05,
+    "modules_to_save": null,
+    "peft_type": "LORA",
+    "r": 32,
+    "revision": null,
+    "target_modules": ["q_proj", "v_proj"],
+    "task_type": "CAUSAL_LM"
+}
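For reference, the same adapter hyperparameters expressed as a `peft.LoraConfig`; this is a sketch of what the file above encodes, not code shipped in the repo:

```python
from peft import LoraConfig

# Mirrors adapter_config.json: rank-32 LoRA on the q/v attention projections.
lora_config = LoraConfig(
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "v_proj"],
    task_type="CAUSAL_LM",
)
```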
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ba35964fd471e7c77ada5454a43d5145fc28686948b1d32b09f3090b087f724
+size 67154893
generation_config.json ADDED
@@ -0,0 +1,13 @@
+{
+    "pad_token_id": 0,
+    "bos_token_id": 1,
+    "eos_token_id": 2,
+    "temperature": 0.8,
+    "top_p": 0.9,
+    "top_k": 40,
+    "do_sample": true,
+    "num_beams": 1,
+    "max_new_tokens": 1536,
+    "repetition_penalty": 1.1,
+    "no_repeat_ngram_size": 15
+}
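These sampling defaults are picked up automatically by `generate()` when the file ships with the model, but they can also be set explicitly. A sketch, reusing `model` and `tokenizer` from the loading example above; the prompt string is a placeholder in the Saiga format (see template.json below):

```python
from transformers import GenerationConfig

# Same settings as generation_config.json.
gen_config = GenerationConfig(
    pad_token_id=0,
    bos_token_id=1,
    eos_token_id=2,
    do_sample=True,
    temperature=0.8,
    top_p=0.9,
    top_k=40,
    num_beams=1,
    max_new_tokens=1536,
    repetition_penalty=1.1,
    no_repeat_ngram_size=15,
)

prompt = "<s>user\nПривет!</s>\n<s>bot\n"   # placeholder prompt
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output_ids = model.generate(**inputs, generation_config=gen_config)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```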
huggingface-metadata.txt ADDED
@@ -0,0 +1,6 @@
+url: https://huggingface.co/IlyaGusev/saiga_7b_lora
+branch: main
+download date: 2023-07-09 17:43:52
+sha256sum:
+    eb512e1c4bbb3123c4b482e5f3a996a7b8fdb10a48a2799e15c01b4ced3a58cf adapter_model.bin
+    9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 tokenizer.model
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+{
+    "bos_token": "<s>",
+    "eos_token": "</s>",
+    "pad_token": "<unk>",
+    "sep_token": "<s>",
+    "unk_token": "<unk>"
+}
template.json ADDED
@@ -0,0 +1,7 @@
+{
+    "system_prompt": "Ты — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им.",
+    "message_template": "<s>{role}\n{content}</s>\n",
+    "start_token_id": 1,
+    "end_token_id": 2,
+    "bot_token_id": 9225
+}
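A sketch of assembling a prompt from this template. The role names `system`, `user`, and `bot` follow the usual Saiga convention and are an assumption here; only the bot token id is recorded in the file:

```python
import json

with open("template.json") as f:
    tpl = json.load(f)

def build_prompt(messages):
    # Prepend the system prompt, render each turn with the message template,
    # and leave the final bot turn open for the model to complete.
    turns = [("system", tpl["system_prompt"])] + list(messages)
    rendered = "".join(
        tpl["message_template"].format(role=role, content=content)
        for role, content in turns
    )
    return rendered + "<s>bot\n"

print(build_prompt([("user", "Привет! Кто ты?")]))
```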
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,12 @@
+{
+    "tokenizer_class": "LlamaTokenizer",
+    "add_bos_token": false,
+    "add_eos_token": false,
+    "model_max_length": 2048,
+    "padding_side": "left",
+    "bos_token": "<s>",
+    "eos_token": "</s>",
+    "unk_token": "<unk>",
+    "clean_up_tokenization_spaces": false,
+    "special_tokens_map_file": "special_tokens_map.json"
+}
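The left `padding_side` is what batched causal-LM generation expects, so prompts stay flush against the generated tokens. A short sketch of loading the tokenizer from a local checkout of this repo (tokenizer.model, tokenizer_config.json and special_tokens_map.json all ship in this commit) and batching two placeholder prompts:

```python
from transformers import AutoTokenizer

# Load from a local checkout of this repository.
tokenizer = AutoTokenizer.from_pretrained(".")

# With padding_side="left" (and pad_token "<unk>" from special_tokens_map.json),
# batched prompts are padded on the left so generation continues the prompt.
batch = tokenizer(
    ["<s>user\nПривет!</s>\n<s>bot\n", "<s>user\nКто ты?</s>\n<s>bot\n"],
    padding=True,
    return_tensors="pt",
)
```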
training_config.json ADDED
@@ -0,0 +1,34 @@
+{
+    "trainer": {
+        "evaluation_strategy": "steps",
+        "per_device_train_batch_size": 4,
+        "per_device_eval_batch_size": 4,
+        "gradient_accumulation_steps": 32,
+        "eval_steps": 50,
+        "save_steps": 50,
+        "logging_steps": 5,
+        "learning_rate": 0.0003,
+        "num_train_epochs": 5,
+        "lr_scheduler_type": "cosine",
+        "warmup_steps": 30,
+        "fp16": true,
+        "bf16": false,
+        "torch_compile": false,
+        "optim": "adamw_torch"
+    },
+    "lora": {
+        "r": 16,
+        "lora_alpha": 16,
+        "lora_dropout": 0.05,
+        "bias": "none",
+        "target_modules": ["q_proj", "v_proj", "k_proj", "o_proj"],
+        "task_type": "CAUSAL_LM"
+    },
+    "load_in_8bit": true,
+    "only_target_loss": true,
+    "mode": "chat",
+    "templates_path": "internal_prompts/saiga_v2.json",
+    "model_name": "models/llama-7b",
+    "model_type": "causal",
+    "max_tokens_count": 2000
+}
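A sketch of how the `trainer` block in this config maps onto `transformers.TrainingArguments`; `output_dir` is a placeholder not recorded in the file:

```python
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="output",                  # placeholder, not part of the config
    evaluation_strategy="steps",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=32,       # effective batch of 4 * 32 = 128 sequences
    eval_steps=50,
    save_steps=50,
    logging_steps=5,
    learning_rate=3e-4,
    num_train_epochs=5,
    lr_scheduler_type="cosine",
    warmup_steps=30,
    fp16=True,
    bf16=False,
    torch_compile=False,
    optim="adamw_torch",
)
```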
training_log.json ADDED
@@ -0,0 +1,15 @@
+{
+    "base_model_name": "llama-saiga",
+    "base_model_class": "LlamaForCausalLM",
+    "base_loaded_in_4bit": false,
+    "base_loaded_in_8bit": true,
+    "loss": 2.4306,
+    "learning_rate": 0.0,
+    "epoch": 3.0,
+    "current_steps": 4319,
+    "train_runtime": 3963.6329,
+    "train_samples_per_second": 4.36,
+    "train_steps_per_second": 0.034,
+    "total_flos": 1.7670854947110912e+17,
+    "train_loss": 2.5247063248245802
+}
training_parameters.json ADDED
@@ -0,0 +1,27 @@
+{
+    "lora_name": "llama-saiga-7b-gofman",
+    "always_override": true,
+    "save_steps": 0.0,
+    "micro_batch_size": 4,
+    "batch_size": 128,
+    "epochs": 3.0,
+    "learning_rate": "3e-4",
+    "lr_scheduler_type": "linear",
+    "lora_rank": 32,
+    "lora_alpha": 64,
+    "lora_dropout": 0.05,
+    "cutoff_len": 256,
+    "dataset": "None",
+    "eval_dataset": "None",
+    "format": "None",
+    "eval_steps": 100.0,
+    "raw_text_file": "gofman",
+    "overlap_len": 128,
+    "newline_favor_len": 128,
+    "higher_rank_limit": false,
+    "warmup_steps": 100.0,
+    "optimizer": "adamw_torch",
+    "hard_cut_string": "\\n\\n",
+    "train_only_after": "",
+    "stop_at_loss": 0
+}
training_prompt.json ADDED
@@ -0,0 +1,3 @@
+{
+    "template_type": "raw_text"
+}