evilfreelancer committed on
Commit 7a7de9f
1 Parent(s): 5f854ed

Update README.md

Files changed (1)
  1. README.md +69 -7
README.md CHANGED
@@ -1,13 +1,75 @@
  ---
  base_model: ai-forever/ruGPT-3.5-13B
  library_name: peft
  ---

- # ruGPT-3.5-13B trained on function call datasets

- - IlyaGusev/ru_turbo_alpaca
- - IlyaGusev/ru_turbo_alpaca_evol_instruct
- - IlyaGusev/ru_turbo_saiga
- - IlyaGusev/ru_sharegpt_cleaned
- - IlyaGusev/oasst1_ru_main_branch
- - lksy/ru_instruct_gpt4

  ---
  base_model: ai-forever/ruGPT-3.5-13B
  library_name: peft
+ license: mit
+ datasets:
+ - korotkov/glaive-function-calling-v2-ru-parsed
+ language:
+ - ru
+ tags:
+ - impruver
+ - russian
+ - function call
+ - lora
+ pipeline_tag: text-generation
  ---

+ # ruGPT-3.5-13B trained on function call datasets

+ https://github.com/EvilFreelancer/impruver/blob/main/configs/ruGPT35_13B_fc_lora.yml
+
+ ```yaml
+ # Training time
+ # - 1x RTX 4090 ~ 20Gb VRAM ~ 11h 8m
+
+ output_dir: ./models/ruGPT35_13B_lora_fc
+ train_path: ./train.ruGPT35_13B_fc.jsonl
+ val_path: ./val.ruGPT35_13B_fc.jsonl
+
+ datasets:
+   - name: korotkov/glaive-function-calling-v2-ru-parsed
+     split: train
+
+ model:
+   class: transformers.AutoModelForCausalLM
+   name: ai-forever/ruGPT-3.5-13B
+   load_in_4bit: true
+   load_in_8bit: false
+   dtype: bf16
+
+ lora:
+   r: 16
+   lora_alpha: 16
+   lora_dropout: 0.05
+   bias: none
+   target_modules: [ c_attn ]
+   task_type: CAUSAL_LM
+
+ tokenizer:
+   class: transformers.AutoTokenizer
+   name: ai-forever/ruGPT-3.5-13B
+   max_tokens_count: 1200
+
+ trainer:
+   eval_strategy: steps
+   save_strategy: steps
+   eval_steps: 100
+   save_steps: 100
+   per_device_train_batch_size: 1
+   per_device_eval_batch_size: 1
+   gradient_accumulation_steps: 128
+   logging_steps: 1
+   learning_rate: 0.0002
+   num_train_epochs: 2
+   lr_scheduler_type: cosine
+   warmup_steps: 16
+   optim: adamw_8bit
+   metric_for_best_model: eval_loss
+   load_best_model_at_end: true
+   save_total_limit: 2
+   seed: 42
+   remove_unused_columns: false
+   max_grad_norm: 1.0
+   weight_decay: 0.08
+   torch_compile: false
+ ```
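
Below is a minimal inference sketch, not part of the commit, showing how an adapter produced by this config could be loaded on top of the base model with `transformers` and `peft`. It assumes the adapter lives at the `output_dir` path from the config (`./models/ruGPT35_13B_lora_fc`), and the plain-text prompt is only a placeholder, since actual function-call requests should follow the template of the `korotkov/glaive-function-calling-v2-ru-parsed` dataset.

```python
# Hypothetical usage sketch: load the 4-bit quantized base model and apply the
# LoRA adapter. Paths and prompt format are assumptions, see the note above.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base_name = "ai-forever/ruGPT-3.5-13B"
adapter_path = "./models/ruGPT35_13B_lora_fc"  # output_dir from the training config

# 4-bit quantization with bf16 compute, mirroring load_in_4bit/dtype in the config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(base_name)
model = AutoModelForCausalLM.from_pretrained(
    base_name,
    quantization_config=bnb_config,
    device_map="auto",
)
model = PeftModel.from_pretrained(model, adapter_path)
model.eval()

# Placeholder prompt ("What is the weather in Moscow right now?"); real requests
# should use the function-calling format from the training dataset.
prompt = "Какая сейчас погода в Москве?"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=200, do_sample=False)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```

The 4-bit quantization with bf16 compute mirrors the `load_in_4bit: true` and `dtype: bf16` settings used during training, so the adapter weights are applied on top of the same quantized base.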