evilfreelancer committed on
Commit 7a7de9f
1 Parent(s): 5f854ed

Update README.md

Files changed (1)
  1. README.md +69 -7
README.md CHANGED
@@ -1,13 +1,75 @@
  ---
  base_model: ai-forever/ruGPT-3.5-13B
  library_name: peft
  ---

- # ruGPT-3.5-13B trained on function call datasets

- - IlyaGusev/ru_turbo_alpaca
- - IlyaGusev/ru_turbo_alpaca_evol_instruct
- - IlyaGusev/ru_turbo_saiga
- - IlyaGusev/ru_sharegpt_cleaned
- - IlyaGusev/oasst1_ru_main_branch
- - lksy/ru_instruct_gpt4

  ---
  base_model: ai-forever/ruGPT-3.5-13B
  library_name: peft
+ license: mit
+ datasets:
+ - korotkov/glaive-function-calling-v2-ru-parsed
+ language:
+ - ru
+ tags:
+ - impruver
+ - russian
+ - function call
+ - lora
+ pipeline_tag: text-generation
  ---

+ # ruGPT-3.5-13B trained on function call datasets

+ https://github.com/EvilFreelancer/impruver/blob/main/configs/ruGPT35_13B_fc_lora.yml
+
+ ```yaml
+ # Training time
+ # - 1x RTX 4090 ~ 20Gb VRAM ~ 11h 8m
+
+ output_dir: ./models/ruGPT35_13B_lora_fc
+ train_path: ./train.ruGPT35_13B_fc.jsonl
+ val_path: ./val.ruGPT35_13B_fc.jsonl
+
+ datasets:
+   - name: korotkov/glaive-function-calling-v2-ru-parsed
+     split: train
+
+ model:
+   class: transformers.AutoModelForCausalLM
+   name: ai-forever/ruGPT-3.5-13B
+   load_in_4bit: true
+   load_in_8bit: false
+   dtype: bf16
+
+ lora:
+   r: 16
+   lora_alpha: 16
+   lora_dropout: 0.05
+   bias: none
+   target_modules: [ c_attn ]
+   task_type: CAUSAL_LM
+
+ tokenizer:
+   class: transformers.AutoTokenizer
+   name: ai-forever/ruGPT-3.5-13B
+   max_tokens_count: 1200
+
+ trainer:
+   eval_strategy: steps
+   save_strategy: steps
+   eval_steps: 100
+   save_steps: 100
+   per_device_train_batch_size: 1
+   per_device_eval_batch_size: 1
+   gradient_accumulation_steps: 128
+   logging_steps: 1
+   learning_rate: 0.0002
+   num_train_epochs: 2
+   lr_scheduler_type: cosine
+   warmup_steps: 16
+   optim: adamw_8bit
+   metric_for_best_model: eval_loss
+   load_best_model_at_end: true
+   save_total_limit: 2
+   seed: 42
+   remove_unused_columns: false
+   max_grad_norm: 1.0
+   weight_decay: 0.08
+   torch_compile: false
+ ```
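
Below is a minimal inference sketch, not part of the commit, showing how an adapter produced by this config could be loaded on top of the base model with `transformers` and `peft`. It assumes the adapter lives at the `output_dir` path from the config (`./models/ruGPT35_13B_lora_fc`), and the plain-text prompt is only a placeholder, since actual function-call requests should follow the template of the `korotkov/glaive-function-calling-v2-ru-parsed` dataset.

```python
# Hypothetical usage sketch: load the 4-bit quantized base model and apply the
# LoRA adapter. Paths and prompt format are assumptions, see the note above.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base_name = "ai-forever/ruGPT-3.5-13B"
adapter_path = "./models/ruGPT35_13B_lora_fc"  # output_dir from the training config

# 4-bit quantization with bf16 compute, mirroring load_in_4bit/dtype in the config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(base_name)
model = AutoModelForCausalLM.from_pretrained(
    base_name,
    quantization_config=bnb_config,
    device_map="auto",
)
model = PeftModel.from_pretrained(model, adapter_path)
model.eval()

# Placeholder prompt ("What is the weather in Moscow right now?"); real requests
# should use the function-calling format from the training dataset.
prompt = "Какая сейчас погода в Москве?"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=200, do_sample=False)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```

The 4-bit quantization with bf16 compute mirrors the `load_in_4bit: true` and `dtype: bf16` settings used during training, so the adapter weights are applied on top of the same quantized base.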