# pip install torch peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7 accelerate einops
# pip install tqdm scipy

import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    pipeline,
)
from trl import SFTTrainer

# Load the Q&A training data from a local CSV file.
training_dataset = load_dataset("csv", data_files="formatted_qna_lite.csv", split="train")
print(training_dataset)
print("Is CUDA enabled?", torch.cuda.is_available())

base_model = "microsoft/phi-2"
new_model = "phi-2-qna"

# Phi-2 ships without a pad token; reuse EOS and pad on the right for causal LM training.
tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    # quantization_config=bnb_config,  # optional 4-bit loading; see the sketch at the end of the file
    # use_flash_attention_2=True,  # Phi does not support this yet.
    trust_remote_code=True,  # Phi-2 loads its modeling code from the Hub
    flash_attn=True,         # kwargs consumed by the remote Phi code
    flash_rotary=True,
    fused_dense=True,
    revision="refs/pr/23",
)
model.config.use_cache = False  # caching is incompatible with training
model.config.pretraining_tp = 1

training_arguments = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=32,  # effective batch size of 64
    evaluation_strategy="no",  # no eval_dataset is passed to SFTTrainer below,
                               # so "steps" evaluation (eval_steps=2000) would fail
    logging_steps=15,
    optim="adamw_hf",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    save_steps=2000,
    warmup_ratio=0.05,
    weight_decay=0.01,
    max_steps=-1,  # train for the full num_train_epochs
)

# LoRA on the fused attention projection and the MLP layers of Phi-2.
peft_config = LoraConfig(
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["Wqkv", "fc1", "fc2"],
    # ["Wqkv", "out_proj", "fc1", "fc2"] trains ~41M params
    # modules_to_save=["embed_tokens", "lm_head"],
)

trainer = SFTTrainer(
    model=model,
    train_dataset=training_dataset,
    peft_config=peft_config,
    dataset_text_field="Text",
    max_seq_length=690,
    tokenizer=tokenizer,
    args=training_arguments,
)

trainer.train()

# Interactive Q&A loop; the prompt template must match the format used in the
# training CSV. Type EXIT to quit.
prompt = "How old was Pascal when he lost his mother?"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=250)
while prompt != "EXIT":
    result = pipe(f"[INST] {prompt} [/INST]")
    print(result[0]["generated_text"])
    print("\n")
    prompt = input("Ask the next question .....\n")
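
# --- Saving the fine-tuned adapter (sketch) ---------------------------------
# `new_model` is defined above but never used in the original script. A minimal
# sketch, assuming the standard PEFT/Trainer save path; this runs once the Q&A
# loop exits on EXIT:
trainer.model.save_pretrained(new_model)  # writes only the LoRA adapter weights
tokenizer.save_pretrained(new_model)      # keep the tokenizer alongside them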
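
# --- Reloading the adapter later (sketch) -----------------------------------
# Assuming the save above, the adapter can be reapplied to a fresh base model
# in a new session (names as defined earlier in this script):
#
#   from peft import PeftModel
#   base = AutoModelForCausalLM.from_pretrained(base_model, trust_remote_code=True)
#   tuned = PeftModel.from_pretrained(base, new_model)
#   tuned = tuned.merge_and_unload()  # optional: fold the LoRA weights into the base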
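
# --- Optional 4-bit quantization (sketch) -----------------------------------
# The from_pretrained call above references a commented-out
# `quantization_config=bnb_config`, but `bnb_config` is never defined. A minimal
# sketch of what it could look like with bitsandbytes NF4 quantization; the
# specific parameter values here are illustrative assumptions, not taken from
# the original script:
#
#   from transformers import BitsAndBytesConfig
#   from peft import prepare_model_for_kbit_training
#
#   bnb_config = BitsAndBytesConfig(
#       load_in_4bit=True,                     # store weights in 4 bits
#       bnb_4bit_quant_type="nf4",             # NormalFloat4 data type
#       bnb_4bit_compute_dtype=torch.float16,  # do matmuls in fp16
#       bnb_4bit_use_double_quant=True,        # quantize the quantization constants
#   )
#
# After loading the model with quantization_config=bnb_config, call
#   model = prepare_model_for_kbit_training(model)
# before handing it to SFTTrainer with the LoRA config.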