# pip install torch peft==0.4.0 bitsandbytes transformers==4.31.0 trl==0.4.7 accelerate einops
# pip install tqdm scipy

import torch
from datasets import load_dataset
from peft import LoraConfig, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
)
from trl import SFTTrainer

print("Is CUDA enabled?", torch.cuda.is_available())

# Load the formatted Q&A dataset; SFTTrainer reads raw strings from its "Text" column.
training_dataset = load_dataset("csv", data_files="formatted_qna_lite.csv", split="train")
print(training_dataset)

base_model = "microsoft/phi-2"
new_model = "phi-2-ft-medq"

# Phi-2 ships without a pad token: reuse EOS and pad on the right for training.
tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# QLoRA setup: load the base model in 4-bit NF4 with fp16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
)

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    # use_flash_attention_2=True,  # Phi does not support this yet.
    trust_remote_code=True,
    flash_attn=True,
    flash_rotary=True,
    fused_dense=True,
    low_cpu_mem_usage=True,
    device_map={"": 0},
    revision="refs/pr/23",  # pinned revision whose remote code accepts the flash/fused kwargs above
)
model.config.use_cache = False  # incompatible with gradient checkpointing during training
model.config.pretraining_tp = 1

# Cast norms to fp32 and enable gradient checkpointing for stable 4-bit training.
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

training_arguments = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=32,  # effective batch size of 64
    evaluation_strategy="no",  # no eval split is loaded; switch to "steps" and pass an eval_dataset to evaluate
    # eval_steps=2000,
    logging_steps=15,
    optim="paged_adamw_8bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    save_steps=2000,
    warmup_ratio=0.05,
    weight_decay=0.01,
    max_steps=-1,
)

peft_config = LoraConfig(
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["Wqkv", "fc1", "fc2"],
    # ["Wqkv", "out_proj", "fc1", "fc2"] trains ~41M parameters
    # modules_to_save=["embed_tokens", "lm_head"]
)

trainer = SFTTrainer(
    model=model,
    train_dataset=training_dataset,
    peft_config=peft_config,
    dataset_text_field="Text",
    max_seq_length=900,
    tokenizer=tokenizer,
    args=training_arguments,
)

trainer.train()

# Interactive inference loop; the [INST] ... [/INST] template should match the
# format of the training "Text" column. Type EXIT to quit.
model.config.use_cache = True  # re-enable the KV cache for generation
prompt = "How old was Pascal when he lost his mother?"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=250)
while prompt != "EXIT":
    result = pipe(f"[INST] {prompt} [/INST]")
    print(result[0]["generated_text"])
    prompt = input("Ask the next question .....\n")
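
# The script defines `new_model` but never writes anything to it. Below is a
# minimal follow-up sketch for persisting the fine-tune (it runs once the loop
# above exits; in practice it could sit right after trainer.train()).
# trainer.model is a PEFT model, so save_pretrained() stores only the LoRA
# adapter weights; saving the tokenizer alongside makes reloading self-contained.
trainer.model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)

# To produce a standalone fp16 checkpoint, the usual peft pattern is to reload
# the base model unquantized and merge the adapter into it. This needs extra
# VRAM, so it is left commented; the "-merged" output path is an assumption.
# from peft import PeftModel
# base = AutoModelForCausalLM.from_pretrained(
#     base_model, torch_dtype=torch.float16, trust_remote_code=True, device_map={"": 0}
# )
# merged = PeftModel.from_pretrained(base, new_model).merge_and_unload()
# merged.save_pretrained(new_model + "-merged")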
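
# Hypothetical sketch of how a file like formatted_qna_lite.csv could be built:
# the trainer reads raw strings from a "Text" column, and the inference loop
# prompts with "[INST] ... [/INST]", so each training row should use the same
# template. The source file and its "Question"/"Answer" columns are assumptions,
# not part of the original script.
# import pandas as pd
# qna = pd.read_csv("raw_qna.csv")
# qna["Text"] = "[INST] " + qna["Question"] + " [/INST] " + qna["Answer"]
# qna[["Text"]].to_csv("formatted_qna_lite.csv", index=False)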