Spaces:

dad1909
/

CyberCode

Paused

App Files Community

dad1909 committed on Aug 16, 2024

Commit

f260b79

verified ·

1 Parent(s): c207a64

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -37

app.py CHANGED Viewed

@@ -8,9 +8,6 @@ import gradio as gr
 import json
 from huggingface_hub import HfApi
-# Ensure that all 4 GPUs are visible to PyTorch
-os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"
 max_seq_length = 4096
 dtype = None
 load_in_4bit = True
@@ -20,33 +17,25 @@ current_num = os.getenv("NUM")
 print(f"stage ${current_num}")
 api = HfApi(token=hf_token)
-model_base = "unsloth/llama-3-8b-Instruct-bnb-4bit"
 print("Starting model and tokenizer loading...")
 # Load the model and tokenizer
 model, tokenizer = FastLanguageModel.from_pretrained(
-    model_name=model_base,
     max_seq_length=max_seq_length,
     dtype=dtype,
     load_in_4bit=load_in_4bit,
     token=hf_token
 )
-# Move the model to GPU
-model = model.to('cuda')
-# Wrap the model in DataParallel to use all GPUs
-if torch.cuda.device_count() > 1:
-    print(f"Using {torch.cuda.device_count()} GPUs!")
-    model = torch.nn.DataParallel(model)
 print("Model and tokenizer loaded successfully.")
 print("Configuring PEFT model...")
 model = FastLanguageModel.get_peft_model(
-    model.module if isinstance(model, torch.nn.DataParallel) else model,
     r=16,
     target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
     lora_alpha=16,
@@ -116,7 +105,7 @@ print("Formatting function applied.")
 print("Initializing trainer...")
 trainer = SFTTrainer(
-    model=model.module if isinstance(model, torch.nn.DataParallel) else model,
     tokenizer=tokenizer,
     train_dataset=dataset,
     dataset_text_field="text",
@@ -124,13 +113,14 @@ trainer = SFTTrainer(
     dataset_num_proc=2,
     packing=False,
     args=TrainingArguments(
-        per_device_train_batch_size=17,  # Adjust this based on GPU memory
-        gradient_accumulation_steps=17,
         learning_rate=2e-4,
         fp16=not is_bfloat16_supported(),
         bf16=is_bfloat16_supported(),
         warmup_steps=5,
         logging_steps=10,
         optim="adamw_8bit",
         weight_decay=0.01,
         lr_scheduler_type="linear",
@@ -147,30 +137,19 @@ print("Training completed.")
 num = int(current_num)
 num += 1
-uploads_models = f"cybersentinal-3.0"
 print("Saving the trained model...")
-if isinstance(model, torch.nn.DataParallel):
-    model.module.save_pretrained_merged("model", tokenizer, save_method="merged_16bit")
-else:
-    model.save_pretrained_merged("model", tokenizer, save_method="merged_16bit")
 print("Model saved successfully.")
 print("Pushing the model to the hub...")
-if isinstance(model, torch.nn.DataParallel):
-    model.module.push_to_hub_merged(
-        uploads_models,
-        tokenizer,
-        save_method="merged_16bit",
-        token=hf_token
-    )
-else:
-    model.push_to_hub_merged(
-        uploads_models,
-        tokenizer,
-        save_method="merged_16bit",
-        token=hf_token
-    )
 print("Model pushed to hub successfully.")
 api.delete_space_variable(repo_id="dad1909/CyberCode", key="NUM")

 import json
 from huggingface_hub import HfApi
 max_seq_length = 4096
 dtype = None
 load_in_4bit = True
 print(f"stage ${current_num}")
 api = HfApi(token=hf_token)
+models = f"dad1909/cybersentinal-2.0-{current_num}"
+# model_base = "dad1909/cybersentinal-2.0"
 print("Starting model and tokenizer loading...")
 # Load the model and tokenizer
 model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name=models,
     max_seq_length=max_seq_length,
     dtype=dtype,
     load_in_4bit=load_in_4bit,
     token=hf_token
 )
 print("Model and tokenizer loaded successfully.")
 print("Configuring PEFT model...")
 model = FastLanguageModel.get_peft_model(
+    model,
     r=16,
     target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
     lora_alpha=16,
 print("Initializing trainer...")
 trainer = SFTTrainer(
+    model=model,
     tokenizer=tokenizer,
     train_dataset=dataset,
     dataset_text_field="text",
     dataset_num_proc=2,
     packing=False,
     args=TrainingArguments(
+        per_device_train_batch_size=5,
+        gradient_accumulation_steps=5,
         learning_rate=2e-4,
         fp16=not is_bfloat16_supported(),
         bf16=is_bfloat16_supported(),
         warmup_steps=5,
         logging_steps=10,
+        max_steps=200,
         optim="adamw_8bit",
         weight_decay=0.01,
         lr_scheduler_type="linear",
 num = int(current_num)
 num += 1
+uploads_models = f"cybersentinal-2.0-{str(num)}"
 print("Saving the trained model...")
+model.save_pretrained_merged("model", tokenizer, save_method="merged_16bit")
 print("Model saved successfully.")
 print("Pushing the model to the hub...")
+model.push_to_hub_merged(
+    uploads_models,
+    tokenizer,
+    save_method="merged_16bit",
+    token=hf_token
+)
 print("Model pushed to hub successfully.")
 api.delete_space_variable(repo_id="dad1909/CyberCode", key="NUM")