dad1909 committed on
Commit
f260b79
·
verified ·
1 Parent(s): c207a64

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -37
app.py CHANGED
@@ -8,9 +8,6 @@ import gradio as gr
8
  import json
9
  from huggingface_hub import HfApi
10
 
11
- # Ensure that all 4 GPUs are visible to PyTorch
12
- os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"
13
-
14
  max_seq_length = 4096
15
  dtype = None
16
  load_in_4bit = True
@@ -20,33 +17,25 @@ current_num = os.getenv("NUM")
20
  print(f"stage ${current_num}")
21
 
22
  api = HfApi(token=hf_token)
 
23
 
24
- model_base = "unsloth/llama-3-8b-Instruct-bnb-4bit"
25
 
26
  print("Starting model and tokenizer loading...")
27
 
28
  # Load the model and tokenizer
29
  model, tokenizer = FastLanguageModel.from_pretrained(
30
- model_name=model_base,
31
  max_seq_length=max_seq_length,
32
  dtype=dtype,
33
  load_in_4bit=load_in_4bit,
34
  token=hf_token
35
  )
36
-
37
- # Move the model to GPU
38
- model = model.to('cuda')
39
-
40
- # Wrap the model in DataParallel to use all GPUs
41
- if torch.cuda.device_count() > 1:
42
- print(f"Using {torch.cuda.device_count()} GPUs!")
43
- model = torch.nn.DataParallel(model)
44
-
45
  print("Model and tokenizer loaded successfully.")
46
 
47
  print("Configuring PEFT model...")
48
  model = FastLanguageModel.get_peft_model(
49
- model.module if isinstance(model, torch.nn.DataParallel) else model,
50
  r=16,
51
  target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
52
  lora_alpha=16,
@@ -116,7 +105,7 @@ print("Formatting function applied.")
116
 
117
  print("Initializing trainer...")
118
  trainer = SFTTrainer(
119
- model=model.module if isinstance(model, torch.nn.DataParallel) else model,
120
  tokenizer=tokenizer,
121
  train_dataset=dataset,
122
  dataset_text_field="text",
@@ -124,13 +113,14 @@ trainer = SFTTrainer(
124
  dataset_num_proc=2,
125
  packing=False,
126
  args=TrainingArguments(
127
- per_device_train_batch_size=17, # Adjust this based on GPU memory
128
- gradient_accumulation_steps=17,
129
  learning_rate=2e-4,
130
  fp16=not is_bfloat16_supported(),
131
  bf16=is_bfloat16_supported(),
132
  warmup_steps=5,
133
  logging_steps=10,
 
134
  optim="adamw_8bit",
135
  weight_decay=0.01,
136
  lr_scheduler_type="linear",
@@ -147,30 +137,19 @@ print("Training completed.")
147
  num = int(current_num)
148
  num += 1
149
 
150
- uploads_models = f"cybersentinal-3.0"
151
 
152
  print("Saving the trained model...")
153
- if isinstance(model, torch.nn.DataParallel):
154
- model.module.save_pretrained_merged("model", tokenizer, save_method="merged_16bit")
155
- else:
156
- model.save_pretrained_merged("model", tokenizer, save_method="merged_16bit")
157
  print("Model saved successfully.")
158
 
159
  print("Pushing the model to the hub...")
160
- if isinstance(model, torch.nn.DataParallel):
161
- model.module.push_to_hub_merged(
162
- uploads_models,
163
- tokenizer,
164
- save_method="merged_16bit",
165
- token=hf_token
166
- )
167
- else:
168
- model.push_to_hub_merged(
169
- uploads_models,
170
- tokenizer,
171
- save_method="merged_16bit",
172
- token=hf_token
173
- )
174
  print("Model pushed to hub successfully.")
175
 
176
  api.delete_space_variable(repo_id="dad1909/CyberCode", key="NUM")
 
8
  import json
9
  from huggingface_hub import HfApi
10
 
 
 
 
11
  max_seq_length = 4096
12
  dtype = None
13
  load_in_4bit = True
 
17
  print(f"stage ${current_num}")
18
 
19
  api = HfApi(token=hf_token)
20
+ models = f"dad1909/cybersentinal-2.0-{current_num}"
21
 
22
+ # model_base = "dad1909/cybersentinal-2.0"
23
 
24
  print("Starting model and tokenizer loading...")
25
 
26
  # Load the model and tokenizer
27
  model, tokenizer = FastLanguageModel.from_pretrained(
28
+ model_name=models,
29
  max_seq_length=max_seq_length,
30
  dtype=dtype,
31
  load_in_4bit=load_in_4bit,
32
  token=hf_token
33
  )
 
 
 
 
 
 
 
 
 
34
  print("Model and tokenizer loaded successfully.")
35
 
36
  print("Configuring PEFT model...")
37
  model = FastLanguageModel.get_peft_model(
38
+ model,
39
  r=16,
40
  target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
41
  lora_alpha=16,
 
105
 
106
  print("Initializing trainer...")
107
  trainer = SFTTrainer(
108
+ model=model,
109
  tokenizer=tokenizer,
110
  train_dataset=dataset,
111
  dataset_text_field="text",
 
113
  dataset_num_proc=2,
114
  packing=False,
115
  args=TrainingArguments(
116
+ per_device_train_batch_size=5,
117
+ gradient_accumulation_steps=5,
118
  learning_rate=2e-4,
119
  fp16=not is_bfloat16_supported(),
120
  bf16=is_bfloat16_supported(),
121
  warmup_steps=5,
122
  logging_steps=10,
123
+ max_steps=200,
124
  optim="adamw_8bit",
125
  weight_decay=0.01,
126
  lr_scheduler_type="linear",
 
137
  num = int(current_num)
138
  num += 1
139
 
140
+ uploads_models = f"cybersentinal-2.0-{str(num)}"
141
 
142
  print("Saving the trained model...")
143
+ model.save_pretrained_merged("model", tokenizer, save_method="merged_16bit")
 
 
 
144
  print("Model saved successfully.")
145
 
146
  print("Pushing the model to the hub...")
147
+ model.push_to_hub_merged(
148
+ uploads_models,
149
+ tokenizer,
150
+ save_method="merged_16bit",
151
+ token=hf_token
152
+ )
 
 
 
 
 
 
 
 
153
  print("Model pushed to hub successfully.")
154
 
155
  api.delete_space_variable(repo_id="dad1909/CyberCode", key="NUM")