Commit
•
78bd673
1
Parent(s):
79a4ae8
Training in progress, step 100
Browse files- config.json +26 -0
- config_full.yaml +46 -0
- deepspeed_zero3.yaml +22 -0
- model.safetensors +3 -0
- run_sft.py +205 -0
- runs/Feb01_17-38-02_ip-26-0-165-24/events.out.tfevents.1706809106.ip-26-0-165-24.237059.0 +3 -0
- special_tokens_map.json +24 -0
- tokenizer.json +0 -0
- tokenizer.model +3 -0
- tokenizer_config.json +43 -0
- training_args.bin +3 -0
- wandb/debug-internal.log +385 -0
- wandb/debug.log +28 -0
- wandb/run-20240201_173828-py26nu6m/files/conda-environment.yaml +218 -0
- wandb/run-20240201_173828-py26nu6m/files/config.yaml +644 -0
- wandb/run-20240201_173828-py26nu6m/files/output.log +131 -0
- wandb/run-20240201_173828-py26nu6m/files/requirements.txt +141 -0
- wandb/run-20240201_173828-py26nu6m/files/wandb-metadata.json +558 -0
- wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json +1 -0
- wandb/run-20240201_173828-py26nu6m/logs/debug-internal.log +385 -0
- wandb/run-20240201_173828-py26nu6m/logs/debug.log +28 -0
- wandb/run-20240201_173828-py26nu6m/run-py26nu6m.wandb +0 -0
config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "sanchit-gandhi/Mistral-7B-v0.1-6-layer",
|
3 |
+
"architectures": [
|
4 |
+
"MistralForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 1,
|
8 |
+
"eos_token_id": 2,
|
9 |
+
"hidden_act": "silu",
|
10 |
+
"hidden_size": 4096,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 14336,
|
13 |
+
"max_position_embeddings": 32768,
|
14 |
+
"model_type": "mistral",
|
15 |
+
"num_attention_heads": 32,
|
16 |
+
"num_hidden_layers": 6,
|
17 |
+
"num_key_value_heads": 8,
|
18 |
+
"rms_norm_eps": 1e-05,
|
19 |
+
"rope_theta": 10000.0,
|
20 |
+
"sliding_window": 4096,
|
21 |
+
"tie_word_embeddings": false,
|
22 |
+
"torch_dtype": "bfloat16",
|
23 |
+
"transformers_version": "4.36.2",
|
24 |
+
"use_cache": false,
|
25 |
+
"vocab_size": 32000
|
26 |
+
}
|
config_full.yaml
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Model arguments
|
2 |
+
model_name_or_path: sanchit-gandhi/Mistral-7B-v0.1-6-layer
|
3 |
+
model_revision: main
|
4 |
+
torch_dtype: bfloat16
|
5 |
+
use_flash_attention_2: true
|
6 |
+
|
7 |
+
# Data training arguments
|
8 |
+
chat_template: "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}"
|
9 |
+
dataset_mixer:
|
10 |
+
HuggingFaceH4/ultrachat_200k: 1.0
|
11 |
+
dataset_splits:
|
12 |
+
- train_sft
|
13 |
+
- test_sft
|
14 |
+
preprocessing_num_workers: 12
|
15 |
+
|
16 |
+
# SFT trainer config
|
17 |
+
bf16: true
|
18 |
+
do_eval: true
|
19 |
+
evaluation_strategy: epoch
|
20 |
+
gradient_accumulation_steps: 1
|
21 |
+
gradient_checkpointing: true
|
22 |
+
gradient_checkpointing_kwargs:
|
23 |
+
use_reentrant: False
|
24 |
+
hub_strategy: every_save
|
25 |
+
learning_rate: 2.0e-05
|
26 |
+
log_level: info
|
27 |
+
logging_steps: 5
|
28 |
+
logging_strategy: steps
|
29 |
+
lr_scheduler_type: cosine
|
30 |
+
max_seq_length: 2048
|
31 |
+
max_steps: -1
|
32 |
+
num_train_epochs: 1
|
33 |
+
output_dir: ./
|
34 |
+
overwrite_output_dir: true
|
35 |
+
per_device_eval_batch_size: 8
|
36 |
+
per_device_train_batch_size: 16
|
37 |
+
push_to_hub: true
|
38 |
+
remove_unused_columns: true
|
39 |
+
report_to:
|
40 |
+
- tensorboard
|
41 |
+
- wandb
|
42 |
+
save_strategy: "steps"
|
43 |
+
save_steps: 100
|
44 |
+
save_total_limit: 1
|
45 |
+
seed: 42
|
46 |
+
warmup_ratio: 0.1
|
deepspeed_zero3.yaml
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
compute_environment: LOCAL_MACHINE
|
2 |
+
debug: false
|
3 |
+
deepspeed_config:
|
4 |
+
deepspeed_multinode_launcher: standard
|
5 |
+
offload_optimizer_device: none
|
6 |
+
offload_param_device: none
|
7 |
+
zero3_init_flag: true
|
8 |
+
zero3_save_16bit_model: true
|
9 |
+
zero_stage: 3
|
10 |
+
distributed_type: DEEPSPEED
|
11 |
+
downcast_bf16: 'no'
|
12 |
+
machine_rank: 0
|
13 |
+
main_training_function: main
|
14 |
+
mixed_precision: bf16
|
15 |
+
num_machines: 1
|
16 |
+
num_processes: 8
|
17 |
+
rdzv_backend: static
|
18 |
+
same_network: true
|
19 |
+
tpu_env: []
|
20 |
+
tpu_use_cluster: false
|
21 |
+
tpu_use_sudo: false
|
22 |
+
use_cpu: false
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea3988d778bcca2e85f7c4596420ea91e8b5ce55168e2a692fb57b0f4bb71c04
|
3 |
+
size 3141646744
|
run_sft.py
ADDED
@@ -0,0 +1,205 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
# coding=utf-8
|
3 |
+
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
|
4 |
+
#
|
5 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
6 |
+
# you may not use this file except in compliance with the License.
|
7 |
+
# You may obtain a copy of the License at
|
8 |
+
#
|
9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10 |
+
#
|
11 |
+
# Unless required by applicable law or agreed to in writing, software
|
12 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14 |
+
# See the License for the specific language governing permissions and
|
15 |
+
# limitations under the License.
|
16 |
+
"""
|
17 |
+
Supervised fine-tuning script for decoder language models.
|
18 |
+
"""
|
19 |
+
|
20 |
+
import logging
|
21 |
+
import random
|
22 |
+
import sys
|
23 |
+
|
24 |
+
import datasets
|
25 |
+
import torch
|
26 |
+
import transformers
|
27 |
+
from transformers import set_seed
|
28 |
+
|
29 |
+
from alignment import (
|
30 |
+
DataArguments,
|
31 |
+
H4ArgumentParser,
|
32 |
+
ModelArguments,
|
33 |
+
SFTConfig,
|
34 |
+
apply_chat_template,
|
35 |
+
get_checkpoint,
|
36 |
+
get_datasets,
|
37 |
+
get_kbit_device_map,
|
38 |
+
get_peft_config,
|
39 |
+
get_quantization_config,
|
40 |
+
get_tokenizer,
|
41 |
+
)
|
42 |
+
from trl import SFTTrainer
|
43 |
+
|
44 |
+
|
45 |
+
logger = logging.getLogger(__name__)
|
46 |
+
|
47 |
+
|
48 |
+
def main():
    """Run supervised fine-tuning of a decoder language model end to end.

    Steps, in order:
      1. Parse model, data and training arguments with ``H4ArgumentParser``.
      2. Seed the RNGs and configure logging for this process.
      3. Detect an existing checkpoint to resume from.
      4. Load the raw datasets and render every example with the chat template.
      5. Build the keyword arguments used to instantiate the model and hand
         them to ``SFTTrainer`` (the model is passed by name, so the trainer
         is what actually instantiates it).
      6. Train, optionally evaluate, save the model, write the model card and
         optionally push to the Hub.

    The function takes no parameters and returns nothing; all configuration
    comes from the argument parser.
    """
    parser = H4ArgumentParser((ModelArguments, DataArguments, SFTConfig))
    model_args, data_args, training_args = parser.parse()

    # Set seed for reproducibility
    set_seed(training_args.seed)

    ###############
    # Setup logging
    ###############
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )
    log_level = training_args.get_process_log_level()
    logger.setLevel(log_level)
    datasets.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.enable_default_handler()
    transformers.utils.logging.enable_explicit_format()

    # Log on each process a small summary.
    # Report half-precision training for either fp16 or bf16; checking fp16
    # alone prints False for bf16 runs.
    half_precision = training_args.fp16 or training_args.bf16
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
        + f" distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {half_precision}"
    )
    logger.info(f"Model parameters {model_args}")
    logger.info(f"Data parameters {data_args}")
    logger.info(f"Training/evaluation parameters {training_args}")

    # Check for last checkpoint
    last_checkpoint = get_checkpoint(training_args)
    if last_checkpoint is not None and training_args.resume_from_checkpoint is None:
        logger.info(f"Checkpoint detected, resuming training at {last_checkpoint=}.")

    ###############
    # Load datasets
    ###############
    raw_datasets = get_datasets(data_args, splits=data_args.dataset_splits)
    logger.info(
        f"Training on the following datasets and their proportions: {[split + ' : ' + str(dset.num_rows) for split, dset in raw_datasets.items()]}"
    )
    # Original columns are dropped once the chat template has been applied.
    column_names = list(raw_datasets["train"].features)

    ################
    # Load tokenizer
    ################
    tokenizer = get_tokenizer(model_args, data_args)

    #####################
    # Apply chat template
    #####################
    raw_datasets = raw_datasets.map(
        apply_chat_template,
        fn_kwargs={"tokenizer": tokenizer, "task": "sft"},
        num_proc=data_args.preprocessing_num_workers,
        remove_columns=column_names,
        desc="Applying chat template",
    )
    train_dataset = raw_datasets["train"]
    eval_dataset = raw_datasets["test"]

    with training_args.main_process_first(desc="Log a few random samples from the processed training set"):
        # random.sample raises ValueError when asked for more items than the
        # population holds, so clamp the sample size for tiny training sets.
        num_samples = min(3, len(train_dataset))
        for index in random.sample(range(len(train_dataset)), num_samples):
            logger.info(f"Sample {index} of the processed training set:\n\n{train_dataset[index]['text']}")

    #######################
    # Load pretrained model
    #######################
    logger.info("*** Load pretrained model ***")
    # "auto" and None are passed through unchanged; any other value is looked
    # up as an attribute of the torch module (e.g. "bfloat16" -> torch.bfloat16).
    torch_dtype = (
        model_args.torch_dtype if model_args.torch_dtype in ["auto", None] else getattr(torch, model_args.torch_dtype)
    )
    quantization_config = get_quantization_config(model_args)

    model_kwargs = dict(
        revision=model_args.model_revision,
        trust_remote_code=model_args.trust_remote_code,
        use_flash_attention_2=model_args.use_flash_attention_2,
        torch_dtype=torch_dtype,
        # The KV cache is turned off whenever gradient checkpointing is on.
        use_cache=not training_args.gradient_checkpointing,
        device_map=get_kbit_device_map() if quantization_config is not None else None,
        quantization_config=quantization_config,
    )
    # NOTE(review): only the init kwargs have been assembled at this point; the
    # model itself is passed to SFTTrainer by name below. The message is kept
    # unchanged so existing log output stays the same.
    logger.info("*** Model loaded! ***")

    ########################
    # Initialize the Trainer
    ########################
    trainer = SFTTrainer(
        model=model_args.model_name_or_path,
        model_init_kwargs=model_kwargs,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        dataset_text_field="text",
        max_seq_length=training_args.max_seq_length,
        tokenizer=tokenizer,
        packing=True,
        peft_config=get_peft_config(model_args),
    )

    ###############
    # Training loop
    ###############
    logger.info("*** Train ***")
    # An explicitly requested checkpoint takes precedence over an auto-detected one.
    checkpoint = None
    if training_args.resume_from_checkpoint is not None:
        checkpoint = training_args.resume_from_checkpoint
    elif last_checkpoint is not None:
        checkpoint = last_checkpoint
    train_result = trainer.train(resume_from_checkpoint=checkpoint)
    metrics = train_result.metrics
    metrics["train_samples"] = len(train_dataset)
    trainer.log_metrics("train", metrics)
    trainer.save_metrics("train", metrics)
    trainer.save_state()

    ##########
    # Evaluate
    ##########
    if training_args.do_eval:
        logger.info("*** Evaluate ***")
        metrics = trainer.evaluate()
        metrics["eval_samples"] = len(eval_dataset)
        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    ##################################
    # Save model and create model card
    ##################################
    logger.info("*** Save model ***")
    trainer.save_model(training_args.output_dir)
    logger.info(f"Model saved to {training_args.output_dir}")

    # Save everything else on main process
    kwargs = {
        "finetuned_from": model_args.model_name_or_path,
        "dataset": list(data_args.dataset_mixer.keys()),
        "dataset_tags": list(data_args.dataset_mixer.keys()),
        "tags": ["alignment-handbook"],
    }
    if trainer.accelerator.is_main_process:
        trainer.create_model_card(**kwargs)
        # Restore k,v cache for fast inference
        trainer.model.config.use_cache = True
        trainer.model.config.save_pretrained(training_args.output_dir)

    if training_args.push_to_hub is True:
        logger.info("Pushing to hub...")
        trainer.push_to_hub(**kwargs)

    logger.info("*** Training complete ***")
202 |
+
|
203 |
+
|
204 |
+
if __name__ == "__main__":
|
205 |
+
main()
|
runs/Feb01_17-38-02_ip-26-0-165-24/events.out.tfevents.1706809106.ip-26-0-165-24.237059.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16dd7fa95e3557c9f2227dd3fe7f108ee5d59052a40eeb5741db9bf340a8a65d
|
3 |
+
size 7609
|
special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "</s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": "</s>",
|
17 |
+
"unk_token": {
|
18 |
+
"content": "<unk>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": false,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
+
}
|
24 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
|
3 |
+
size 493443
|
tokenizer_config.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": true,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"0": {
|
6 |
+
"content": "<unk>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"1": {
|
14 |
+
"content": "<s>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": false,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": true
|
20 |
+
},
|
21 |
+
"2": {
|
22 |
+
"content": "</s>",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": false,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false,
|
27 |
+
"special": true
|
28 |
+
}
|
29 |
+
},
|
30 |
+
"additional_special_tokens": [],
|
31 |
+
"bos_token": "<s>",
|
32 |
+
"chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
|
33 |
+
"clean_up_tokenization_spaces": false,
|
34 |
+
"eos_token": "</s>",
|
35 |
+
"legacy": true,
|
36 |
+
"model_max_length": 2048,
|
37 |
+
"pad_token": "</s>",
|
38 |
+
"sp_model_kwargs": {},
|
39 |
+
"spaces_between_special_tokens": false,
|
40 |
+
"tokenizer_class": "LlamaTokenizer",
|
41 |
+
"unk_token": "<unk>",
|
42 |
+
"use_default_system_prompt": false
|
43 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85527db3b58440cee04521e4eb775e7259e847cd1a669eaf7502bcc6b6feb0ca
|
3 |
+
size 5816
|
wandb/debug-internal.log
ADDED
@@ -0,0 +1,385 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-02-01 17:38:28,445 INFO StreamThr :237521 [internal.py:wandb_internal():86] W&B internal server running at pid: 237521, started at: 2024-02-01 17:38:28.443368
|
2 |
+
2024-02-01 17:38:28,446 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status
|
3 |
+
2024-02-01 17:38:28,454 INFO WriterThread:237521 [datastore.py:open_for_write():85] open: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/run-py26nu6m.wandb
|
4 |
+
2024-02-01 17:38:28,455 DEBUG SenderThread:237521 [sender.py:send():382] send: header
|
5 |
+
2024-02-01 17:38:28,476 DEBUG SenderThread:237521 [sender.py:send():382] send: run
|
6 |
+
2024-02-01 17:38:28,713 INFO SenderThread:237521 [dir_watcher.py:__init__():211] watching files in: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files
|
7 |
+
2024-02-01 17:38:28,713 INFO SenderThread:237521 [sender.py:_start_run_threads():1136] run started: py26nu6m with start time 1706809108.451874
|
8 |
+
2024-02-01 17:38:28,720 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: check_version
|
9 |
+
2024-02-01 17:38:28,720 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: check_version
|
10 |
+
2024-02-01 17:38:28,774 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: run_start
|
11 |
+
2024-02-01 17:38:28,833 DEBUG HandlerThread:237521 [system_info.py:__init__():32] System info init
|
12 |
+
2024-02-01 17:38:28,833 DEBUG HandlerThread:237521 [system_info.py:__init__():47] System info init done
|
13 |
+
2024-02-01 17:38:28,833 INFO HandlerThread:237521 [system_monitor.py:start():194] Starting system monitor
|
14 |
+
2024-02-01 17:38:28,833 INFO SystemMonitor:237521 [system_monitor.py:_start():158] Starting system asset monitoring threads
|
15 |
+
2024-02-01 17:38:28,833 INFO HandlerThread:237521 [system_monitor.py:probe():214] Collecting system info
|
16 |
+
2024-02-01 17:38:28,834 INFO SystemMonitor:237521 [interfaces.py:start():190] Started cpu monitoring
|
17 |
+
2024-02-01 17:38:28,834 INFO SystemMonitor:237521 [interfaces.py:start():190] Started disk monitoring
|
18 |
+
2024-02-01 17:38:28,834 INFO SystemMonitor:237521 [interfaces.py:start():190] Started gpu monitoring
|
19 |
+
2024-02-01 17:38:28,836 INFO SystemMonitor:237521 [interfaces.py:start():190] Started memory monitoring
|
20 |
+
2024-02-01 17:38:28,836 INFO SystemMonitor:237521 [interfaces.py:start():190] Started network monitoring
|
21 |
+
2024-02-01 17:38:28,880 DEBUG HandlerThread:237521 [system_info.py:probe():196] Probing system
|
22 |
+
2024-02-01 17:38:28,883 DEBUG HandlerThread:237521 [system_info.py:_probe_git():181] Probing git
|
23 |
+
2024-02-01 17:38:28,908 DEBUG HandlerThread:237521 [system_info.py:_probe_git():189] Probing git done
|
24 |
+
2024-02-01 17:38:28,908 DEBUG HandlerThread:237521 [system_info.py:probe():244] Probing system done
|
25 |
+
2024-02-01 17:38:28,908 DEBUG HandlerThread:237521 [system_monitor.py:probe():223] {'os': 'Linux-5.15.0-1048-aws-x86_64-with-glibc2.31', 'python': '3.11.5', 'heartbeatAt': '2024-02-01T17:38:28.880477', 'startedAt': '2024-02-01T17:38:28.419493', 'docker': None, 'cuda': None, 'args': ('config_full.yaml',), 'state': 'running', 'program': '/fsx/sanchit/distil-zephyr-1.5b-ssft/run_sft.py', 'codePathLocal': 'run_sft.py', 'codePath': 'run_sft.py', 'git': {'remote': 'https://huggingface.co/sanchit-gandhi/distil-zephyr-1.5b-ssft', 'commit': '79a4ae874a71e67016ded927e7d23351e5c7dab8'}, 'email': None, 'root': '/fsx/sanchit/distil-zephyr-1.5b-ssft', 'host': 'ip-26-0-165-24', 'username': 'sanchit', 'executable': '/fsx/sanchit/miniconda3/envs/venv/bin/python', 'cpu_count': 96, 'cpu_count_logical': 96, 'cpu_freq': {'current': 2731.1230833333334, 'min': 0.0, 'max': 0.0}, 'cpu_freq_per_core': [{'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 3598.237, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 3593.987, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 3597.474, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 3587.972, 'min': 0.0, 'max': 0.0}, {'current': 3597.373, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2899.882, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 
3598.404, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 3597.582, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 
0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}], 'disk': {'/': {'total': 290.7472343444824, 'used': 57.44935989379883}}, 'gpu': 'NVIDIA H100 80GB HBM3', 'gpu_count': 8, 'gpu_devices': [{'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}], 'memory': {'total': 1999.9855346679688}}
|
26 |
+
2024-02-01 17:38:28,908 INFO HandlerThread:237521 [system_monitor.py:probe():224] Finished collecting system info
|
27 |
+
2024-02-01 17:38:28,908 INFO HandlerThread:237521 [system_monitor.py:probe():227] Publishing system info
|
28 |
+
2024-02-01 17:38:28,908 DEBUG HandlerThread:237521 [system_info.py:_save_pip():52] Saving list of pip packages installed into the current environment
|
29 |
+
2024-02-01 17:38:28,910 DEBUG HandlerThread:237521 [system_info.py:_save_pip():68] Saving pip packages done
|
30 |
+
2024-02-01 17:38:28,910 DEBUG HandlerThread:237521 [system_info.py:_save_conda():75] Saving list of conda packages installed into the current environment
|
31 |
+
2024-02-01 17:38:29,716 INFO Thread-12 :237521 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/conda-environment.yaml
|
32 |
+
2024-02-01 17:38:29,716 INFO Thread-12 :237521 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/requirements.txt
|
33 |
+
2024-02-01 17:38:34,420 DEBUG HandlerThread:237521 [system_info.py:_save_conda():87] Saving conda packages done
|
34 |
+
2024-02-01 17:38:34,423 INFO HandlerThread:237521 [system_monitor.py:probe():229] Finished publishing system info
|
35 |
+
2024-02-01 17:38:34,457 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
36 |
+
2024-02-01 17:38:34,457 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: keepalive
|
37 |
+
2024-02-01 17:38:34,458 DEBUG SenderThread:237521 [sender.py:send():382] send: files
|
38 |
+
2024-02-01 17:38:34,458 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-metadata.json with policy now
|
39 |
+
2024-02-01 17:38:34,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
40 |
+
2024-02-01 17:38:34,465 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
41 |
+
2024-02-01 17:38:34,466 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
42 |
+
2024-02-01 17:38:34,553 DEBUG SenderThread:237521 [sender.py:send():382] send: telemetry
|
43 |
+
2024-02-01 17:38:34,554 DEBUG SenderThread:237521 [sender.py:send():382] send: config
|
44 |
+
2024-02-01 17:38:34,555 DEBUG SenderThread:237521 [sender.py:send():382] send: metric
|
45 |
+
2024-02-01 17:38:34,555 DEBUG SenderThread:237521 [sender.py:send():382] send: telemetry
|
46 |
+
2024-02-01 17:38:34,555 DEBUG SenderThread:237521 [sender.py:send():382] send: metric
|
47 |
+
2024-02-01 17:38:34,555 WARNING SenderThread:237521 [sender.py:send_metric():1343] Seen metric with glob (shouldn't happen)
|
48 |
+
2024-02-01 17:38:34,555 DEBUG SenderThread:237521 [sender.py:send():382] send: telemetry
|
49 |
+
2024-02-01 17:38:34,721 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/conda-environment.yaml
|
50 |
+
2024-02-01 17:38:34,722 INFO Thread-12 :237521 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-metadata.json
|
51 |
+
2024-02-01 17:38:34,722 INFO Thread-12 :237521 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
52 |
+
2024-02-01 17:38:34,796 INFO wandb-upload_0:237521 [upload_job.py:push():131] Uploaded file /tmp/tmpfs5f2n7fwandb/421ry27q-wandb-metadata.json
|
53 |
+
2024-02-01 17:38:36,724 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
54 |
+
2024-02-01 17:38:39,559 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
55 |
+
2024-02-01 17:38:42,110 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
56 |
+
2024-02-01 17:38:42,112 DEBUG SenderThread:237521 [sender.py:send():382] send: metric
|
57 |
+
2024-02-01 17:38:42,112 DEBUG SenderThread:237521 [sender.py:send():382] send: metric
|
58 |
+
2024-02-01 17:38:42,112 DEBUG SenderThread:237521 [sender.py:send():382] send: metric
|
59 |
+
2024-02-01 17:38:42,112 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
60 |
+
2024-02-01 17:38:42,113 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
61 |
+
2024-02-01 17:38:42,116 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
62 |
+
2024-02-01 17:38:42,732 INFO Thread-12 :237521 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
63 |
+
2024-02-01 17:38:44,735 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
64 |
+
2024-02-01 17:38:44,770 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
65 |
+
2024-02-01 17:38:45,673 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
66 |
+
2024-02-01 17:38:45,674 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
67 |
+
2024-02-01 17:38:45,674 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
68 |
+
2024-02-01 17:38:45,676 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
69 |
+
2024-02-01 17:38:45,737 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
70 |
+
2024-02-01 17:38:46,738 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
71 |
+
2024-02-01 17:38:48,741 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
72 |
+
2024-02-01 17:38:49,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
73 |
+
2024-02-01 17:38:49,465 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
74 |
+
2024-02-01 17:38:49,467 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
75 |
+
2024-02-01 17:38:50,190 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
76 |
+
2024-02-01 17:38:50,230 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
77 |
+
2024-02-01 17:38:50,232 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
78 |
+
2024-02-01 17:38:50,232 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
79 |
+
2024-02-01 17:38:50,234 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
80 |
+
2024-02-01 17:38:50,745 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
81 |
+
2024-02-01 17:38:50,745 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
82 |
+
2024-02-01 17:38:52,747 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
83 |
+
2024-02-01 17:38:54,715 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
84 |
+
2024-02-01 17:38:54,716 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
85 |
+
2024-02-01 17:38:54,716 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
86 |
+
2024-02-01 17:38:54,718 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
87 |
+
2024-02-01 17:38:54,751 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
88 |
+
2024-02-01 17:38:54,751 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
89 |
+
2024-02-01 17:38:55,617 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
90 |
+
2024-02-01 17:38:56,753 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
91 |
+
2024-02-01 17:38:58,756 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
92 |
+
2024-02-01 17:38:59,239 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
93 |
+
2024-02-01 17:38:59,240 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
94 |
+
2024-02-01 17:38:59,240 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
95 |
+
2024-02-01 17:38:59,242 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
96 |
+
2024-02-01 17:38:59,758 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
97 |
+
2024-02-01 17:39:00,760 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
98 |
+
2024-02-01 17:39:01,049 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
99 |
+
2024-02-01 17:39:01,762 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/config.yaml
|
100 |
+
2024-02-01 17:39:02,763 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
101 |
+
2024-02-01 17:39:03,754 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
102 |
+
2024-02-01 17:39:03,755 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
103 |
+
2024-02-01 17:39:03,755 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
104 |
+
2024-02-01 17:39:03,757 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
105 |
+
2024-02-01 17:39:03,766 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
106 |
+
2024-02-01 17:39:04,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
107 |
+
2024-02-01 17:39:04,466 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
108 |
+
2024-02-01 17:39:04,466 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
109 |
+
2024-02-01 17:39:04,767 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
110 |
+
2024-02-01 17:39:06,478 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
111 |
+
2024-02-01 17:39:06,770 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
112 |
+
2024-02-01 17:39:08,293 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
113 |
+
2024-02-01 17:39:08,294 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
114 |
+
2024-02-01 17:39:08,295 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
115 |
+
2024-02-01 17:39:08,297 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
116 |
+
2024-02-01 17:39:08,773 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
117 |
+
2024-02-01 17:39:08,774 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
118 |
+
2024-02-01 17:39:10,776 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
119 |
+
2024-02-01 17:39:11,923 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
120 |
+
2024-02-01 17:39:12,779 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
121 |
+
2024-02-01 17:39:12,828 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
122 |
+
2024-02-01 17:39:12,830 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
123 |
+
2024-02-01 17:39:12,830 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
124 |
+
2024-02-01 17:39:12,832 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
125 |
+
2024-02-01 17:39:13,781 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
126 |
+
2024-02-01 17:39:14,782 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
127 |
+
2024-02-01 17:39:16,785 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
128 |
+
2024-02-01 17:39:17,363 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
129 |
+
2024-02-01 17:39:17,365 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
130 |
+
2024-02-01 17:39:17,366 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
131 |
+
2024-02-01 17:39:17,367 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
132 |
+
2024-02-01 17:39:17,368 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
133 |
+
2024-02-01 17:39:17,788 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
134 |
+
2024-02-01 17:39:18,789 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
135 |
+
2024-02-01 17:39:19,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
136 |
+
2024-02-01 17:39:19,466 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
137 |
+
2024-02-01 17:39:19,467 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
138 |
+
2024-02-01 17:39:20,792 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
139 |
+
2024-02-01 17:39:21,909 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
140 |
+
2024-02-01 17:39:21,911 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
141 |
+
2024-02-01 17:39:21,911 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
142 |
+
2024-02-01 17:39:21,913 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
143 |
+
2024-02-01 17:39:22,795 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
144 |
+
2024-02-01 17:39:22,796 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
145 |
+
2024-02-01 17:39:22,815 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
146 |
+
2024-02-01 17:39:24,798 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
147 |
+
2024-02-01 17:39:26,448 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
148 |
+
2024-02-01 17:39:26,450 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
149 |
+
2024-02-01 17:39:26,450 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
150 |
+
2024-02-01 17:39:26,452 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
151 |
+
2024-02-01 17:39:26,802 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
152 |
+
2024-02-01 17:39:26,802 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
153 |
+
2024-02-01 17:39:28,269 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
154 |
+
2024-02-01 17:39:28,805 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
155 |
+
2024-02-01 17:39:28,836 DEBUG SystemMonitor:237521 [system_monitor.py:_start():172] Starting system metrics aggregation loop
|
156 |
+
2024-02-01 17:39:28,850 DEBUG SenderThread:237521 [sender.py:send():382] send: stats
|
157 |
+
2024-02-01 17:39:30,807 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
158 |
+
2024-02-01 17:39:31,001 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
159 |
+
2024-02-01 17:39:31,003 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
160 |
+
2024-02-01 17:39:31,003 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
161 |
+
2024-02-01 17:39:31,005 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
162 |
+
2024-02-01 17:39:31,810 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
163 |
+
2024-02-01 17:39:32,811 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
164 |
+
2024-02-01 17:39:33,729 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
165 |
+
2024-02-01 17:39:34,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
166 |
+
2024-02-01 17:39:34,465 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
167 |
+
2024-02-01 17:39:34,468 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
168 |
+
2024-02-01 17:39:34,814 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
169 |
+
2024-02-01 17:39:35,548 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
170 |
+
2024-02-01 17:39:35,550 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
171 |
+
2024-02-01 17:39:35,550 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
172 |
+
2024-02-01 17:39:35,552 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
173 |
+
2024-02-01 17:39:35,816 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
174 |
+
2024-02-01 17:39:36,817 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
175 |
+
2024-02-01 17:39:38,820 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
176 |
+
2024-02-01 17:39:39,188 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
177 |
+
2024-02-01 17:39:40,104 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
178 |
+
2024-02-01 17:39:40,105 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
179 |
+
2024-02-01 17:39:40,105 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
180 |
+
2024-02-01 17:39:40,108 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
181 |
+
2024-02-01 17:39:40,824 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
182 |
+
2024-02-01 17:39:40,824 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
183 |
+
2024-02-01 17:39:42,826 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
184 |
+
2024-02-01 17:39:44,651 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
185 |
+
2024-02-01 17:39:44,652 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
186 |
+
2024-02-01 17:39:44,653 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
187 |
+
2024-02-01 17:39:44,653 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
188 |
+
2024-02-01 17:39:44,655 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
189 |
+
2024-02-01 17:39:44,830 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
190 |
+
2024-02-01 17:39:44,830 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
191 |
+
2024-02-01 17:39:46,833 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
192 |
+
2024-02-01 17:39:48,835 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
193 |
+
2024-02-01 17:39:49,211 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
194 |
+
2024-02-01 17:39:49,212 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
195 |
+
2024-02-01 17:39:49,212 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
196 |
+
2024-02-01 17:39:49,214 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
197 |
+
2024-02-01 17:39:49,466 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
198 |
+
2024-02-01 17:39:49,466 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
199 |
+
2024-02-01 17:39:49,467 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
200 |
+
2024-02-01 17:39:49,838 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
201 |
+
2024-02-01 17:39:50,121 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
202 |
+
2024-02-01 17:39:50,839 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
203 |
+
2024-02-01 17:39:52,842 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
204 |
+
2024-02-01 17:39:53,762 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
205 |
+
2024-02-01 17:39:53,763 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
206 |
+
2024-02-01 17:39:53,763 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
207 |
+
2024-02-01 17:39:53,765 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
208 |
+
2024-02-01 17:39:53,844 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
209 |
+
2024-02-01 17:39:54,845 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
210 |
+
2024-02-01 17:39:55,580 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
211 |
+
2024-02-01 17:39:56,848 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
212 |
+
2024-02-01 17:39:58,314 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
213 |
+
2024-02-01 17:39:58,316 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
214 |
+
2024-02-01 17:39:58,316 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
215 |
+
2024-02-01 17:39:58,318 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
216 |
+
2024-02-01 17:39:58,842 DEBUG SenderThread:237521 [sender.py:send():382] send: stats
|
217 |
+
2024-02-01 17:39:58,852 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
218 |
+
2024-02-01 17:39:58,852 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
219 |
+
2024-02-01 17:40:00,854 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
220 |
+
2024-02-01 17:40:01,035 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
221 |
+
2024-02-01 17:40:02,853 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
222 |
+
2024-02-01 17:40:02,855 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
223 |
+
2024-02-01 17:40:02,855 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
224 |
+
2024-02-01 17:40:02,857 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
225 |
+
2024-02-01 17:40:02,858 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
226 |
+
2024-02-01 17:40:02,859 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
227 |
+
2024-02-01 17:40:04,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
228 |
+
2024-02-01 17:40:04,466 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
229 |
+
2024-02-01 17:40:04,467 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
230 |
+
2024-02-01 17:40:04,861 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
231 |
+
2024-02-01 17:40:06,498 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
232 |
+
2024-02-01 17:40:06,864 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
233 |
+
2024-02-01 17:40:07,408 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
234 |
+
2024-02-01 17:40:07,409 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
235 |
+
2024-02-01 17:40:07,409 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
236 |
+
2024-02-01 17:40:07,411 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
237 |
+
2024-02-01 17:40:07,866 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
238 |
+
2024-02-01 17:40:08,867 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
239 |
+
2024-02-01 17:40:10,870 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
240 |
+
2024-02-01 17:40:11,953 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
241 |
+
2024-02-01 17:40:11,954 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
242 |
+
2024-02-01 17:40:11,955 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
243 |
+
2024-02-01 17:40:11,956 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
244 |
+
2024-02-01 17:40:11,958 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
245 |
+
2024-02-01 17:40:12,874 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
246 |
+
2024-02-01 17:40:12,874 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
247 |
+
2024-02-01 17:40:14,876 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
248 |
+
2024-02-01 17:40:16,879 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
249 |
+
2024-02-01 17:40:17,215 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
250 |
+
2024-02-01 17:40:18,882 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
251 |
+
2024-02-01 17:40:19,958 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
252 |
+
2024-02-01 17:40:19,959 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
253 |
+
2024-02-01 17:40:19,959 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
254 |
+
2024-02-01 17:40:22,278 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
255 |
+
2024-02-01 17:40:22,888 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
256 |
+
2024-02-01 17:40:24,892 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
257 |
+
2024-02-01 17:40:26,895 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
258 |
+
2024-02-01 17:40:27,568 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
259 |
+
2024-02-01 17:40:27,569 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
260 |
+
2024-02-01 17:40:27,571 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
261 |
+
2024-02-01 17:40:27,571 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
262 |
+
2024-02-01 17:40:27,573 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
263 |
+
2024-02-01 17:40:27,897 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
264 |
+
2024-02-01 17:40:28,844 DEBUG SenderThread:237521 [sender.py:send():382] send: stats
|
265 |
+
2024-02-01 17:40:28,898 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
266 |
+
2024-02-01 17:40:30,901 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
267 |
+
2024-02-01 17:40:32,115 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
268 |
+
2024-02-01 17:40:32,117 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
269 |
+
2024-02-01 17:40:32,117 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
270 |
+
2024-02-01 17:40:32,119 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
271 |
+
2024-02-01 17:40:32,904 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
272 |
+
2024-02-01 17:40:32,904 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
273 |
+
2024-02-01 17:40:33,026 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
274 |
+
2024-02-01 17:40:34,907 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
275 |
+
2024-02-01 17:40:34,920 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
276 |
+
2024-02-01 17:40:34,920 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
277 |
+
2024-02-01 17:40:34,957 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
278 |
+
2024-02-01 17:40:36,665 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
279 |
+
2024-02-01 17:40:36,667 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
280 |
+
2024-02-01 17:40:36,667 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
281 |
+
2024-02-01 17:40:36,669 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
282 |
+
2024-02-01 17:40:36,910 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
283 |
+
2024-02-01 17:40:36,911 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
284 |
+
2024-02-01 17:40:38,487 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
285 |
+
2024-02-01 17:40:38,913 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
286 |
+
2024-02-01 17:40:40,915 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
287 |
+
2024-02-01 17:40:41,219 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
288 |
+
2024-02-01 17:40:41,220 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
289 |
+
2024-02-01 17:40:41,221 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
290 |
+
2024-02-01 17:40:41,223 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
291 |
+
2024-02-01 17:40:41,917 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
292 |
+
2024-02-01 17:40:42,919 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
293 |
+
2024-02-01 17:40:43,949 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
294 |
+
2024-02-01 17:40:44,922 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
295 |
+
2024-02-01 17:40:45,773 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
296 |
+
2024-02-01 17:40:45,775 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
297 |
+
2024-02-01 17:40:45,776 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
298 |
+
2024-02-01 17:40:45,778 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
299 |
+
2024-02-01 17:40:45,924 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
300 |
+
2024-02-01 17:40:46,925 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
301 |
+
2024-02-01 17:40:48,927 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
302 |
+
2024-02-01 17:40:49,410 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
303 |
+
2024-02-01 17:40:49,920 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
304 |
+
2024-02-01 17:40:49,920 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
305 |
+
2024-02-01 17:40:49,957 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
306 |
+
2024-02-01 17:40:50,362 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
307 |
+
2024-02-01 17:40:50,930 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
308 |
+
2024-02-01 17:40:51,403 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
309 |
+
2024-02-01 17:40:51,403 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
310 |
+
2024-02-01 17:40:51,405 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
311 |
+
2024-02-01 17:40:51,932 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
312 |
+
2024-02-01 17:40:52,934 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
313 |
+
2024-02-01 17:40:54,873 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
314 |
+
2024-02-01 17:40:54,874 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
315 |
+
2024-02-01 17:40:54,876 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
316 |
+
2024-02-01 17:40:54,876 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
317 |
+
2024-02-01 17:40:54,878 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
318 |
+
2024-02-01 17:40:54,937 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
319 |
+
2024-02-01 17:40:54,937 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
320 |
+
2024-02-01 17:40:56,940 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
321 |
+
2024-02-01 17:40:58,846 DEBUG SenderThread:237521 [sender.py:send():382] send: stats
|
322 |
+
2024-02-01 17:40:58,942 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
323 |
+
2024-02-01 17:40:59,420 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
324 |
+
2024-02-01 17:40:59,422 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
325 |
+
2024-02-01 17:40:59,422 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
326 |
+
2024-02-01 17:40:59,424 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
327 |
+
2024-02-01 17:40:59,944 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
328 |
+
2024-02-01 17:41:00,330 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
329 |
+
2024-02-01 17:41:00,946 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
330 |
+
2024-02-01 17:41:02,948 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
331 |
+
2024-02-01 17:41:03,975 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
332 |
+
2024-02-01 17:41:03,976 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
333 |
+
2024-02-01 17:41:03,976 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
334 |
+
2024-02-01 17:41:03,978 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
335 |
+
2024-02-01 17:41:04,920 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
336 |
+
2024-02-01 17:41:04,920 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
337 |
+
2024-02-01 17:41:04,952 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
338 |
+
2024-02-01 17:41:04,952 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
339 |
+
2024-02-01 17:41:04,957 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
340 |
+
2024-02-01 17:41:05,797 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
341 |
+
2024-02-01 17:41:06,955 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
342 |
+
2024-02-01 17:41:08,527 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
343 |
+
2024-02-01 17:41:08,529 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
344 |
+
2024-02-01 17:41:08,529 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
345 |
+
2024-02-01 17:41:08,531 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
346 |
+
2024-02-01 17:41:08,958 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
347 |
+
2024-02-01 17:41:08,959 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
348 |
+
2024-02-01 17:41:10,961 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
349 |
+
2024-02-01 17:41:11,264 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
350 |
+
2024-02-01 17:41:12,964 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
351 |
+
2024-02-01 17:41:13,085 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
352 |
+
2024-02-01 17:41:13,086 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
353 |
+
2024-02-01 17:41:13,087 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
354 |
+
2024-02-01 17:41:13,089 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
355 |
+
2024-02-01 17:41:13,966 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
356 |
+
2024-02-01 17:41:14,967 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
357 |
+
2024-02-01 17:41:16,736 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
358 |
+
2024-02-01 17:41:16,969 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
359 |
+
2024-02-01 17:41:17,649 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
360 |
+
2024-02-01 17:41:17,650 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
361 |
+
2024-02-01 17:41:17,651 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
362 |
+
2024-02-01 17:41:17,653 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
363 |
+
2024-02-01 17:41:17,972 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
364 |
+
2024-02-01 17:41:18,973 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
365 |
+
2024-02-01 17:41:19,920 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
366 |
+
2024-02-01 17:41:19,920 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
367 |
+
2024-02-01 17:41:19,957 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
368 |
+
2024-02-01 17:41:20,976 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
369 |
+
2024-02-01 17:41:22,234 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
370 |
+
2024-02-01 17:41:22,235 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
371 |
+
2024-02-01 17:41:22,236 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
372 |
+
2024-02-01 17:41:22,236 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
373 |
+
2024-02-01 17:41:22,238 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
374 |
+
2024-02-01 17:41:22,979 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
375 |
+
2024-02-01 17:41:22,979 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
376 |
+
2024-02-01 17:41:24,981 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
377 |
+
2024-02-01 17:41:26,803 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
378 |
+
2024-02-01 17:41:26,804 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
379 |
+
2024-02-01 17:41:26,805 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
380 |
+
2024-02-01 17:41:26,806 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
381 |
+
2024-02-01 17:41:26,985 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
382 |
+
2024-02-01 17:41:26,985 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
383 |
+
2024-02-01 17:41:27,718 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
384 |
+
2024-02-01 17:41:28,848 DEBUG SenderThread:237521 [sender.py:send():382] send: stats
|
385 |
+
2024-02-01 17:41:28,987 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
wandb/debug.log
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Current SDK version is 0.16.1
|
2 |
+
2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Configure stats pid to 237059
|
3 |
+
2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Loading settings from /admin/home/sanchit/.config/wandb/settings
|
4 |
+
2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Loading settings from /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/settings
|
5 |
+
2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
|
6 |
+
2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
|
7 |
+
2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'run_sft.py', 'program_abspath': '/fsx/sanchit/distil-zephyr-1.5b-ssft/run_sft.py', 'program': '/fsx/sanchit/distil-zephyr-1.5b-ssft/run_sft.py'}
|
8 |
+
2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:_log_setup():524] Logging user logs to /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/logs/debug.log
|
9 |
+
2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:_log_setup():525] Logging internal logs to /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/logs/debug-internal.log
|
10 |
+
2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:init():564] calling init triggers
|
11 |
+
2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:init():571] wandb.init called with sweep_config: {}
|
12 |
+
config: {}
|
13 |
+
2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:init():614] starting backend
|
14 |
+
2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:init():618] setting up manager
|
15 |
+
2024-02-01 17:38:28,441 INFO MainThread:237059 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
16 |
+
2024-02-01 17:38:28,451 INFO MainThread:237059 [wandb_init.py:init():624] backend started and connected
|
17 |
+
2024-02-01 17:38:28,453 INFO MainThread:237059 [wandb_init.py:init():716] updated telemetry
|
18 |
+
2024-02-01 17:38:28,475 INFO MainThread:237059 [wandb_init.py:init():749] communicating run to backend with 90.0 second timeout
|
19 |
+
2024-02-01 17:38:28,720 INFO MainThread:237059 [wandb_run.py:_on_init():2254] communicating current version
|
20 |
+
2024-02-01 17:38:28,767 INFO MainThread:237059 [wandb_run.py:_on_init():2263] got version response upgrade_message: "wandb version 0.16.2 is available! To upgrade, please run:\n $ pip install wandb --upgrade"
|
21 |
+
|
22 |
+
2024-02-01 17:38:28,767 INFO MainThread:237059 [wandb_init.py:init():800] starting run threads in backend
|
23 |
+
2024-02-01 17:38:34,465 INFO MainThread:237059 [wandb_run.py:_console_start():2233] atexit reg
|
24 |
+
2024-02-01 17:38:34,465 INFO MainThread:237059 [wandb_run.py:_redirect():2088] redirect: wrap_raw
|
25 |
+
2024-02-01 17:38:34,466 INFO MainThread:237059 [wandb_run.py:_redirect():2153] Wrapping output streams.
|
26 |
+
2024-02-01 17:38:34,466 INFO MainThread:237059 [wandb_run.py:_redirect():2178] Redirects installed.
|
27 |
+
2024-02-01 17:38:34,467 INFO MainThread:237059 [wandb_init.py:init():841] run started, returning control to user process
|
28 |
+
2024-02-01 17:38:34,468 INFO MainThread:237059 [wandb_run.py:_config_callback():1342] config_cb None None {'vocab_size': 32000, 'max_position_embeddings': 32768, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 6, 'num_attention_heads': 32, 'sliding_window': 4096, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'use_cache': False, 'rope_theta': 10000.0, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['MistralForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': None, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'sanchit-gandhi/Mistral-7B-v0.1-6-layer', 'transformers_version': '4.36.2', 'model_type': 'mistral', 'output_dir': './', 'overwrite_output_dir': True, 
'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.1, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb01_17-38-02_ip-26-0-165-24', 'logging_strategy': 'steps', 'logging_first_step': True, 'logging_steps': 5, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 
'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': {'use_reentrant': False}, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'max_seq_length': 2048}
|
wandb/run-20240201_173828-py26nu6m/files/conda-environment.yaml
ADDED
@@ -0,0 +1,218 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: venv
|
2 |
+
channels:
|
3 |
+
- pytorch
|
4 |
+
- nvidia
|
5 |
+
- defaults
|
6 |
+
dependencies:
|
7 |
+
- _libgcc_mutex=0.1=main
|
8 |
+
- _openmp_mutex=5.1=1_gnu
|
9 |
+
- blas=1.0=mkl
|
10 |
+
- brotli-python=1.0.9=py311h6a678d5_7
|
11 |
+
- bzip2=1.0.8=h7b6447c_0
|
12 |
+
- ca-certificates=2023.12.12=h06a4308_0
|
13 |
+
- certifi=2023.11.17=py311h06a4308_0
|
14 |
+
- cffi=1.16.0=py311h5eee18b_0
|
15 |
+
- cryptography=41.0.7=py311hdda0065_0
|
16 |
+
- cuda-cudart=12.1.105=0
|
17 |
+
- cuda-cupti=12.1.105=0
|
18 |
+
- cuda-libraries=12.1.0=0
|
19 |
+
- cuda-nvrtc=12.1.105=0
|
20 |
+
- cuda-nvtx=12.1.105=0
|
21 |
+
- cuda-opencl=12.3.101=0
|
22 |
+
- cuda-runtime=12.1.0=0
|
23 |
+
- ffmpeg=4.3=hf484d3e_0
|
24 |
+
- filelock=3.13.1=py311h06a4308_0
|
25 |
+
- freetype=2.12.1=h4a9f257_0
|
26 |
+
- giflib=5.2.1=h5eee18b_3
|
27 |
+
- gmp=6.2.1=h295c915_3
|
28 |
+
- gmpy2=2.1.2=py311hc9b5ff0_0
|
29 |
+
- gnutls=3.6.15=he1e5248_0
|
30 |
+
- intel-openmp=2023.1.0=hdb19cb5_46306
|
31 |
+
- jinja2=3.1.2=py311h06a4308_0
|
32 |
+
- jpeg=9e=h5eee18b_1
|
33 |
+
- lame=3.100=h7b6447c_0
|
34 |
+
- lcms2=2.12=h3be6417_0
|
35 |
+
- ld_impl_linux-64=2.38=h1181459_1
|
36 |
+
- lerc=3.0=h295c915_0
|
37 |
+
- libcublas=12.1.0.26=0
|
38 |
+
- libcufft=11.0.2.4=0
|
39 |
+
- libcufile=1.8.1.2=0
|
40 |
+
- libcurand=10.3.4.101=0
|
41 |
+
- libcusolver=11.4.4.55=0
|
42 |
+
- libcusparse=12.0.2.55=0
|
43 |
+
- libdeflate=1.17=h5eee18b_1
|
44 |
+
- libffi=3.4.4=h6a678d5_0
|
45 |
+
- libgcc-ng=11.2.0=h1234567_1
|
46 |
+
- libgomp=11.2.0=h1234567_1
|
47 |
+
- libiconv=1.16=h7f8727e_2
|
48 |
+
- libidn2=2.3.4=h5eee18b_0
|
49 |
+
- libjpeg-turbo=2.0.0=h9bf148f_0
|
50 |
+
- libnpp=12.0.2.50=0
|
51 |
+
- libnvjitlink=12.1.105=0
|
52 |
+
- libnvjpeg=12.1.1.14=0
|
53 |
+
- libpng=1.6.39=h5eee18b_0
|
54 |
+
- libstdcxx-ng=11.2.0=h1234567_1
|
55 |
+
- libtasn1=4.19.0=h5eee18b_0
|
56 |
+
- libtiff=4.5.1=h6a678d5_0
|
57 |
+
- libunistring=0.9.10=h27cfd23_0
|
58 |
+
- libuuid=1.41.5=h5eee18b_0
|
59 |
+
- libwebp=1.3.2=h11a3e52_0
|
60 |
+
- libwebp-base=1.3.2=h5eee18b_0
|
61 |
+
- llvm-openmp=14.0.6=h9e868ea_0
|
62 |
+
- lz4-c=1.9.4=h6a678d5_0
|
63 |
+
- markupsafe=2.1.1=py311h5eee18b_0
|
64 |
+
- mkl=2023.1.0=h213fc3f_46344
|
65 |
+
- mkl-service=2.4.0=py311h5eee18b_1
|
66 |
+
- mkl_fft=1.3.8=py311h5eee18b_0
|
67 |
+
- mkl_random=1.2.4=py311hdb19cb5_0
|
68 |
+
- mpc=1.1.0=h10f8cd9_1
|
69 |
+
- mpfr=4.0.2=hb69a4c5_1
|
70 |
+
- mpmath=1.3.0=py311h06a4308_0
|
71 |
+
- ncurses=6.4=h6a678d5_0
|
72 |
+
- nettle=3.7.3=hbbd107a_1
|
73 |
+
- networkx=3.1=py311h06a4308_0
|
74 |
+
- numpy=1.26.2=py311h08b1b3b_0
|
75 |
+
- numpy-base=1.26.2=py311hf175353_0
|
76 |
+
- openh264=2.1.1=h4ff587b_0
|
77 |
+
- openjpeg=2.4.0=h3ad879b_0
|
78 |
+
- openssl=3.0.12=h7f8727e_0
|
79 |
+
- pycparser=2.21=pyhd3eb1b0_0
|
80 |
+
- pyopenssl=23.2.0=py311h06a4308_0
|
81 |
+
- pysocks=1.7.1=py311h06a4308_0
|
82 |
+
- python=3.11.5=h955ad1f_0
|
83 |
+
- pytorch=2.1.2=py3.11_cuda12.1_cudnn8.9.2_0
|
84 |
+
- pytorch-cuda=12.1=ha16c6d3_5
|
85 |
+
- pytorch-mutex=1.0=cuda
|
86 |
+
- pyyaml=6.0.1=py311h5eee18b_0
|
87 |
+
- readline=8.2=h5eee18b_0
|
88 |
+
- requests=2.31.0=py311h06a4308_0
|
89 |
+
- setuptools=68.2.2=py311h06a4308_0
|
90 |
+
- sqlite=3.41.2=h5eee18b_0
|
91 |
+
- sympy=1.12=py311h06a4308_0
|
92 |
+
- tbb=2021.8.0=hdb19cb5_0
|
93 |
+
- tk=8.6.12=h1ccaba5_0
|
94 |
+
- torchaudio=2.1.2=py311_cu121
|
95 |
+
- torchtriton=2.1.0=py311
|
96 |
+
- torchvision=0.16.2=py311_cu121
|
97 |
+
- typing_extensions=4.7.1=py311h06a4308_0
|
98 |
+
- wheel=0.41.2=py311h06a4308_0
|
99 |
+
- xz=5.4.5=h5eee18b_0
|
100 |
+
- yaml=0.2.5=h7b6447c_0
|
101 |
+
- zlib=1.2.13=h5eee18b_0
|
102 |
+
- zstd=1.5.5=hc292b87_0
|
103 |
+
- pip:
|
104 |
+
- absl-py==2.0.0
|
105 |
+
- accelerate==0.23.0
|
106 |
+
- aiohttp==3.9.1
|
107 |
+
- aiosignal==1.3.1
|
108 |
+
- annotated-types==0.6.0
|
109 |
+
- appdirs==1.4.4
|
110 |
+
- astunparse==1.6.3
|
111 |
+
- attrs==23.1.0
|
112 |
+
- audioread==3.0.1
|
113 |
+
- bitsandbytes==0.41.2.post2
|
114 |
+
- cachetools==5.3.2
|
115 |
+
- chardet==5.2.0
|
116 |
+
- charset-normalizer==3.3.2
|
117 |
+
- click==8.1.7
|
118 |
+
- datasets==2.14.6
|
119 |
+
- decorator==5.1.1
|
120 |
+
- deepspeed==0.12.2
|
121 |
+
- dill==0.3.7
|
122 |
+
- docker-pycreds==0.4.0
|
123 |
+
- docstring-parser==0.15
|
124 |
+
- einops==0.7.0
|
125 |
+
- evaluate==0.4.0
|
126 |
+
- flash-attn==2.5.2
|
127 |
+
- flatbuffers==23.5.26
|
128 |
+
- frozenlist==1.4.1
|
129 |
+
- fsspec==2023.10.0
|
130 |
+
- gast==0.5.4
|
131 |
+
- gitdb==4.0.11
|
132 |
+
- gitpython==3.1.40
|
133 |
+
- google-auth==2.26.1
|
134 |
+
- google-auth-oauthlib==1.2.0
|
135 |
+
- google-pasta==0.2.0
|
136 |
+
- grpcio==1.60.0
|
137 |
+
- h5py==3.10.0
|
138 |
+
- hf-transfer==0.1.5
|
139 |
+
- hjson==3.1.0
|
140 |
+
- huggingface-hub==0.20.1
|
141 |
+
- idna==3.6
|
142 |
+
- jiwer==3.0.3
|
143 |
+
- joblib==1.3.2
|
144 |
+
- keras==2.15.0
|
145 |
+
- lazy-loader==0.3
|
146 |
+
- libclang==16.0.6
|
147 |
+
- librosa==0.10.1
|
148 |
+
- llvmlite==0.41.1
|
149 |
+
- markdown==3.5.1
|
150 |
+
- markdown-it-py==3.0.0
|
151 |
+
- mdurl==0.1.2
|
152 |
+
- ml-dtypes==0.2.0
|
153 |
+
- msgpack==1.0.7
|
154 |
+
- multidict==6.0.4
|
155 |
+
- multiprocess==0.70.15
|
156 |
+
- ninja==1.11.1.1
|
157 |
+
- nltk==3.8.1
|
158 |
+
- numba==0.58.1
|
159 |
+
- oauthlib==3.2.2
|
160 |
+
- opt-einsum==3.3.0
|
161 |
+
- packaging==23.2
|
162 |
+
- pandas==2.1.4
|
163 |
+
- peft==0.7.1
|
164 |
+
- pillow==10.2.0
|
165 |
+
- pip==23.3.2
|
166 |
+
- platformdirs==4.1.0
|
167 |
+
- pooch==1.8.0
|
168 |
+
- protobuf==3.20.2
|
169 |
+
- psutil==5.9.7
|
170 |
+
- py-cpuinfo==9.0.0
|
171 |
+
- pyarrow==14.0.2
|
172 |
+
- pyarrow-hotfix==0.6
|
173 |
+
- pyasn1==0.5.1
|
174 |
+
- pyasn1-modules==0.3.0
|
175 |
+
- pydantic==2.6.0
|
176 |
+
- pydantic-core==2.16.1
|
177 |
+
- pygments==2.17.2
|
178 |
+
- pynvml==11.5.0
|
179 |
+
- python-dateutil==2.8.2
|
180 |
+
- pytz==2023.3.post1
|
181 |
+
- rapidfuzz==3.6.1
|
182 |
+
- regex==2023.12.25
|
183 |
+
- requests-oauthlib==1.3.1
|
184 |
+
- responses==0.18.0
|
185 |
+
- rich==13.7.0
|
186 |
+
- rsa==4.9
|
187 |
+
- safetensors==0.4.1
|
188 |
+
- scikit-learn==1.3.2
|
189 |
+
- scipy==1.11.4
|
190 |
+
- sentencepiece==0.1.99
|
191 |
+
- sentry-sdk==1.39.1
|
192 |
+
- setproctitle==1.3.3
|
193 |
+
- shtab==1.6.5
|
194 |
+
- six==1.16.0
|
195 |
+
- smmap==5.0.1
|
196 |
+
- soundfile==0.12.1
|
197 |
+
- soxr==0.3.7
|
198 |
+
- tensorboard==2.15.1
|
199 |
+
- tensorboard-data-server==0.7.2
|
200 |
+
- tensorflow-cpu==2.15.0.post1
|
201 |
+
- tensorflow-estimator==2.15.0
|
202 |
+
- tensorflow-io-gcs-filesystem==0.35.0
|
203 |
+
- termcolor==2.4.0
|
204 |
+
- threadpoolctl==3.2.0
|
205 |
+
- tokenizers==0.15.0
|
206 |
+
- tqdm==4.66.1
|
207 |
+
- transformers==4.36.2
|
208 |
+
- trl==0.7.7
|
209 |
+
- typing-extensions==4.9.0
|
210 |
+
- tyro==0.7.0
|
211 |
+
- tzdata==2023.3
|
212 |
+
- urllib3==2.1.0
|
213 |
+
- wandb==0.16.1
|
214 |
+
- werkzeug==3.0.1
|
215 |
+
- wrapt==1.14.1
|
216 |
+
- xxhash==3.4.1
|
217 |
+
- yarl==1.9.4
|
218 |
+
prefix: /fsx/sanchit/miniconda3/envs/venv
|
wandb/run-20240201_173828-py26nu6m/files/config.yaml
ADDED
@@ -0,0 +1,644 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb_version: 1
|
2 |
+
|
3 |
+
_wandb:
|
4 |
+
desc: null
|
5 |
+
value:
|
6 |
+
python_version: 3.11.5
|
7 |
+
cli_version: 0.16.1
|
8 |
+
framework: huggingface
|
9 |
+
huggingface_version: 4.36.2
|
10 |
+
is_jupyter_run: false
|
11 |
+
is_kaggle_kernel: false
|
12 |
+
start_time: 1706809108.451874
|
13 |
+
t:
|
14 |
+
1:
|
15 |
+
- 1
|
16 |
+
- 2
|
17 |
+
- 3
|
18 |
+
- 5
|
19 |
+
- 11
|
20 |
+
- 49
|
21 |
+
- 51
|
22 |
+
- 53
|
23 |
+
- 55
|
24 |
+
- 71
|
25 |
+
- 84
|
26 |
+
- 98
|
27 |
+
2:
|
28 |
+
- 1
|
29 |
+
- 2
|
30 |
+
- 3
|
31 |
+
- 5
|
32 |
+
- 11
|
33 |
+
- 49
|
34 |
+
- 51
|
35 |
+
- 53
|
36 |
+
- 55
|
37 |
+
- 71
|
38 |
+
- 84
|
39 |
+
- 98
|
40 |
+
3:
|
41 |
+
- 7
|
42 |
+
- 23
|
43 |
+
4: 3.11.5
|
44 |
+
5: 0.16.1
|
45 |
+
6: 4.36.2
|
46 |
+
8:
|
47 |
+
- 5
|
48 |
+
9:
|
49 |
+
1: transformers_trainer
|
50 |
+
13: linux-x86_64
|
51 |
+
m:
|
52 |
+
- 1: train/global_step
|
53 |
+
6:
|
54 |
+
- 3
|
55 |
+
- 1: train/loss
|
56 |
+
5: 1
|
57 |
+
6:
|
58 |
+
- 1
|
59 |
+
- 1: train/learning_rate
|
60 |
+
5: 1
|
61 |
+
6:
|
62 |
+
- 1
|
63 |
+
- 1: train/epoch
|
64 |
+
5: 1
|
65 |
+
6:
|
66 |
+
- 1
|
67 |
+
vocab_size:
|
68 |
+
desc: null
|
69 |
+
value: 32000
|
70 |
+
max_position_embeddings:
|
71 |
+
desc: null
|
72 |
+
value: 32768
|
73 |
+
hidden_size:
|
74 |
+
desc: null
|
75 |
+
value: 4096
|
76 |
+
intermediate_size:
|
77 |
+
desc: null
|
78 |
+
value: 14336
|
79 |
+
num_hidden_layers:
|
80 |
+
desc: null
|
81 |
+
value: 6
|
82 |
+
num_attention_heads:
|
83 |
+
desc: null
|
84 |
+
value: 32
|
85 |
+
sliding_window:
|
86 |
+
desc: null
|
87 |
+
value: 4096
|
88 |
+
num_key_value_heads:
|
89 |
+
desc: null
|
90 |
+
value: 8
|
91 |
+
hidden_act:
|
92 |
+
desc: null
|
93 |
+
value: silu
|
94 |
+
initializer_range:
|
95 |
+
desc: null
|
96 |
+
value: 0.02
|
97 |
+
rms_norm_eps:
|
98 |
+
desc: null
|
99 |
+
value: 1.0e-05
|
100 |
+
use_cache:
|
101 |
+
desc: null
|
102 |
+
value: false
|
103 |
+
rope_theta:
|
104 |
+
desc: null
|
105 |
+
value: 10000.0
|
106 |
+
attention_dropout:
|
107 |
+
desc: null
|
108 |
+
value: 0.0
|
109 |
+
return_dict:
|
110 |
+
desc: null
|
111 |
+
value: true
|
112 |
+
output_hidden_states:
|
113 |
+
desc: null
|
114 |
+
value: false
|
115 |
+
output_attentions:
|
116 |
+
desc: null
|
117 |
+
value: false
|
118 |
+
torchscript:
|
119 |
+
desc: null
|
120 |
+
value: false
|
121 |
+
torch_dtype:
|
122 |
+
desc: null
|
123 |
+
value: bfloat16
|
124 |
+
use_bfloat16:
|
125 |
+
desc: null
|
126 |
+
value: false
|
127 |
+
tf_legacy_loss:
|
128 |
+
desc: null
|
129 |
+
value: false
|
130 |
+
pruned_heads:
|
131 |
+
desc: null
|
132 |
+
value: {}
|
133 |
+
tie_word_embeddings:
|
134 |
+
desc: null
|
135 |
+
value: false
|
136 |
+
is_encoder_decoder:
|
137 |
+
desc: null
|
138 |
+
value: false
|
139 |
+
is_decoder:
|
140 |
+
desc: null
|
141 |
+
value: false
|
142 |
+
cross_attention_hidden_size:
|
143 |
+
desc: null
|
144 |
+
value: null
|
145 |
+
add_cross_attention:
|
146 |
+
desc: null
|
147 |
+
value: false
|
148 |
+
tie_encoder_decoder:
|
149 |
+
desc: null
|
150 |
+
value: false
|
151 |
+
max_length:
|
152 |
+
desc: null
|
153 |
+
value: 20
|
154 |
+
min_length:
|
155 |
+
desc: null
|
156 |
+
value: 0
|
157 |
+
do_sample:
|
158 |
+
desc: null
|
159 |
+
value: false
|
160 |
+
early_stopping:
|
161 |
+
desc: null
|
162 |
+
value: false
|
163 |
+
num_beams:
|
164 |
+
desc: null
|
165 |
+
value: 1
|
166 |
+
num_beam_groups:
|
167 |
+
desc: null
|
168 |
+
value: 1
|
169 |
+
diversity_penalty:
|
170 |
+
desc: null
|
171 |
+
value: 0.0
|
172 |
+
temperature:
|
173 |
+
desc: null
|
174 |
+
value: 1.0
|
175 |
+
top_k:
|
176 |
+
desc: null
|
177 |
+
value: 50
|
178 |
+
top_p:
|
179 |
+
desc: null
|
180 |
+
value: 1.0
|
181 |
+
typical_p:
|
182 |
+
desc: null
|
183 |
+
value: 1.0
|
184 |
+
repetition_penalty:
|
185 |
+
desc: null
|
186 |
+
value: 1.0
|
187 |
+
length_penalty:
|
188 |
+
desc: null
|
189 |
+
value: 1.0
|
190 |
+
no_repeat_ngram_size:
|
191 |
+
desc: null
|
192 |
+
value: 0
|
193 |
+
encoder_no_repeat_ngram_size:
|
194 |
+
desc: null
|
195 |
+
value: 0
|
196 |
+
bad_words_ids:
|
197 |
+
desc: null
|
198 |
+
value: null
|
199 |
+
num_return_sequences:
|
200 |
+
desc: null
|
201 |
+
value: 1
|
202 |
+
chunk_size_feed_forward:
|
203 |
+
desc: null
|
204 |
+
value: 0
|
205 |
+
output_scores:
|
206 |
+
desc: null
|
207 |
+
value: false
|
208 |
+
return_dict_in_generate:
|
209 |
+
desc: null
|
210 |
+
value: false
|
211 |
+
forced_bos_token_id:
|
212 |
+
desc: null
|
213 |
+
value: null
|
214 |
+
forced_eos_token_id:
|
215 |
+
desc: null
|
216 |
+
value: null
|
217 |
+
remove_invalid_values:
|
218 |
+
desc: null
|
219 |
+
value: false
|
220 |
+
exponential_decay_length_penalty:
|
221 |
+
desc: null
|
222 |
+
value: null
|
223 |
+
suppress_tokens:
|
224 |
+
desc: null
|
225 |
+
value: null
|
226 |
+
begin_suppress_tokens:
|
227 |
+
desc: null
|
228 |
+
value: null
|
229 |
+
architectures:
|
230 |
+
desc: null
|
231 |
+
value:
|
232 |
+
- MistralForCausalLM
|
233 |
+
finetuning_task:
|
234 |
+
desc: null
|
235 |
+
value: null
|
236 |
+
id2label:
|
237 |
+
desc: null
|
238 |
+
value:
|
239 |
+
'0': LABEL_0
|
240 |
+
'1': LABEL_1
|
241 |
+
label2id:
|
242 |
+
desc: null
|
243 |
+
value:
|
244 |
+
LABEL_0: 0
|
245 |
+
LABEL_1: 1
|
246 |
+
tokenizer_class:
|
247 |
+
desc: null
|
248 |
+
value: null
|
249 |
+
prefix:
|
250 |
+
desc: null
|
251 |
+
value: null
|
252 |
+
bos_token_id:
|
253 |
+
desc: null
|
254 |
+
value: 1
|
255 |
+
pad_token_id:
|
256 |
+
desc: null
|
257 |
+
value: null
|
258 |
+
eos_token_id:
|
259 |
+
desc: null
|
260 |
+
value: 2
|
261 |
+
sep_token_id:
|
262 |
+
desc: null
|
263 |
+
value: null
|
264 |
+
decoder_start_token_id:
|
265 |
+
desc: null
|
266 |
+
value: null
|
267 |
+
task_specific_params:
|
268 |
+
desc: null
|
269 |
+
value: null
|
270 |
+
problem_type:
|
271 |
+
desc: null
|
272 |
+
value: null
|
273 |
+
_name_or_path:
|
274 |
+
desc: null
|
275 |
+
value: sanchit-gandhi/Mistral-7B-v0.1-6-layer
|
276 |
+
transformers_version:
|
277 |
+
desc: null
|
278 |
+
value: 4.36.2
|
279 |
+
model_type:
|
280 |
+
desc: null
|
281 |
+
value: mistral
|
282 |
+
output_dir:
|
283 |
+
desc: null
|
284 |
+
value: ./
|
285 |
+
overwrite_output_dir:
|
286 |
+
desc: null
|
287 |
+
value: true
|
288 |
+
do_train:
|
289 |
+
desc: null
|
290 |
+
value: false
|
291 |
+
do_eval:
|
292 |
+
desc: null
|
293 |
+
value: true
|
294 |
+
do_predict:
|
295 |
+
desc: null
|
296 |
+
value: false
|
297 |
+
evaluation_strategy:
|
298 |
+
desc: null
|
299 |
+
value: epoch
|
300 |
+
prediction_loss_only:
|
301 |
+
desc: null
|
302 |
+
value: false
|
303 |
+
per_device_train_batch_size:
|
304 |
+
desc: null
|
305 |
+
value: 16
|
306 |
+
per_device_eval_batch_size:
|
307 |
+
desc: null
|
308 |
+
value: 8
|
309 |
+
per_gpu_train_batch_size:
|
310 |
+
desc: null
|
311 |
+
value: null
|
312 |
+
per_gpu_eval_batch_size:
|
313 |
+
desc: null
|
314 |
+
value: null
|
315 |
+
gradient_accumulation_steps:
|
316 |
+
desc: null
|
317 |
+
value: 1
|
318 |
+
eval_accumulation_steps:
|
319 |
+
desc: null
|
320 |
+
value: null
|
321 |
+
eval_delay:
|
322 |
+
desc: null
|
323 |
+
value: 0
|
324 |
+
learning_rate:
|
325 |
+
desc: null
|
326 |
+
value: 2.0e-05
|
327 |
+
weight_decay:
|
328 |
+
desc: null
|
329 |
+
value: 0.0
|
330 |
+
adam_beta1:
|
331 |
+
desc: null
|
332 |
+
value: 0.9
|
333 |
+
adam_beta2:
|
334 |
+
desc: null
|
335 |
+
value: 0.999
|
336 |
+
adam_epsilon:
|
337 |
+
desc: null
|
338 |
+
value: 1.0e-08
|
339 |
+
max_grad_norm:
|
340 |
+
desc: null
|
341 |
+
value: 1.0
|
342 |
+
num_train_epochs:
|
343 |
+
desc: null
|
344 |
+
value: 1
|
345 |
+
max_steps:
|
346 |
+
desc: null
|
347 |
+
value: -1
|
348 |
+
lr_scheduler_type:
|
349 |
+
desc: null
|
350 |
+
value: cosine
|
351 |
+
lr_scheduler_kwargs:
|
352 |
+
desc: null
|
353 |
+
value: {}
|
354 |
+
warmup_ratio:
|
355 |
+
desc: null
|
356 |
+
value: 0.1
|
357 |
+
warmup_steps:
|
358 |
+
desc: null
|
359 |
+
value: 0
|
360 |
+
log_level:
|
361 |
+
desc: null
|
362 |
+
value: info
|
363 |
+
log_level_replica:
|
364 |
+
desc: null
|
365 |
+
value: warning
|
366 |
+
log_on_each_node:
|
367 |
+
desc: null
|
368 |
+
value: true
|
369 |
+
logging_dir:
|
370 |
+
desc: null
|
371 |
+
value: ./runs/Feb01_17-38-02_ip-26-0-165-24
|
372 |
+
logging_strategy:
|
373 |
+
desc: null
|
374 |
+
value: steps
|
375 |
+
logging_first_step:
|
376 |
+
desc: null
|
377 |
+
value: true
|
378 |
+
logging_steps:
|
379 |
+
desc: null
|
380 |
+
value: 5
|
381 |
+
logging_nan_inf_filter:
|
382 |
+
desc: null
|
383 |
+
value: true
|
384 |
+
save_strategy:
|
385 |
+
desc: null
|
386 |
+
value: steps
|
387 |
+
save_steps:
|
388 |
+
desc: null
|
389 |
+
value: 100
|
390 |
+
save_total_limit:
|
391 |
+
desc: null
|
392 |
+
value: 1
|
393 |
+
save_safetensors:
|
394 |
+
desc: null
|
395 |
+
value: true
|
396 |
+
save_on_each_node:
|
397 |
+
desc: null
|
398 |
+
value: false
|
399 |
+
save_only_model:
|
400 |
+
desc: null
|
401 |
+
value: false
|
402 |
+
no_cuda:
|
403 |
+
desc: null
|
404 |
+
value: false
|
405 |
+
use_cpu:
|
406 |
+
desc: null
|
407 |
+
value: false
|
408 |
+
use_mps_device:
|
409 |
+
desc: null
|
410 |
+
value: false
|
411 |
+
seed:
|
412 |
+
desc: null
|
413 |
+
value: 42
|
414 |
+
data_seed:
|
415 |
+
desc: null
|
416 |
+
value: null
|
417 |
+
jit_mode_eval:
|
418 |
+
desc: null
|
419 |
+
value: false
|
420 |
+
use_ipex:
|
421 |
+
desc: null
|
422 |
+
value: false
|
423 |
+
bf16:
|
424 |
+
desc: null
|
425 |
+
value: true
|
426 |
+
fp16:
|
427 |
+
desc: null
|
428 |
+
value: false
|
429 |
+
fp16_opt_level:
|
430 |
+
desc: null
|
431 |
+
value: O1
|
432 |
+
half_precision_backend:
|
433 |
+
desc: null
|
434 |
+
value: auto
|
435 |
+
bf16_full_eval:
|
436 |
+
desc: null
|
437 |
+
value: false
|
438 |
+
fp16_full_eval:
|
439 |
+
desc: null
|
440 |
+
value: false
|
441 |
+
tf32:
|
442 |
+
desc: null
|
443 |
+
value: null
|
444 |
+
local_rank:
|
445 |
+
desc: null
|
446 |
+
value: 0
|
447 |
+
ddp_backend:
|
448 |
+
desc: null
|
449 |
+
value: null
|
450 |
+
tpu_num_cores:
|
451 |
+
desc: null
|
452 |
+
value: null
|
453 |
+
tpu_metrics_debug:
|
454 |
+
desc: null
|
455 |
+
value: false
|
456 |
+
debug:
|
457 |
+
desc: null
|
458 |
+
value: []
|
459 |
+
dataloader_drop_last:
|
460 |
+
desc: null
|
461 |
+
value: false
|
462 |
+
eval_steps:
|
463 |
+
desc: null
|
464 |
+
value: null
|
465 |
+
dataloader_num_workers:
|
466 |
+
desc: null
|
467 |
+
value: 0
|
468 |
+
past_index:
|
469 |
+
desc: null
|
470 |
+
value: -1
|
471 |
+
run_name:
|
472 |
+
desc: null
|
473 |
+
value: ./
|
474 |
+
disable_tqdm:
|
475 |
+
desc: null
|
476 |
+
value: false
|
477 |
+
remove_unused_columns:
|
478 |
+
desc: null
|
479 |
+
value: true
|
480 |
+
label_names:
|
481 |
+
desc: null
|
482 |
+
value: null
|
483 |
+
load_best_model_at_end:
|
484 |
+
desc: null
|
485 |
+
value: false
|
486 |
+
metric_for_best_model:
|
487 |
+
desc: null
|
488 |
+
value: null
|
489 |
+
greater_is_better:
|
490 |
+
desc: null
|
491 |
+
value: null
|
492 |
+
ignore_data_skip:
|
493 |
+
desc: null
|
494 |
+
value: false
|
495 |
+
fsdp:
|
496 |
+
desc: null
|
497 |
+
value: []
|
498 |
+
fsdp_min_num_params:
|
499 |
+
desc: null
|
500 |
+
value: 0
|
501 |
+
fsdp_config:
|
502 |
+
desc: null
|
503 |
+
value:
|
504 |
+
min_num_params: 0
|
505 |
+
xla: false
|
506 |
+
xla_fsdp_grad_ckpt: false
|
507 |
+
fsdp_transformer_layer_cls_to_wrap:
|
508 |
+
desc: null
|
509 |
+
value: null
|
510 |
+
deepspeed:
|
511 |
+
desc: null
|
512 |
+
value: null
|
513 |
+
label_smoothing_factor:
|
514 |
+
desc: null
|
515 |
+
value: 0.0
|
516 |
+
optim:
|
517 |
+
desc: null
|
518 |
+
value: adamw_torch
|
519 |
+
optim_args:
|
520 |
+
desc: null
|
521 |
+
value: null
|
522 |
+
adafactor:
|
523 |
+
desc: null
|
524 |
+
value: false
|
525 |
+
group_by_length:
|
526 |
+
desc: null
|
527 |
+
value: false
|
528 |
+
length_column_name:
|
529 |
+
desc: null
|
530 |
+
value: length
|
531 |
+
report_to:
|
532 |
+
desc: null
|
533 |
+
value:
|
534 |
+
- tensorboard
|
535 |
+
- wandb
|
536 |
+
ddp_find_unused_parameters:
|
537 |
+
desc: null
|
538 |
+
value: null
|
539 |
+
ddp_bucket_cap_mb:
|
540 |
+
desc: null
|
541 |
+
value: null
|
542 |
+
ddp_broadcast_buffers:
|
543 |
+
desc: null
|
544 |
+
value: null
|
545 |
+
dataloader_pin_memory:
|
546 |
+
desc: null
|
547 |
+
value: true
|
548 |
+
dataloader_persistent_workers:
|
549 |
+
desc: null
|
550 |
+
value: false
|
551 |
+
skip_memory_metrics:
|
552 |
+
desc: null
|
553 |
+
value: true
|
554 |
+
use_legacy_prediction_loop:
|
555 |
+
desc: null
|
556 |
+
value: false
|
557 |
+
push_to_hub:
|
558 |
+
desc: null
|
559 |
+
value: true
|
560 |
+
resume_from_checkpoint:
|
561 |
+
desc: null
|
562 |
+
value: null
|
563 |
+
hub_model_id:
|
564 |
+
desc: null
|
565 |
+
value: null
|
566 |
+
hub_strategy:
|
567 |
+
desc: null
|
568 |
+
value: every_save
|
569 |
+
hub_token:
|
570 |
+
desc: null
|
571 |
+
value: <HUB_TOKEN>
|
572 |
+
hub_private_repo:
|
573 |
+
desc: null
|
574 |
+
value: false
|
575 |
+
hub_always_push:
|
576 |
+
desc: null
|
577 |
+
value: false
|
578 |
+
gradient_checkpointing:
|
579 |
+
desc: null
|
580 |
+
value: true
|
581 |
+
gradient_checkpointing_kwargs:
|
582 |
+
desc: null
|
583 |
+
value:
|
584 |
+
use_reentrant: false
|
585 |
+
include_inputs_for_metrics:
|
586 |
+
desc: null
|
587 |
+
value: false
|
588 |
+
fp16_backend:
|
589 |
+
desc: null
|
590 |
+
value: auto
|
591 |
+
push_to_hub_model_id:
|
592 |
+
desc: null
|
593 |
+
value: null
|
594 |
+
push_to_hub_organization:
|
595 |
+
desc: null
|
596 |
+
value: null
|
597 |
+
push_to_hub_token:
|
598 |
+
desc: null
|
599 |
+
value: <PUSH_TO_HUB_TOKEN>
|
600 |
+
mp_parameters:
|
601 |
+
desc: null
|
602 |
+
value: ''
|
603 |
+
auto_find_batch_size:
|
604 |
+
desc: null
|
605 |
+
value: false
|
606 |
+
full_determinism:
|
607 |
+
desc: null
|
608 |
+
value: false
|
609 |
+
torchdynamo:
|
610 |
+
desc: null
|
611 |
+
value: null
|
612 |
+
ray_scope:
|
613 |
+
desc: null
|
614 |
+
value: last
|
615 |
+
ddp_timeout:
|
616 |
+
desc: null
|
617 |
+
value: 1800
|
618 |
+
torch_compile:
|
619 |
+
desc: null
|
620 |
+
value: false
|
621 |
+
torch_compile_backend:
|
622 |
+
desc: null
|
623 |
+
value: null
|
624 |
+
torch_compile_mode:
|
625 |
+
desc: null
|
626 |
+
value: null
|
627 |
+
dispatch_batches:
|
628 |
+
desc: null
|
629 |
+
value: null
|
630 |
+
split_batches:
|
631 |
+
desc: null
|
632 |
+
value: false
|
633 |
+
include_tokens_per_second:
|
634 |
+
desc: null
|
635 |
+
value: false
|
636 |
+
include_num_input_tokens_seen:
|
637 |
+
desc: null
|
638 |
+
value: false
|
639 |
+
neftune_noise_alpha:
|
640 |
+
desc: null
|
641 |
+
value: null
|
642 |
+
max_seq_length:
|
643 |
+
desc: null
|
644 |
+
value: 2048
|
wandb/run-20240201_173828-py26nu6m/files/output.log
ADDED
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
0%| | 0/1090 [00:00<?, ?it/s][WARNING|logging.py:314] 2024-02-01 17:38:34,491 >> You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
|
3 |
+
{'loss': 13.9185, 'learning_rate': 1.8348623853211012e-07, 'epoch': 0.0}
|
4 |
+
0%|▏ | 3/1090 [00:09<43:27, 2.40s/it]
|
5 |
+
|
6 |
+
|
7 |
+
|
8 |
+
1%|▋ | 9/1090 [00:14<18:27, 1.02s/it]
|
9 |
+
|
10 |
+
|
11 |
+
1%|█ | 14/1090 [00:19<16:33, 1.08it/s]
|
12 |
+
|
13 |
+
|
14 |
+
2%|█▎ | 18/1090 [00:22<16:14, 1.10it/s]
|
15 |
+
|
16 |
+
|
17 |
+
2%|█▋ | 23/1090 [00:27<16:05, 1.11it/s]
|
18 |
+
|
19 |
+
|
20 |
+
|
21 |
+
3%|██▏ | 29/1090 [00:32<16:02, 1.10it/s]
|
22 |
+
|
23 |
+
|
24 |
+
3%|██▌ | 34/1090 [00:37<15:58, 1.10it/s]
|
25 |
+
|
26 |
+
|
27 |
+
3%|██▊ | 38/1090 [00:41<15:53, 1.10it/s]
|
28 |
+
|
29 |
+
|
30 |
+
4%|███ | 42/1090 [00:44<15:50, 1.10it/s]
|
31 |
+
|
32 |
+
|
33 |
+
|
34 |
+
4%|███▋ | 49/1090 [00:51<15:46, 1.10it/s]
|
35 |
+
|
36 |
+
|
37 |
+
5%|████ | 54/1090 [00:55<15:42, 1.10it/s]
|
38 |
+
|
39 |
+
|
40 |
+
5%|████▎ | 58/1090 [00:59<15:38, 1.10it/s]
|
41 |
+
|
42 |
+
|
43 |
+
|
44 |
+
6%|████▊ | 64/1090 [01:04<15:34, 1.10it/s]
|
45 |
+
|
46 |
+
|
47 |
+
6%|█████▏ | 69/1090 [01:09<15:30, 1.10it/s]
|
48 |
+
|
49 |
+
|
50 |
+
7%|█████▍ | 73/1090 [01:12<15:26, 1.10it/s]
|
51 |
+
|
52 |
+
|
53 |
+
|
54 |
+
7%|█████▉ | 80/1090 [01:19<15:18, 1.10it/s]
|
55 |
+
|
56 |
+
|
57 |
+
8%|██████▏ | 84/1090 [01:22<15:16, 1.10it/s]
|
58 |
+
|
59 |
+
|
60 |
+
8%|██████▌ | 89/1090 [01:27<15:09, 1.10it/s]
|
61 |
+
|
62 |
+
|
63 |
+
9%|██████▉ | 93/1090 [01:31<15:08, 1.10it/s]
|
64 |
+
|
65 |
+
|
66 |
+
|
67 |
+
9%|███████▎ | 100/1090 [01:37<14:59, 1.10it/s]
|
68 |
+
9%|███████▎ | 100/1090 [01:37<14:59, 1.10it/s][INFO|trainer.py:2889] 2024-02-01 17:40:13,014 >> Saving model checkpoint to ./tmp-checkpoint-100
|
69 |
+
[INFO|configuration_utils.py:483] 2024-02-01 17:40:13,018 >> Configuration saved in ./tmp-checkpoint-100/config.json
|
70 |
+
[INFO|configuration_utils.py:594] 2024-02-01 17:40:13,020 >> Configuration saved in ./tmp-checkpoint-100/generation_config.json
|
71 |
+
[INFO|modeling_utils.py:2382] 2024-02-01 17:40:16,055 >> Model weights saved in ./tmp-checkpoint-100/pytorch_model.bin
|
72 |
+
[INFO|tokenization_utils_base.py:2432] 2024-02-01 17:40:16,059 >> tokenizer config file saved in ./tmp-checkpoint-100/tokenizer_config.json
|
73 |
+
[INFO|tokenization_utils_base.py:2441] 2024-02-01 17:40:16,061 >> Special tokens file saved in ./tmp-checkpoint-100/special_tokens_map.json
|
74 |
+
/fsx/sanchit/miniconda3/envs/venv/lib/python3.11/site-packages/torch/nn/modules/module.py:1879: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.
|
75 |
+
warnings.warn(
|
76 |
+
[2024-02-01 17:40:16,087] [INFO] [logging.py:96:log_dist] [Rank 0] [Torch] Checkpoint global_step100 is about to be saved!
|
77 |
+
[2024-02-01 17:40:16,093] [INFO] [logging.py:96:log_dist] [Rank 0] Saving model checkpoint: ./tmp-checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt
|
78 |
+
[2024-02-01 17:40:16,093] [INFO] [torch_checkpoint_engine.py:21:save] [Torch] Saving ./tmp-checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt...
|
79 |
+
[2024-02-01 17:40:16,210] [INFO] [torch_checkpoint_engine.py:23:save] [Torch] Saved ./tmp-checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt.
|
80 |
+
[2024-02-01 17:40:16,214] [INFO] [torch_checkpoint_engine.py:21:save] [Torch] Saving ./tmp-checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt...
|
81 |
+
[2024-02-01 17:40:19,957] [INFO] [torch_checkpoint_engine.py:23:save] [Torch] Saved ./tmp-checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt.
|
82 |
+
[2024-02-01 17:40:19,962] [INFO] [engine.py:3393:_save_zero_checkpoint] zero checkpoint saved ./tmp-checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
|
83 |
+
[2024-02-01 17:40:20,277] [INFO] [torch_checkpoint_engine.py:33:commit] [Torch] Checkpoint global_step100 is ready now!
|
84 |
+
[INFO|tokenization_utils_base.py:2432] 2024-02-01 17:40:22,999 >> tokenizer config file saved in ./tokenizer_config.json
|
85 |
+
[INFO|tokenization_utils_base.py:2441] 2024-02-01 17:40:23,001 >> Special tokens file saved in ./special_tokens_map.json
|
86 |
+
|
87 |
+
9%|███████▌ | 103/1090 [01:51<41:42, 2.54s/it]
|
88 |
+
|
89 |
+
|
90 |
+
|
91 |
+
10%|████████ | 110/1090 [01:57<17:02, 1.04s/it]
|
92 |
+
|
93 |
+
|
94 |
+
10%|████████▎ | 114/1090 [02:01<15:18, 1.06it/s]
|
95 |
+
|
96 |
+
|
97 |
+
11%|████████▋ | 118/1090 [02:04<14:52, 1.09it/s]
|
98 |
+
|
99 |
+
|
100 |
+
|
101 |
+
11%|█████████▏ | 125/1090 [02:11<14:39, 1.10it/s]
|
102 |
+
|
103 |
+
|
104 |
+
12%|█████████▍ | 129/1090 [02:14<14:34, 1.10it/s]
|
105 |
+
|
106 |
+
|
107 |
+
12%|█████████▊ | 134/1090 [02:19<14:31, 1.10it/s]
|
108 |
+
|
109 |
+
|
110 |
+
13%|██████████▏ | 138/1090 [02:23<14:26, 1.10it/s]
|
111 |
+
|
112 |
+
|
113 |
+
|
114 |
+
13%|██████████▋ | 145/1090 [02:29<14:19, 1.10it/s]
|
115 |
+
|
116 |
+
|
117 |
+
14%|██████████▉ | 149/1090 [02:33<14:17, 1.10it/s]
|
118 |
+
|
119 |
+
|
120 |
+
14%|███████████▏ | 153/1090 [02:36<14:14, 1.10it/s]
|
121 |
+
|
122 |
+
|
123 |
+
|
124 |
+
15%|███████████▋ | 160/1090 [02:43<14:08, 1.10it/s]
|
125 |
+
|
126 |
+
|
127 |
+
15%|████████████ | 164/1090 [02:46<14:07, 1.09it/s]
|
128 |
+
|
129 |
+
|
130 |
+
16%|████████████▍ | 169/1090 [02:51<14:01, 1.09it/s]
|
131 |
+
|
wandb/run-20240201_173828-py26nu6m/files/requirements.txt
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==2.0.0
|
2 |
+
accelerate==0.23.0
|
3 |
+
aiohttp==3.9.1
|
4 |
+
aiosignal==1.3.1
|
5 |
+
alignment-handbook==0.4.0.dev0
|
6 |
+
annotated-types==0.6.0
|
7 |
+
appdirs==1.4.4
|
8 |
+
astunparse==1.6.3
|
9 |
+
attrs==23.1.0
|
10 |
+
audioread==3.0.1
|
11 |
+
bitsandbytes==0.41.2.post2
|
12 |
+
brotli==1.0.9
|
13 |
+
cachetools==5.3.2
|
14 |
+
certifi==2023.11.17
|
15 |
+
cffi==1.16.0
|
16 |
+
chardet==5.2.0
|
17 |
+
charset-normalizer==2.0.4
|
18 |
+
click==8.1.7
|
19 |
+
cryptography==41.0.7
|
20 |
+
datasets==2.14.6
|
21 |
+
decorator==5.1.1
|
22 |
+
deepspeed==0.12.2
|
23 |
+
dill==0.3.7
|
24 |
+
docker-pycreds==0.4.0
|
25 |
+
docstring-parser==0.15
|
26 |
+
einops==0.7.0
|
27 |
+
evaluate==0.4.0
|
28 |
+
filelock==3.13.1
|
29 |
+
flash-attn==2.5.2
|
30 |
+
flatbuffers==23.5.26
|
31 |
+
frozenlist==1.4.1
|
32 |
+
fsspec==2023.10.0
|
33 |
+
gast==0.5.4
|
34 |
+
gitdb==4.0.11
|
35 |
+
gitpython==3.1.40
|
36 |
+
gmpy2==2.1.2
|
37 |
+
google-auth-oauthlib==1.2.0
|
38 |
+
google-auth==2.26.1
|
39 |
+
google-pasta==0.2.0
|
40 |
+
grpcio==1.60.0
|
41 |
+
h5py==3.10.0
|
42 |
+
hf-transfer==0.1.5
|
43 |
+
hjson==3.1.0
|
44 |
+
huggingface-hub==0.20.1
|
45 |
+
idna==3.4
|
46 |
+
jinja2==3.1.2
|
47 |
+
jiwer==3.0.3
|
48 |
+
joblib==1.3.2
|
49 |
+
keras==2.15.0
|
50 |
+
lazy-loader==0.3
|
51 |
+
libclang==16.0.6
|
52 |
+
librosa==0.10.1
|
53 |
+
llvmlite==0.41.1
|
54 |
+
markdown-it-py==3.0.0
|
55 |
+
markdown==3.5.1
|
56 |
+
markupsafe==2.1.1
|
57 |
+
mdurl==0.1.2
|
58 |
+
mkl-fft==1.3.8
|
59 |
+
mkl-random==1.2.4
|
60 |
+
mkl-service==2.4.0
|
61 |
+
ml-dtypes==0.2.0
|
62 |
+
mpmath==1.3.0
|
63 |
+
msgpack==1.0.7
|
64 |
+
multidict==6.0.4
|
65 |
+
multiprocess==0.70.15
|
66 |
+
networkx==3.1
|
67 |
+
ninja==1.11.1.1
|
68 |
+
nltk==3.8.1
|
69 |
+
numba==0.58.1
|
70 |
+
numpy==1.26.2
|
71 |
+
oauthlib==3.2.2
|
72 |
+
opt-einsum==3.3.0
|
73 |
+
packaging==23.2
|
74 |
+
pandas==2.1.4
|
75 |
+
peft==0.7.1
|
76 |
+
pillow==10.2.0
|
77 |
+
pip==23.3.2
|
78 |
+
platformdirs==4.1.0
|
79 |
+
pooch==1.8.0
|
80 |
+
protobuf==3.20.2
|
81 |
+
psutil==5.9.7
|
82 |
+
py-cpuinfo==9.0.0
|
83 |
+
pyarrow-hotfix==0.6
|
84 |
+
pyarrow==14.0.2
|
85 |
+
pyasn1-modules==0.3.0
|
86 |
+
pyasn1==0.5.1
|
87 |
+
pycparser==2.21
|
88 |
+
pydantic-core==2.16.1
|
89 |
+
pydantic==2.6.0
|
90 |
+
pygments==2.17.2
|
91 |
+
pynvml==11.5.0
|
92 |
+
pyopenssl==23.2.0
|
93 |
+
pysocks==1.7.1
|
94 |
+
python-dateutil==2.8.2
|
95 |
+
pytz==2023.3.post1
|
96 |
+
pyyaml==6.0.1
|
97 |
+
rapidfuzz==3.6.1
|
98 |
+
regex==2023.12.25
|
99 |
+
requests-oauthlib==1.3.1
|
100 |
+
requests==2.31.0
|
101 |
+
responses==0.18.0
|
102 |
+
rich==13.7.0
|
103 |
+
rsa==4.9
|
104 |
+
safetensors==0.4.1
|
105 |
+
scikit-learn==1.3.2
|
106 |
+
scipy==1.11.4
|
107 |
+
sentencepiece==0.1.99
|
108 |
+
sentry-sdk==1.39.1
|
109 |
+
setproctitle==1.3.3
|
110 |
+
setuptools==68.2.2
|
111 |
+
shtab==1.6.5
|
112 |
+
six==1.16.0
|
113 |
+
smmap==5.0.1
|
114 |
+
soundfile==0.12.1
|
115 |
+
soxr==0.3.7
|
116 |
+
sympy==1.12
|
117 |
+
tensorboard-data-server==0.7.2
|
118 |
+
tensorboard==2.15.1
|
119 |
+
tensorflow-cpu==2.15.0.post1
|
120 |
+
tensorflow-estimator==2.15.0
|
121 |
+
tensorflow-io-gcs-filesystem==0.35.0
|
122 |
+
termcolor==2.4.0
|
123 |
+
threadpoolctl==3.2.0
|
124 |
+
tokenizers==0.15.0
|
125 |
+
torch==2.1.2
|
126 |
+
torchaudio==2.1.2
|
127 |
+
torchvision==0.16.2
|
128 |
+
tqdm==4.66.1
|
129 |
+
transformers==4.36.2
|
130 |
+
triton==2.1.0
|
131 |
+
trl==0.7.7
|
132 |
+
typing-extensions==4.7.1
|
133 |
+
tyro==0.7.0
|
134 |
+
tzdata==2023.3
|
135 |
+
urllib3==1.26.18
|
136 |
+
wandb==0.16.1
|
137 |
+
werkzeug==3.0.1
|
138 |
+
wheel==0.41.2
|
139 |
+
wrapt==1.14.1
|
140 |
+
xxhash==3.4.1
|
141 |
+
yarl==1.9.4
|
wandb/run-20240201_173828-py26nu6m/files/wandb-metadata.json
ADDED
@@ -0,0 +1,558 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-5.15.0-1048-aws-x86_64-with-glibc2.31",
|
3 |
+
"python": "3.11.5",
|
4 |
+
"heartbeatAt": "2024-02-01T17:38:28.880477",
|
5 |
+
"startedAt": "2024-02-01T17:38:28.419493",
|
6 |
+
"docker": null,
|
7 |
+
"cuda": null,
|
8 |
+
"args": [
|
9 |
+
"config_full.yaml"
|
10 |
+
],
|
11 |
+
"state": "running",
|
12 |
+
"program": "/fsx/sanchit/distil-zephyr-1.5b-ssft/run_sft.py",
|
13 |
+
"codePathLocal": "run_sft.py",
|
14 |
+
"codePath": "run_sft.py",
|
15 |
+
"git": {
|
16 |
+
"remote": "https://huggingface.co/sanchit-gandhi/distil-zephyr-1.5b-ssft",
|
17 |
+
"commit": "79a4ae874a71e67016ded927e7d23351e5c7dab8"
|
18 |
+
},
|
19 |
+
"email": null,
|
20 |
+
"root": "/fsx/sanchit/distil-zephyr-1.5b-ssft",
|
21 |
+
"host": "ip-26-0-165-24",
|
22 |
+
"username": "sanchit",
|
23 |
+
"executable": "/fsx/sanchit/miniconda3/envs/venv/bin/python",
|
24 |
+
"cpu_count": 96,
|
25 |
+
"cpu_count_logical": 96,
|
26 |
+
"cpu_freq": {
|
27 |
+
"current": 2731.1230833333334,
|
28 |
+
"min": 0.0,
|
29 |
+
"max": 0.0
|
30 |
+
},
|
31 |
+
"cpu_freq_per_core": [
|
32 |
+
{
|
33 |
+
"current": 2650.0,
|
34 |
+
"min": 0.0,
|
35 |
+
"max": 0.0
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"current": 3598.237,
|
39 |
+
"min": 0.0,
|
40 |
+
"max": 0.0
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"current": 2650.0,
|
44 |
+
"min": 0.0,
|
45 |
+
"max": 0.0
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"current": 2650.0,
|
49 |
+
"min": 0.0,
|
50 |
+
"max": 0.0
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"current": 2650.0,
|
54 |
+
"min": 0.0,
|
55 |
+
"max": 0.0
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"current": 3593.987,
|
59 |
+
"min": 0.0,
|
60 |
+
"max": 0.0
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"current": 2650.0,
|
64 |
+
"min": 0.0,
|
65 |
+
"max": 0.0
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"current": 2650.0,
|
69 |
+
"min": 0.0,
|
70 |
+
"max": 0.0
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"current": 2650.0,
|
74 |
+
"min": 0.0,
|
75 |
+
"max": 0.0
|
76 |
+
},
|
77 |
+
{
|
78 |
+
"current": 2650.0,
|
79 |
+
"min": 0.0,
|
80 |
+
"max": 0.0
|
81 |
+
},
|
82 |
+
{
|
83 |
+
"current": 2650.0,
|
84 |
+
"min": 0.0,
|
85 |
+
"max": 0.0
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"current": 2650.0,
|
89 |
+
"min": 0.0,
|
90 |
+
"max": 0.0
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"current": 3597.474,
|
94 |
+
"min": 0.0,
|
95 |
+
"max": 0.0
|
96 |
+
},
|
97 |
+
{
|
98 |
+
"current": 2650.0,
|
99 |
+
"min": 0.0,
|
100 |
+
"max": 0.0
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"current": 2650.0,
|
104 |
+
"min": 0.0,
|
105 |
+
"max": 0.0
|
106 |
+
},
|
107 |
+
{
|
108 |
+
"current": 3587.972,
|
109 |
+
"min": 0.0,
|
110 |
+
"max": 0.0
|
111 |
+
},
|
112 |
+
{
|
113 |
+
"current": 3597.373,
|
114 |
+
"min": 0.0,
|
115 |
+
"max": 0.0
|
116 |
+
},
|
117 |
+
{
|
118 |
+
"current": 2650.0,
|
119 |
+
"min": 0.0,
|
120 |
+
"max": 0.0
|
121 |
+
},
|
122 |
+
{
|
123 |
+
"current": 2650.0,
|
124 |
+
"min": 0.0,
|
125 |
+
"max": 0.0
|
126 |
+
},
|
127 |
+
{
|
128 |
+
"current": 2650.0,
|
129 |
+
"min": 0.0,
|
130 |
+
"max": 0.0
|
131 |
+
},
|
132 |
+
{
|
133 |
+
"current": 2899.882,
|
134 |
+
"min": 0.0,
|
135 |
+
"max": 0.0
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"current": 2650.0,
|
139 |
+
"min": 0.0,
|
140 |
+
"max": 0.0
|
141 |
+
},
|
142 |
+
{
|
143 |
+
"current": 2650.0,
|
144 |
+
"min": 0.0,
|
145 |
+
"max": 0.0
|
146 |
+
},
|
147 |
+
{
|
148 |
+
"current": 2650.0,
|
149 |
+
"min": 0.0,
|
150 |
+
"max": 0.0
|
151 |
+
},
|
152 |
+
{
|
153 |
+
"current": 3598.404,
|
154 |
+
"min": 0.0,
|
155 |
+
"max": 0.0
|
156 |
+
},
|
157 |
+
{
|
158 |
+
"current": 2650.0,
|
159 |
+
"min": 0.0,
|
160 |
+
"max": 0.0
|
161 |
+
},
|
162 |
+
{
|
163 |
+
"current": 3597.582,
|
164 |
+
"min": 0.0,
|
165 |
+
"max": 0.0
|
166 |
+
},
|
167 |
+
{
|
168 |
+
"current": 2650.0,
|
169 |
+
"min": 0.0,
|
170 |
+
"max": 0.0
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"current": 2650.0,
|
174 |
+
"min": 0.0,
|
175 |
+
"max": 0.0
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"current": 2650.0,
|
179 |
+
"min": 0.0,
|
180 |
+
"max": 0.0
|
181 |
+
},
|
182 |
+
{
|
183 |
+
"current": 2650.0,
|
184 |
+
"min": 0.0,
|
185 |
+
"max": 0.0
|
186 |
+
},
|
187 |
+
{
|
188 |
+
"current": 2650.0,
|
189 |
+
"min": 0.0,
|
190 |
+
"max": 0.0
|
191 |
+
},
|
192 |
+
{
|
193 |
+
"current": 2650.0,
|
194 |
+
"min": 0.0,
|
195 |
+
"max": 0.0
|
196 |
+
},
|
197 |
+
{
|
198 |
+
"current": 2650.0,
|
199 |
+
"min": 0.0,
|
200 |
+
"max": 0.0
|
201 |
+
},
|
202 |
+
{
|
203 |
+
"current": 2650.0,
|
204 |
+
"min": 0.0,
|
205 |
+
"max": 0.0
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"current": 2650.0,
|
209 |
+
"min": 0.0,
|
210 |
+
"max": 0.0
|
211 |
+
},
|
212 |
+
{
|
213 |
+
"current": 2650.0,
|
214 |
+
"min": 0.0,
|
215 |
+
"max": 0.0
|
216 |
+
},
|
217 |
+
{
|
218 |
+
"current": 2650.0,
|
219 |
+
"min": 0.0,
|
220 |
+
"max": 0.0
|
221 |
+
},
|
222 |
+
{
|
223 |
+
"current": 2650.0,
|
224 |
+
"min": 0.0,
|
225 |
+
"max": 0.0
|
226 |
+
},
|
227 |
+
{
|
228 |
+
"current": 2650.0,
|
229 |
+
"min": 0.0,
|
230 |
+
"max": 0.0
|
231 |
+
},
|
232 |
+
{
|
233 |
+
"current": 2650.0,
|
234 |
+
"min": 0.0,
|
235 |
+
"max": 0.0
|
236 |
+
},
|
237 |
+
{
|
238 |
+
"current": 2650.0,
|
239 |
+
"min": 0.0,
|
240 |
+
"max": 0.0
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"current": 2650.0,
|
244 |
+
"min": 0.0,
|
245 |
+
"max": 0.0
|
246 |
+
},
|
247 |
+
{
|
248 |
+
"current": 2650.0,
|
249 |
+
"min": 0.0,
|
250 |
+
"max": 0.0
|
251 |
+
},
|
252 |
+
{
|
253 |
+
"current": 2650.0,
|
254 |
+
"min": 0.0,
|
255 |
+
"max": 0.0
|
256 |
+
},
|
257 |
+
{
|
258 |
+
"current": 2650.0,
|
259 |
+
"min": 0.0,
|
260 |
+
"max": 0.0
|
261 |
+
},
|
262 |
+
{
|
263 |
+
"current": 2650.0,
|
264 |
+
"min": 0.0,
|
265 |
+
"max": 0.0
|
266 |
+
},
|
267 |
+
{
|
268 |
+
"current": 2650.0,
|
269 |
+
"min": 0.0,
|
270 |
+
"max": 0.0
|
271 |
+
},
|
272 |
+
{
|
273 |
+
"current": 2650.0,
|
274 |
+
"min": 0.0,
|
275 |
+
"max": 0.0
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"current": 2650.0,
|
279 |
+
"min": 0.0,
|
280 |
+
"max": 0.0
|
281 |
+
},
|
282 |
+
{
|
283 |
+
"current": 2650.0,
|
284 |
+
"min": 0.0,
|
285 |
+
"max": 0.0
|
286 |
+
},
|
287 |
+
{
|
288 |
+
"current": 2650.0,
|
289 |
+
"min": 0.0,
|
290 |
+
"max": 0.0
|
291 |
+
},
|
292 |
+
{
|
293 |
+
"current": 2650.0,
|
294 |
+
"min": 0.0,
|
295 |
+
"max": 0.0
|
296 |
+
},
|
297 |
+
{
|
298 |
+
"current": 2650.0,
|
299 |
+
"min": 0.0,
|
300 |
+
"max": 0.0
|
301 |
+
},
|
302 |
+
{
|
303 |
+
"current": 2650.0,
|
304 |
+
"min": 0.0,
|
305 |
+
"max": 0.0
|
306 |
+
},
|
307 |
+
{
|
308 |
+
"current": 2650.0,
|
309 |
+
"min": 0.0,
|
310 |
+
"max": 0.0
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"current": 2650.0,
|
314 |
+
"min": 0.0,
|
315 |
+
"max": 0.0
|
316 |
+
},
|
317 |
+
{
|
318 |
+
"current": 2650.0,
|
319 |
+
"min": 0.0,
|
320 |
+
"max": 0.0
|
321 |
+
},
|
322 |
+
{
|
323 |
+
"current": 2650.0,
|
324 |
+
"min": 0.0,
|
325 |
+
"max": 0.0
|
326 |
+
},
|
327 |
+
{
|
328 |
+
"current": 2650.0,
|
329 |
+
"min": 0.0,
|
330 |
+
"max": 0.0
|
331 |
+
},
|
332 |
+
{
|
333 |
+
"current": 2650.0,
|
334 |
+
"min": 0.0,
|
335 |
+
"max": 0.0
|
336 |
+
},
|
337 |
+
{
|
338 |
+
"current": 2650.0,
|
339 |
+
"min": 0.0,
|
340 |
+
"max": 0.0
|
341 |
+
},
|
342 |
+
{
|
343 |
+
"current": 2650.0,
|
344 |
+
"min": 0.0,
|
345 |
+
"max": 0.0
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"current": 2650.0,
|
349 |
+
"min": 0.0,
|
350 |
+
"max": 0.0
|
351 |
+
},
|
352 |
+
{
|
353 |
+
"current": 2650.0,
|
354 |
+
"min": 0.0,
|
355 |
+
"max": 0.0
|
356 |
+
},
|
357 |
+
{
|
358 |
+
"current": 2650.0,
|
359 |
+
"min": 0.0,
|
360 |
+
"max": 0.0
|
361 |
+
},
|
362 |
+
{
|
363 |
+
"current": 2650.0,
|
364 |
+
"min": 0.0,
|
365 |
+
"max": 0.0
|
366 |
+
},
|
367 |
+
{
|
368 |
+
"current": 2650.0,
|
369 |
+
"min": 0.0,
|
370 |
+
"max": 0.0
|
371 |
+
},
|
372 |
+
{
|
373 |
+
"current": 2650.0,
|
374 |
+
"min": 0.0,
|
375 |
+
"max": 0.0
|
376 |
+
},
|
377 |
+
{
|
378 |
+
"current": 2650.0,
|
379 |
+
"min": 0.0,
|
380 |
+
"max": 0.0
|
381 |
+
},
|
382 |
+
{
|
383 |
+
"current": 2650.0,
|
384 |
+
"min": 0.0,
|
385 |
+
"max": 0.0
|
386 |
+
},
|
387 |
+
{
|
388 |
+
"current": 2650.0,
|
389 |
+
"min": 0.0,
|
390 |
+
"max": 0.0
|
391 |
+
},
|
392 |
+
{
|
393 |
+
"current": 2650.0,
|
394 |
+
"min": 0.0,
|
395 |
+
"max": 0.0
|
396 |
+
},
|
397 |
+
{
|
398 |
+
"current": 2650.0,
|
399 |
+
"min": 0.0,
|
400 |
+
"max": 0.0
|
401 |
+
},
|
402 |
+
{
|
403 |
+
"current": 2650.0,
|
404 |
+
"min": 0.0,
|
405 |
+
"max": 0.0
|
406 |
+
},
|
407 |
+
{
|
408 |
+
"current": 2650.0,
|
409 |
+
"min": 0.0,
|
410 |
+
"max": 0.0
|
411 |
+
},
|
412 |
+
{
|
413 |
+
"current": 2650.0,
|
414 |
+
"min": 0.0,
|
415 |
+
"max": 0.0
|
416 |
+
},
|
417 |
+
{
|
418 |
+
"current": 2650.0,
|
419 |
+
"min": 0.0,
|
420 |
+
"max": 0.0
|
421 |
+
},
|
422 |
+
{
|
423 |
+
"current": 2650.0,
|
424 |
+
"min": 0.0,
|
425 |
+
"max": 0.0
|
426 |
+
},
|
427 |
+
{
|
428 |
+
"current": 2650.0,
|
429 |
+
"min": 0.0,
|
430 |
+
"max": 0.0
|
431 |
+
},
|
432 |
+
{
|
433 |
+
"current": 2650.0,
|
434 |
+
"min": 0.0,
|
435 |
+
"max": 0.0
|
436 |
+
},
|
437 |
+
{
|
438 |
+
"current": 2650.0,
|
439 |
+
"min": 0.0,
|
440 |
+
"max": 0.0
|
441 |
+
},
|
442 |
+
{
|
443 |
+
"current": 2650.0,
|
444 |
+
"min": 0.0,
|
445 |
+
"max": 0.0
|
446 |
+
},
|
447 |
+
{
|
448 |
+
"current": 2650.0,
|
449 |
+
"min": 0.0,
|
450 |
+
"max": 0.0
|
451 |
+
},
|
452 |
+
{
|
453 |
+
"current": 2650.0,
|
454 |
+
"min": 0.0,
|
455 |
+
"max": 0.0
|
456 |
+
},
|
457 |
+
{
|
458 |
+
"current": 2650.0,
|
459 |
+
"min": 0.0,
|
460 |
+
"max": 0.0
|
461 |
+
},
|
462 |
+
{
|
463 |
+
"current": 2650.0,
|
464 |
+
"min": 0.0,
|
465 |
+
"max": 0.0
|
466 |
+
},
|
467 |
+
{
|
468 |
+
"current": 2650.0,
|
469 |
+
"min": 0.0,
|
470 |
+
"max": 0.0
|
471 |
+
},
|
472 |
+
{
|
473 |
+
"current": 2650.0,
|
474 |
+
"min": 0.0,
|
475 |
+
"max": 0.0
|
476 |
+
},
|
477 |
+
{
|
478 |
+
"current": 2650.0,
|
479 |
+
"min": 0.0,
|
480 |
+
"max": 0.0
|
481 |
+
},
|
482 |
+
{
|
483 |
+
"current": 2650.0,
|
484 |
+
"min": 0.0,
|
485 |
+
"max": 0.0
|
486 |
+
},
|
487 |
+
{
|
488 |
+
"current": 2650.0,
|
489 |
+
"min": 0.0,
|
490 |
+
"max": 0.0
|
491 |
+
},
|
492 |
+
{
|
493 |
+
"current": 2650.0,
|
494 |
+
"min": 0.0,
|
495 |
+
"max": 0.0
|
496 |
+
},
|
497 |
+
{
|
498 |
+
"current": 2650.0,
|
499 |
+
"min": 0.0,
|
500 |
+
"max": 0.0
|
501 |
+
},
|
502 |
+
{
|
503 |
+
"current": 2650.0,
|
504 |
+
"min": 0.0,
|
505 |
+
"max": 0.0
|
506 |
+
},
|
507 |
+
{
|
508 |
+
"current": 2650.0,
|
509 |
+
"min": 0.0,
|
510 |
+
"max": 0.0
|
511 |
+
}
|
512 |
+
],
|
513 |
+
"disk": {
|
514 |
+
"/": {
|
515 |
+
"total": 290.7472343444824,
|
516 |
+
"used": 57.44935989379883
|
517 |
+
}
|
518 |
+
},
|
519 |
+
"gpu": "NVIDIA H100 80GB HBM3",
|
520 |
+
"gpu_count": 8,
|
521 |
+
"gpu_devices": [
|
522 |
+
{
|
523 |
+
"name": "NVIDIA H100 80GB HBM3",
|
524 |
+
"memory_total": 85520809984
|
525 |
+
},
|
526 |
+
{
|
527 |
+
"name": "NVIDIA H100 80GB HBM3",
|
528 |
+
"memory_total": 85520809984
|
529 |
+
},
|
530 |
+
{
|
531 |
+
"name": "NVIDIA H100 80GB HBM3",
|
532 |
+
"memory_total": 85520809984
|
533 |
+
},
|
534 |
+
{
|
535 |
+
"name": "NVIDIA H100 80GB HBM3",
|
536 |
+
"memory_total": 85520809984
|
537 |
+
},
|
538 |
+
{
|
539 |
+
"name": "NVIDIA H100 80GB HBM3",
|
540 |
+
"memory_total": 85520809984
|
541 |
+
},
|
542 |
+
{
|
543 |
+
"name": "NVIDIA H100 80GB HBM3",
|
544 |
+
"memory_total": 85520809984
|
545 |
+
},
|
546 |
+
{
|
547 |
+
"name": "NVIDIA H100 80GB HBM3",
|
548 |
+
"memory_total": 85520809984
|
549 |
+
},
|
550 |
+
{
|
551 |
+
"name": "NVIDIA H100 80GB HBM3",
|
552 |
+
"memory_total": 85520809984
|
553 |
+
}
|
554 |
+
],
|
555 |
+
"memory": {
|
556 |
+
"total": 1999.9855346679688
|
557 |
+
}
|
558 |
+
}
|
wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"train/loss": 6.8326, "train/learning_rate": 1.9809800282473014e-05, "train/epoch": 0.16, "train/global_step": 170, "_timestamp": 1706809286.8025768, "_runtime": 178.35070276260376, "_step": 34}
|
wandb/run-20240201_173828-py26nu6m/logs/debug-internal.log
ADDED
@@ -0,0 +1,385 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-02-01 17:38:28,445 INFO StreamThr :237521 [internal.py:wandb_internal():86] W&B internal server running at pid: 237521, started at: 2024-02-01 17:38:28.443368
|
2 |
+
2024-02-01 17:38:28,446 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status
|
3 |
+
2024-02-01 17:38:28,454 INFO WriterThread:237521 [datastore.py:open_for_write():85] open: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/run-py26nu6m.wandb
|
4 |
+
2024-02-01 17:38:28,455 DEBUG SenderThread:237521 [sender.py:send():382] send: header
|
5 |
+
2024-02-01 17:38:28,476 DEBUG SenderThread:237521 [sender.py:send():382] send: run
|
6 |
+
2024-02-01 17:38:28,713 INFO SenderThread:237521 [dir_watcher.py:__init__():211] watching files in: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files
|
7 |
+
2024-02-01 17:38:28,713 INFO SenderThread:237521 [sender.py:_start_run_threads():1136] run started: py26nu6m with start time 1706809108.451874
|
8 |
+
2024-02-01 17:38:28,720 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: check_version
|
9 |
+
2024-02-01 17:38:28,720 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: check_version
|
10 |
+
2024-02-01 17:38:28,774 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: run_start
|
11 |
+
2024-02-01 17:38:28,833 DEBUG HandlerThread:237521 [system_info.py:__init__():32] System info init
|
12 |
+
2024-02-01 17:38:28,833 DEBUG HandlerThread:237521 [system_info.py:__init__():47] System info init done
|
13 |
+
2024-02-01 17:38:28,833 INFO HandlerThread:237521 [system_monitor.py:start():194] Starting system monitor
|
14 |
+
2024-02-01 17:38:28,833 INFO SystemMonitor:237521 [system_monitor.py:_start():158] Starting system asset monitoring threads
|
15 |
+
2024-02-01 17:38:28,833 INFO HandlerThread:237521 [system_monitor.py:probe():214] Collecting system info
|
16 |
+
2024-02-01 17:38:28,834 INFO SystemMonitor:237521 [interfaces.py:start():190] Started cpu monitoring
|
17 |
+
2024-02-01 17:38:28,834 INFO SystemMonitor:237521 [interfaces.py:start():190] Started disk monitoring
|
18 |
+
2024-02-01 17:38:28,834 INFO SystemMonitor:237521 [interfaces.py:start():190] Started gpu monitoring
|
19 |
+
2024-02-01 17:38:28,836 INFO SystemMonitor:237521 [interfaces.py:start():190] Started memory monitoring
|
20 |
+
2024-02-01 17:38:28,836 INFO SystemMonitor:237521 [interfaces.py:start():190] Started network monitoring
|
21 |
+
2024-02-01 17:38:28,880 DEBUG HandlerThread:237521 [system_info.py:probe():196] Probing system
|
22 |
+
2024-02-01 17:38:28,883 DEBUG HandlerThread:237521 [system_info.py:_probe_git():181] Probing git
|
23 |
+
2024-02-01 17:38:28,908 DEBUG HandlerThread:237521 [system_info.py:_probe_git():189] Probing git done
|
24 |
+
2024-02-01 17:38:28,908 DEBUG HandlerThread:237521 [system_info.py:probe():244] Probing system done
|
25 |
+
2024-02-01 17:38:28,908 DEBUG HandlerThread:237521 [system_monitor.py:probe():223] {'os': 'Linux-5.15.0-1048-aws-x86_64-with-glibc2.31', 'python': '3.11.5', 'heartbeatAt': '2024-02-01T17:38:28.880477', 'startedAt': '2024-02-01T17:38:28.419493', 'docker': None, 'cuda': None, 'args': ('config_full.yaml',), 'state': 'running', 'program': '/fsx/sanchit/distil-zephyr-1.5b-ssft/run_sft.py', 'codePathLocal': 'run_sft.py', 'codePath': 'run_sft.py', 'git': {'remote': 'https://huggingface.co/sanchit-gandhi/distil-zephyr-1.5b-ssft', 'commit': '79a4ae874a71e67016ded927e7d23351e5c7dab8'}, 'email': None, 'root': '/fsx/sanchit/distil-zephyr-1.5b-ssft', 'host': 'ip-26-0-165-24', 'username': 'sanchit', 'executable': '/fsx/sanchit/miniconda3/envs/venv/bin/python', 'cpu_count': 96, 'cpu_count_logical': 96, 'cpu_freq': {'current': 2731.1230833333334, 'min': 0.0, 'max': 0.0}, 'cpu_freq_per_core': [{'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 3598.237, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 3593.987, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 3597.474, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 3587.972, 'min': 0.0, 'max': 0.0}, {'current': 3597.373, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2899.882, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 
3598.404, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 3597.582, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 
0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}, {'current': 2650.0, 'min': 0.0, 'max': 0.0}], 'disk': {'/': {'total': 290.7472343444824, 'used': 57.44935989379883}}, 'gpu': 'NVIDIA H100 80GB HBM3', 'gpu_count': 8, 'gpu_devices': [{'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}, {'name': 'NVIDIA H100 80GB HBM3', 'memory_total': 85520809984}], 'memory': {'total': 1999.9855346679688}}
|
26 |
+
2024-02-01 17:38:28,908 INFO HandlerThread:237521 [system_monitor.py:probe():224] Finished collecting system info
|
27 |
+
2024-02-01 17:38:28,908 INFO HandlerThread:237521 [system_monitor.py:probe():227] Publishing system info
|
28 |
+
2024-02-01 17:38:28,908 DEBUG HandlerThread:237521 [system_info.py:_save_pip():52] Saving list of pip packages installed into the current environment
|
29 |
+
2024-02-01 17:38:28,910 DEBUG HandlerThread:237521 [system_info.py:_save_pip():68] Saving pip packages done
|
30 |
+
2024-02-01 17:38:28,910 DEBUG HandlerThread:237521 [system_info.py:_save_conda():75] Saving list of conda packages installed into the current environment
|
31 |
+
2024-02-01 17:38:29,716 INFO Thread-12 :237521 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/conda-environment.yaml
|
32 |
+
2024-02-01 17:38:29,716 INFO Thread-12 :237521 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/requirements.txt
|
33 |
+
2024-02-01 17:38:34,420 DEBUG HandlerThread:237521 [system_info.py:_save_conda():87] Saving conda packages done
|
34 |
+
2024-02-01 17:38:34,423 INFO HandlerThread:237521 [system_monitor.py:probe():229] Finished publishing system info
|
35 |
+
2024-02-01 17:38:34,457 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
36 |
+
2024-02-01 17:38:34,457 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: keepalive
|
37 |
+
2024-02-01 17:38:34,458 DEBUG SenderThread:237521 [sender.py:send():382] send: files
|
38 |
+
2024-02-01 17:38:34,458 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-metadata.json with policy now
|
39 |
+
2024-02-01 17:38:34,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
40 |
+
2024-02-01 17:38:34,465 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
41 |
+
2024-02-01 17:38:34,466 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
42 |
+
2024-02-01 17:38:34,553 DEBUG SenderThread:237521 [sender.py:send():382] send: telemetry
|
43 |
+
2024-02-01 17:38:34,554 DEBUG SenderThread:237521 [sender.py:send():382] send: config
|
44 |
+
2024-02-01 17:38:34,555 DEBUG SenderThread:237521 [sender.py:send():382] send: metric
|
45 |
+
2024-02-01 17:38:34,555 DEBUG SenderThread:237521 [sender.py:send():382] send: telemetry
|
46 |
+
2024-02-01 17:38:34,555 DEBUG SenderThread:237521 [sender.py:send():382] send: metric
|
47 |
+
2024-02-01 17:38:34,555 WARNING SenderThread:237521 [sender.py:send_metric():1343] Seen metric with glob (shouldn't happen)
|
48 |
+
2024-02-01 17:38:34,555 DEBUG SenderThread:237521 [sender.py:send():382] send: telemetry
|
49 |
+
2024-02-01 17:38:34,721 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/conda-environment.yaml
|
50 |
+
2024-02-01 17:38:34,722 INFO Thread-12 :237521 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-metadata.json
|
51 |
+
2024-02-01 17:38:34,722 INFO Thread-12 :237521 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
52 |
+
2024-02-01 17:38:34,796 INFO wandb-upload_0:237521 [upload_job.py:push():131] Uploaded file /tmp/tmpfs5f2n7fwandb/421ry27q-wandb-metadata.json
|
53 |
+
2024-02-01 17:38:36,724 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
54 |
+
2024-02-01 17:38:39,559 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
55 |
+
2024-02-01 17:38:42,110 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
56 |
+
2024-02-01 17:38:42,112 DEBUG SenderThread:237521 [sender.py:send():382] send: metric
|
57 |
+
2024-02-01 17:38:42,112 DEBUG SenderThread:237521 [sender.py:send():382] send: metric
|
58 |
+
2024-02-01 17:38:42,112 DEBUG SenderThread:237521 [sender.py:send():382] send: metric
|
59 |
+
2024-02-01 17:38:42,112 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
60 |
+
2024-02-01 17:38:42,113 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
61 |
+
2024-02-01 17:38:42,116 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
62 |
+
2024-02-01 17:38:42,732 INFO Thread-12 :237521 [dir_watcher.py:_on_file_created():271] file/dir created: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
63 |
+
2024-02-01 17:38:44,735 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
64 |
+
2024-02-01 17:38:44,770 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
65 |
+
2024-02-01 17:38:45,673 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
66 |
+
2024-02-01 17:38:45,674 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
67 |
+
2024-02-01 17:38:45,674 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
68 |
+
2024-02-01 17:38:45,676 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
69 |
+
2024-02-01 17:38:45,737 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
70 |
+
2024-02-01 17:38:46,738 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
71 |
+
2024-02-01 17:38:48,741 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
72 |
+
2024-02-01 17:38:49,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
73 |
+
2024-02-01 17:38:49,465 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
74 |
+
2024-02-01 17:38:49,467 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
75 |
+
2024-02-01 17:38:50,190 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
76 |
+
2024-02-01 17:38:50,230 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
77 |
+
2024-02-01 17:38:50,232 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
78 |
+
2024-02-01 17:38:50,232 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
79 |
+
2024-02-01 17:38:50,234 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
80 |
+
2024-02-01 17:38:50,745 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
81 |
+
2024-02-01 17:38:50,745 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
82 |
+
2024-02-01 17:38:52,747 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
83 |
+
2024-02-01 17:38:54,715 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
84 |
+
2024-02-01 17:38:54,716 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
85 |
+
2024-02-01 17:38:54,716 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
86 |
+
2024-02-01 17:38:54,718 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
87 |
+
2024-02-01 17:38:54,751 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
88 |
+
2024-02-01 17:38:54,751 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
89 |
+
2024-02-01 17:38:55,617 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
90 |
+
2024-02-01 17:38:56,753 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
91 |
+
2024-02-01 17:38:58,756 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
92 |
+
2024-02-01 17:38:59,239 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
93 |
+
2024-02-01 17:38:59,240 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
94 |
+
2024-02-01 17:38:59,240 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
95 |
+
2024-02-01 17:38:59,242 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
96 |
+
2024-02-01 17:38:59,758 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
97 |
+
2024-02-01 17:39:00,760 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
98 |
+
2024-02-01 17:39:01,049 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
99 |
+
2024-02-01 17:39:01,762 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/config.yaml
|
100 |
+
2024-02-01 17:39:02,763 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
101 |
+
2024-02-01 17:39:03,754 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
102 |
+
2024-02-01 17:39:03,755 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
103 |
+
2024-02-01 17:39:03,755 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
104 |
+
2024-02-01 17:39:03,757 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
105 |
+
2024-02-01 17:39:03,766 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
106 |
+
2024-02-01 17:39:04,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
107 |
+
2024-02-01 17:39:04,466 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
108 |
+
2024-02-01 17:39:04,466 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
109 |
+
2024-02-01 17:39:04,767 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
110 |
+
2024-02-01 17:39:06,478 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
111 |
+
2024-02-01 17:39:06,770 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
112 |
+
2024-02-01 17:39:08,293 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
113 |
+
2024-02-01 17:39:08,294 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
114 |
+
2024-02-01 17:39:08,295 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
115 |
+
2024-02-01 17:39:08,297 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
116 |
+
2024-02-01 17:39:08,773 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
117 |
+
2024-02-01 17:39:08,774 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
118 |
+
2024-02-01 17:39:10,776 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
119 |
+
2024-02-01 17:39:11,923 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
120 |
+
2024-02-01 17:39:12,779 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
121 |
+
2024-02-01 17:39:12,828 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
122 |
+
2024-02-01 17:39:12,830 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
123 |
+
2024-02-01 17:39:12,830 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
124 |
+
2024-02-01 17:39:12,832 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
125 |
+
2024-02-01 17:39:13,781 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
126 |
+
2024-02-01 17:39:14,782 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
127 |
+
2024-02-01 17:39:16,785 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
128 |
+
2024-02-01 17:39:17,363 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
129 |
+
2024-02-01 17:39:17,365 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
130 |
+
2024-02-01 17:39:17,366 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
131 |
+
2024-02-01 17:39:17,367 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
132 |
+
2024-02-01 17:39:17,368 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
133 |
+
2024-02-01 17:39:17,788 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
134 |
+
2024-02-01 17:39:18,789 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
135 |
+
2024-02-01 17:39:19,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
136 |
+
2024-02-01 17:39:19,466 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
137 |
+
2024-02-01 17:39:19,467 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
138 |
+
2024-02-01 17:39:20,792 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
139 |
+
2024-02-01 17:39:21,909 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
140 |
+
2024-02-01 17:39:21,911 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
141 |
+
2024-02-01 17:39:21,911 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
142 |
+
2024-02-01 17:39:21,913 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
143 |
+
2024-02-01 17:39:22,795 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
144 |
+
2024-02-01 17:39:22,796 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
145 |
+
2024-02-01 17:39:22,815 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
146 |
+
2024-02-01 17:39:24,798 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
147 |
+
2024-02-01 17:39:26,448 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
148 |
+
2024-02-01 17:39:26,450 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
149 |
+
2024-02-01 17:39:26,450 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
150 |
+
2024-02-01 17:39:26,452 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
151 |
+
2024-02-01 17:39:26,802 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
152 |
+
2024-02-01 17:39:26,802 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
153 |
+
2024-02-01 17:39:28,269 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
154 |
+
2024-02-01 17:39:28,805 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
155 |
+
2024-02-01 17:39:28,836 DEBUG SystemMonitor:237521 [system_monitor.py:_start():172] Starting system metrics aggregation loop
|
156 |
+
2024-02-01 17:39:28,850 DEBUG SenderThread:237521 [sender.py:send():382] send: stats
|
157 |
+
2024-02-01 17:39:30,807 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
158 |
+
2024-02-01 17:39:31,001 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
159 |
+
2024-02-01 17:39:31,003 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
160 |
+
2024-02-01 17:39:31,003 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
161 |
+
2024-02-01 17:39:31,005 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
162 |
+
2024-02-01 17:39:31,810 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
163 |
+
2024-02-01 17:39:32,811 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
164 |
+
2024-02-01 17:39:33,729 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
165 |
+
2024-02-01 17:39:34,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
166 |
+
2024-02-01 17:39:34,465 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
167 |
+
2024-02-01 17:39:34,468 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
168 |
+
2024-02-01 17:39:34,814 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
169 |
+
2024-02-01 17:39:35,548 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
170 |
+
2024-02-01 17:39:35,550 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
171 |
+
2024-02-01 17:39:35,550 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
172 |
+
2024-02-01 17:39:35,552 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
173 |
+
2024-02-01 17:39:35,816 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
174 |
+
2024-02-01 17:39:36,817 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
175 |
+
2024-02-01 17:39:38,820 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
176 |
+
2024-02-01 17:39:39,188 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
177 |
+
2024-02-01 17:39:40,104 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
178 |
+
2024-02-01 17:39:40,105 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
179 |
+
2024-02-01 17:39:40,105 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
180 |
+
2024-02-01 17:39:40,108 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
181 |
+
2024-02-01 17:39:40,824 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
182 |
+
2024-02-01 17:39:40,824 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
183 |
+
2024-02-01 17:39:42,826 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
184 |
+
2024-02-01 17:39:44,651 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
185 |
+
2024-02-01 17:39:44,652 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
186 |
+
2024-02-01 17:39:44,653 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
187 |
+
2024-02-01 17:39:44,653 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
188 |
+
2024-02-01 17:39:44,655 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
189 |
+
2024-02-01 17:39:44,830 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
190 |
+
2024-02-01 17:39:44,830 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
191 |
+
2024-02-01 17:39:46,833 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
192 |
+
2024-02-01 17:39:48,835 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
193 |
+
2024-02-01 17:39:49,211 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
194 |
+
2024-02-01 17:39:49,212 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
195 |
+
2024-02-01 17:39:49,212 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
196 |
+
2024-02-01 17:39:49,214 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
197 |
+
2024-02-01 17:39:49,466 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
198 |
+
2024-02-01 17:39:49,466 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
199 |
+
2024-02-01 17:39:49,467 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
200 |
+
2024-02-01 17:39:49,838 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
201 |
+
2024-02-01 17:39:50,121 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
202 |
+
2024-02-01 17:39:50,839 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
203 |
+
2024-02-01 17:39:52,842 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
204 |
+
2024-02-01 17:39:53,762 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
205 |
+
2024-02-01 17:39:53,763 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
206 |
+
2024-02-01 17:39:53,763 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
207 |
+
2024-02-01 17:39:53,765 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
208 |
+
2024-02-01 17:39:53,844 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
209 |
+
2024-02-01 17:39:54,845 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
210 |
+
2024-02-01 17:39:55,580 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
211 |
+
2024-02-01 17:39:56,848 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
212 |
+
2024-02-01 17:39:58,314 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
213 |
+
2024-02-01 17:39:58,316 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
214 |
+
2024-02-01 17:39:58,316 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
215 |
+
2024-02-01 17:39:58,318 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
216 |
+
2024-02-01 17:39:58,842 DEBUG SenderThread:237521 [sender.py:send():382] send: stats
|
217 |
+
2024-02-01 17:39:58,852 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
218 |
+
2024-02-01 17:39:58,852 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
219 |
+
2024-02-01 17:40:00,854 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
220 |
+
2024-02-01 17:40:01,035 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
221 |
+
2024-02-01 17:40:02,853 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
222 |
+
2024-02-01 17:40:02,855 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
223 |
+
2024-02-01 17:40:02,855 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
224 |
+
2024-02-01 17:40:02,857 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
225 |
+
2024-02-01 17:40:02,858 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
226 |
+
2024-02-01 17:40:02,859 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
227 |
+
2024-02-01 17:40:04,465 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
228 |
+
2024-02-01 17:40:04,466 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
229 |
+
2024-02-01 17:40:04,467 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
230 |
+
2024-02-01 17:40:04,861 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
231 |
+
2024-02-01 17:40:06,498 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
232 |
+
2024-02-01 17:40:06,864 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
233 |
+
2024-02-01 17:40:07,408 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
234 |
+
2024-02-01 17:40:07,409 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
235 |
+
2024-02-01 17:40:07,409 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
236 |
+
2024-02-01 17:40:07,411 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
237 |
+
2024-02-01 17:40:07,866 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
238 |
+
2024-02-01 17:40:08,867 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
239 |
+
2024-02-01 17:40:10,870 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
240 |
+
2024-02-01 17:40:11,953 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
241 |
+
2024-02-01 17:40:11,954 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
242 |
+
2024-02-01 17:40:11,955 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
243 |
+
2024-02-01 17:40:11,956 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
244 |
+
2024-02-01 17:40:11,958 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
245 |
+
2024-02-01 17:40:12,874 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
246 |
+
2024-02-01 17:40:12,874 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
247 |
+
2024-02-01 17:40:14,876 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
248 |
+
2024-02-01 17:40:16,879 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
249 |
+
2024-02-01 17:40:17,215 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
250 |
+
2024-02-01 17:40:18,882 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
251 |
+
2024-02-01 17:40:19,958 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
252 |
+
2024-02-01 17:40:19,959 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
253 |
+
2024-02-01 17:40:19,959 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
254 |
+
2024-02-01 17:40:22,278 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
255 |
+
2024-02-01 17:40:22,888 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
256 |
+
2024-02-01 17:40:24,892 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
257 |
+
2024-02-01 17:40:26,895 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
258 |
+
2024-02-01 17:40:27,568 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
259 |
+
2024-02-01 17:40:27,569 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
260 |
+
2024-02-01 17:40:27,571 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
261 |
+
2024-02-01 17:40:27,571 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
262 |
+
2024-02-01 17:40:27,573 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
263 |
+
2024-02-01 17:40:27,897 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
264 |
+
2024-02-01 17:40:28,844 DEBUG SenderThread:237521 [sender.py:send():382] send: stats
|
265 |
+
2024-02-01 17:40:28,898 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
266 |
+
2024-02-01 17:40:30,901 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
267 |
+
2024-02-01 17:40:32,115 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
268 |
+
2024-02-01 17:40:32,117 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
269 |
+
2024-02-01 17:40:32,117 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
270 |
+
2024-02-01 17:40:32,119 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
271 |
+
2024-02-01 17:40:32,904 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
272 |
+
2024-02-01 17:40:32,904 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
273 |
+
2024-02-01 17:40:33,026 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
274 |
+
2024-02-01 17:40:34,907 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
275 |
+
2024-02-01 17:40:34,920 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
276 |
+
2024-02-01 17:40:34,920 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
277 |
+
2024-02-01 17:40:34,957 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
278 |
+
2024-02-01 17:40:36,665 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
279 |
+
2024-02-01 17:40:36,667 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
280 |
+
2024-02-01 17:40:36,667 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
281 |
+
2024-02-01 17:40:36,669 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
282 |
+
2024-02-01 17:40:36,910 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
283 |
+
2024-02-01 17:40:36,911 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
284 |
+
2024-02-01 17:40:38,487 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
285 |
+
2024-02-01 17:40:38,913 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
286 |
+
2024-02-01 17:40:40,915 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
287 |
+
2024-02-01 17:40:41,219 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
288 |
+
2024-02-01 17:40:41,220 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
289 |
+
2024-02-01 17:40:41,221 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
290 |
+
2024-02-01 17:40:41,223 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
291 |
+
2024-02-01 17:40:41,917 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
292 |
+
2024-02-01 17:40:42,919 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
293 |
+
2024-02-01 17:40:43,949 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
294 |
+
2024-02-01 17:40:44,922 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
295 |
+
2024-02-01 17:40:45,773 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
296 |
+
2024-02-01 17:40:45,775 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
297 |
+
2024-02-01 17:40:45,776 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
298 |
+
2024-02-01 17:40:45,778 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
299 |
+
2024-02-01 17:40:45,924 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
300 |
+
2024-02-01 17:40:46,925 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
301 |
+
2024-02-01 17:40:48,927 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
302 |
+
2024-02-01 17:40:49,410 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
303 |
+
2024-02-01 17:40:49,920 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
304 |
+
2024-02-01 17:40:49,920 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
305 |
+
2024-02-01 17:40:49,957 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
306 |
+
2024-02-01 17:40:50,362 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
307 |
+
2024-02-01 17:40:50,930 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
308 |
+
2024-02-01 17:40:51,403 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
309 |
+
2024-02-01 17:40:51,403 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
310 |
+
2024-02-01 17:40:51,405 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
311 |
+
2024-02-01 17:40:51,932 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
312 |
+
2024-02-01 17:40:52,934 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
313 |
+
2024-02-01 17:40:54,873 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
314 |
+
2024-02-01 17:40:54,874 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
315 |
+
2024-02-01 17:40:54,876 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
316 |
+
2024-02-01 17:40:54,876 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
317 |
+
2024-02-01 17:40:54,878 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
318 |
+
2024-02-01 17:40:54,937 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
319 |
+
2024-02-01 17:40:54,937 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
320 |
+
2024-02-01 17:40:56,940 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
321 |
+
2024-02-01 17:40:58,846 DEBUG SenderThread:237521 [sender.py:send():382] send: stats
|
322 |
+
2024-02-01 17:40:58,942 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
323 |
+
2024-02-01 17:40:59,420 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
324 |
+
2024-02-01 17:40:59,422 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
325 |
+
2024-02-01 17:40:59,422 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
326 |
+
2024-02-01 17:40:59,424 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
327 |
+
2024-02-01 17:40:59,944 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
328 |
+
2024-02-01 17:41:00,330 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
329 |
+
2024-02-01 17:41:00,946 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
330 |
+
2024-02-01 17:41:02,948 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
331 |
+
2024-02-01 17:41:03,975 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
332 |
+
2024-02-01 17:41:03,976 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
333 |
+
2024-02-01 17:41:03,976 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
334 |
+
2024-02-01 17:41:03,978 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
335 |
+
2024-02-01 17:41:04,920 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
336 |
+
2024-02-01 17:41:04,920 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
337 |
+
2024-02-01 17:41:04,952 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
338 |
+
2024-02-01 17:41:04,952 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
339 |
+
2024-02-01 17:41:04,957 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
340 |
+
2024-02-01 17:41:05,797 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
341 |
+
2024-02-01 17:41:06,955 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
342 |
+
2024-02-01 17:41:08,527 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
343 |
+
2024-02-01 17:41:08,529 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
344 |
+
2024-02-01 17:41:08,529 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
345 |
+
2024-02-01 17:41:08,531 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
346 |
+
2024-02-01 17:41:08,958 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
347 |
+
2024-02-01 17:41:08,959 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
348 |
+
2024-02-01 17:41:10,961 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
349 |
+
2024-02-01 17:41:11,264 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
350 |
+
2024-02-01 17:41:12,964 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
351 |
+
2024-02-01 17:41:13,085 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
352 |
+
2024-02-01 17:41:13,086 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
353 |
+
2024-02-01 17:41:13,087 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
354 |
+
2024-02-01 17:41:13,089 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
355 |
+
2024-02-01 17:41:13,966 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
356 |
+
2024-02-01 17:41:14,967 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
357 |
+
2024-02-01 17:41:16,736 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
358 |
+
2024-02-01 17:41:16,969 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
359 |
+
2024-02-01 17:41:17,649 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
360 |
+
2024-02-01 17:41:17,650 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
361 |
+
2024-02-01 17:41:17,651 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
362 |
+
2024-02-01 17:41:17,653 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
363 |
+
2024-02-01 17:41:17,972 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
364 |
+
2024-02-01 17:41:18,973 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
365 |
+
2024-02-01 17:41:19,920 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: stop_status
|
366 |
+
2024-02-01 17:41:19,920 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: stop_status
|
367 |
+
2024-02-01 17:41:19,957 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: internal_messages
|
368 |
+
2024-02-01 17:41:20,976 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
369 |
+
2024-02-01 17:41:22,234 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
370 |
+
2024-02-01 17:41:22,235 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
371 |
+
2024-02-01 17:41:22,236 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
372 |
+
2024-02-01 17:41:22,236 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
373 |
+
2024-02-01 17:41:22,238 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
374 |
+
2024-02-01 17:41:22,979 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
375 |
+
2024-02-01 17:41:22,979 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
376 |
+
2024-02-01 17:41:24,981 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
377 |
+
2024-02-01 17:41:26,803 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: partial_history
|
378 |
+
2024-02-01 17:41:26,804 DEBUG SenderThread:237521 [sender.py:send():382] send: history
|
379 |
+
2024-02-01 17:41:26,805 DEBUG SenderThread:237521 [sender.py:send_request():409] send_request: summary_record
|
380 |
+
2024-02-01 17:41:26,806 INFO SenderThread:237521 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
|
381 |
+
2024-02-01 17:41:26,985 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/wandb-summary.json
|
382 |
+
2024-02-01 17:41:26,985 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
383 |
+
2024-02-01 17:41:27,718 DEBUG HandlerThread:237521 [handler.py:handle_request():146] handle_request: status_report
|
384 |
+
2024-02-01 17:41:28,848 DEBUG SenderThread:237521 [sender.py:send():382] send: stats
|
385 |
+
2024-02-01 17:41:28,987 INFO Thread-12 :237521 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/files/output.log
|
wandb/run-20240201_173828-py26nu6m/logs/debug.log
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Current SDK version is 0.16.1
|
2 |
+
2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Configure stats pid to 237059
|
3 |
+
2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Loading settings from /admin/home/sanchit/.config/wandb/settings
|
4 |
+
2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Loading settings from /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/settings
|
5 |
+
2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
|
6 |
+
2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
|
7 |
+
2024-02-01 17:38:28,434 INFO MainThread:237059 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'run_sft.py', 'program_abspath': '/fsx/sanchit/distil-zephyr-1.5b-ssft/run_sft.py', 'program': '/fsx/sanchit/distil-zephyr-1.5b-ssft/run_sft.py'}
|
8 |
+
2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:_log_setup():524] Logging user logs to /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/logs/debug.log
|
9 |
+
2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:_log_setup():525] Logging internal logs to /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_173828-py26nu6m/logs/debug-internal.log
|
10 |
+
2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:init():564] calling init triggers
|
11 |
+
2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:init():571] wandb.init called with sweep_config: {}
|
12 |
+
config: {}
|
13 |
+
2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:init():614] starting backend
|
14 |
+
2024-02-01 17:38:28,435 INFO MainThread:237059 [wandb_init.py:init():618] setting up manager
|
15 |
+
2024-02-01 17:38:28,441 INFO MainThread:237059 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
16 |
+
2024-02-01 17:38:28,451 INFO MainThread:237059 [wandb_init.py:init():624] backend started and connected
|
17 |
+
2024-02-01 17:38:28,453 INFO MainThread:237059 [wandb_init.py:init():716] updated telemetry
|
18 |
+
2024-02-01 17:38:28,475 INFO MainThread:237059 [wandb_init.py:init():749] communicating run to backend with 90.0 second timeout
|
19 |
+
2024-02-01 17:38:28,720 INFO MainThread:237059 [wandb_run.py:_on_init():2254] communicating current version
|
20 |
+
2024-02-01 17:38:28,767 INFO MainThread:237059 [wandb_run.py:_on_init():2263] got version response upgrade_message: "wandb version 0.16.2 is available! To upgrade, please run:\n $ pip install wandb --upgrade"
|
21 |
+
|
22 |
+
2024-02-01 17:38:28,767 INFO MainThread:237059 [wandb_init.py:init():800] starting run threads in backend
|
23 |
+
2024-02-01 17:38:34,465 INFO MainThread:237059 [wandb_run.py:_console_start():2233] atexit reg
|
24 |
+
2024-02-01 17:38:34,465 INFO MainThread:237059 [wandb_run.py:_redirect():2088] redirect: wrap_raw
|
25 |
+
2024-02-01 17:38:34,466 INFO MainThread:237059 [wandb_run.py:_redirect():2153] Wrapping output streams.
|
26 |
+
2024-02-01 17:38:34,466 INFO MainThread:237059 [wandb_run.py:_redirect():2178] Redirects installed.
|
27 |
+
2024-02-01 17:38:34,467 INFO MainThread:237059 [wandb_init.py:init():841] run started, returning control to user process
|
28 |
+
2024-02-01 17:38:34,468 INFO MainThread:237059 [wandb_run.py:_config_callback():1342] config_cb None None {'vocab_size': 32000, 'max_position_embeddings': 32768, 'hidden_size': 4096, 'intermediate_size': 14336, 'num_hidden_layers': 6, 'num_attention_heads': 32, 'sliding_window': 4096, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'use_cache': False, 'rope_theta': 10000.0, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['MistralForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': None, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'sanchit-gandhi/Mistral-7B-v0.1-6-layer', 'transformers_version': '4.36.2', 'model_type': 'mistral', 'output_dir': './', 'overwrite_output_dir': True, 
'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.1, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb01_17-38-02_ip-26-0-165-24', 'logging_strategy': 'steps', 'logging_first_step': True, 'logging_steps': 5, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 
'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': {'use_reentrant': False}, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'max_seq_length': 2048}
|
wandb/run-20240201_173828-py26nu6m/run-py26nu6m.wandb
ADDED
Binary file (98.4 kB). View file
|
|