YAML Metadata Warning: empty or missing yaml metadata in repo card (https://huggingface.co./docs/hub/model-cards#model-card-metadata)

Experiment Configuration

# Experiment configuration for a language-model pretraining run.
# `_target_` keys look like Hydra-style instantiation paths (class to construct) — TODO confirm.

callbacks:
  # Gradient accumulation: factor 4 from step 0 onward,
  # so effective batch size = data.batch_size (16) * 4 = 64.
  grad_accum:
    _target_: src.callbacks.gradient_accumulation.GradientAccumulationScheduler
    scheduling:
      0: 4
  # Gradient-norm logging: total L2 norm only (only_total: true),
  # no histograms, no per-parameter weight distributions.
  grad_norm:
    _target_: src.callbacks.grad_norm.GradNorm
    check_clipping: false
    group_separator: /
    histogram_freq: null
    log_weight_distribution: false
    norm_type: 2
    only_total: true
  lr_monitor:
    _target_: src.callbacks.lr_monitor.SimpleLearningRateMonitor
  # Checkpointing: keep every checkpoint (save_top_k: -1), one per 2000
  # train steps, named by step; same 2000-step cadence as
  # trainer.val_check_interval below, so each checkpoint has a matching eval.
  model_checkpoint:
    _target_: src.callbacks.model_checkpoint.ModelCheckpoint
    dirpath: .checkpoints
    enable_version_counter: false
    every_n_train_steps: 2000
    filename: '{step}'
    save_initial_checkpoint: true
    save_last: link
    save_top_k: -1
    verbose: true
  speed_monitor:
    _target_: src.callbacks.speed_monitor.SpeedMonitor
# DataLoader settings (train batch 16, eval batch 64).
data:
  batch_size: 16
  drop_last: false
  eval_batch_size: 64
  multiprocessing_context: null
  num_workers: 8
  persistent_workers: false
  pin_memory: true
  prefetch_factor: 2
  shuffle: true
dataset: minipile
loggers:
  # TensorBoard logs written to the run folder itself (empty name, version "./").
  tensorboard:
    _target_: src.loggers.TensorBoardLogger
    name: ''
    save_dir: ./
    version: ./
model: smol_llama-81M-tied
# AdamW + warmup-stable-decay schedule:
# 2000 warmup + 46000 stable + 2000 decay = 50000 steps = trainer.max_steps.
optim:
  lr: 0.0006
  num_warmup_steps: 2000
  optim_kwargs:
    betas:
    - 0.9
    - 0.95
    eps: 1.0e-08
    fused: true
  optim_name: adamw
  scheduler_kwargs:
    min_lr_ratio: 0.01
    num_decay_steps: 2000
    num_stable_steps: 46000
  scheduler_name: warmup_stable_decay
  weight_decay: 0.1
out_parent_folder: model_train
resume_from_checkpoint: null
run_folder: minipile/smol_llama-81M-tied_wordpiece32000minipile_2025-02-03T17-14-42
save_initial_checkpoint: true
seed: 42
# Tokenizer: WordPiece, 32k vocabulary, trained on minipile (per the name).
tok_name: wordpiece32000minipile
tok_path: /home/pl487/rdd/outputs/tokenizers/wordpiece32000minipile
torch_compile: true
train_data_path: /home/pl487/rdd/data/minipile/wordpiece32000minipile/train
# Trainer: bf16 precision, grad clipping at L2 norm 1.0, validation
# (capped at 500 batches) every 2000 steps.
trainer:
  accelerator: gpu
  deterministic: false
  enable_progress_bar: true
  fast_dev_run: false
  gradient_clip_algorithm: norm
  gradient_clip_val: 1.0
  limit_val_batches: 500
  log_every_n_steps: 1
  max_steps: 50000
  precision: bf16-true
  val_check_interval: 2000
val_data_path: /home/pl487/rdd/data/minipile/wordpiece32000minipile/validation
Downloads last month

-

Downloads are not tracked for this model. How to track
Inference Providers NEW
This model is not currently available via any of the supported Inference Providers.
The model cannot be deployed to the HF Inference API: The model has no library tag.

Collection including pietrolesci/smol_llama-81M-tied_wordpiece32000minipile