---
# Training configuration, reconstructed into block-style YAML.
#
# NOTE(review): the original text had lost all line breaks and indentation,
# so nesting was rebuilt from key order and from avoiding duplicate keys.
# Scalar values are unchanged. Spots where the nesting was ambiguous carry
# their own NOTE(review) comment - please confirm those.

# Dataset description and per-variable preprocessing.
data:
  format: zarr
  resolution: n320
  frequency: 6h
  timestep: 6h
  forcing:
    - cos_latitude
    - cos_longitude
    - sin_latitude
    - sin_longitude
    - cos_julian_day
    - cos_local_time
    - sin_julian_day
    - sin_local_time
    - insolation
    - lsm
    - sdor
    - slor
    - z
  diagnostic:
    - tp
    - cp
    - sf
    - tcc
    - hcc
    - lcc
    - mcc
    - ro
    - ssrd
    - strd
    - 100u
    - 100v
  remapped: null
  normalizer:
    default: mean-std
    remap:
      cp: tp
      sf: tp
    std:
      - tp
      - cp
      - sf
      - ro
      - tcw
      - ssrd
      - q_50
      - q_100
      - q_150
      - q_200
      - q_250
      - q_300
      - q_400
      - q_500
      - q_600
      - q_700
      - q_850
      - q_925
      - q_1000
    min-max: null
    max:
      - sdor
      - slor
      - z
    none:
      - cos_latitude
      - cos_longitude
      - sin_latitude
      - sin_longitude
      - cos_julian_day
      - cos_local_time
      - sin_julian_day
      - sin_local_time
      - insolation
      - lsm
      - tcc
      - mcc
      - hcc
      - lcc
      - swvl1
      - swvl2
  # "none" below is the plain string, not a YAML null.
  imputer:
    default: none
  remapper:
    default: none
  processors:
    normalizer:
      _target_: anemoi.models.preprocessing.normalizer.InputNormalizer
      _convert_: all
      # Same values as data.normalizer above, repeated literally; keep the
      # two copies in sync when editing either one.
      config:
        default: mean-std
        remap:
          cp: tp
          sf: tp
        std:
          - tp
          - cp
          - sf
          - ro
          - tcw
          - ssrd
          - q_50
          - q_100
          - q_150
          - q_200
          - q_250
          - q_300
          - q_400
          - q_500
          - q_600
          - q_700
          - q_850
          - q_925
          - q_1000
        min-max: null
        max:
          - sdor
          - slor
          - z
        none:
          - cos_latitude
          - cos_longitude
          - sin_latitude
          - sin_longitude
          - cos_julian_day
          - cos_local_time
          - sin_julian_day
          - sin_local_time
          - insolation
          - lsm
          - tcc
          - mcc
          - hcc
          - lcc
          - swvl1
          - swvl2
  num_features: 115

# Data loading: worker/batch settings and the training/validation splits.
dataloader:
  prefetch_factor: 2
  pin_memory: True
  read_group_size: 4
  num_workers:
    training: 4
    validation: 4
    test: 8
    predict: 8
  batch_size:
    training: 1
    validation: 1
    test: 4
    predict: 4
  limit_batches:
    training: null
    validation: 10
    test: 20
    predict: 20
  dataset: ${hardware.paths.data}/${hardware.files.dataset}
  land_dataset: ${hardware.paths.data}/${hardware.files.dataset_land}
  land_variables: [
    100u, 100v, swvl1, swvl2, stl1, stl2,
    tcc, lcc, mcc, hcc, sf, ro, strd, ssrd,
  ]
  training:
    # Two sources: the main dataset, plus the land dataset restricted to
    # dataloader.land_variables via `select`.
    dataset:
      - dataset: ${dataloader.dataset}
        start: null
        end: 2022
        frequency: ${data.frequency}
        drop: []
      - dataset: ${dataloader.land_dataset}
        start: null
        end: 2022
        frequency: ${data.frequency}
        select: ${dataloader.land_variables}
    # NOTE(review): in the flattened source these three keys followed the
    # second list entry, which already has start/end. They are placed at the
    # split level because keeping them inside the entry would create
    # duplicate keys - confirm this nesting.
    start: null
    end: 2022
    drop: []
  validation:
    # NOTE(review): validation starts in 2022 while training ends in 2022;
    # confirm the overlap of that year is intended.
    dataset:
      - dataset: ${dataloader.dataset}
        start: 2022
        end: 2022
        frequency: ${data.frequency}
        drop: []
      - dataset: ${dataloader.land_dataset}
        start: 2022
        end: 2022
        frequency: ${data.frequency}
        select: ${dataloader.land_variables}
    # NOTE(review): same nesting assumption as for the training split.
    start: 2022
    end: 2022
    drop: []
  validation_rollout: 1

# Plotting, profiling, checkpointing and logging.
diagnostics:
  plot:
    asynchronous: False
    datashader: True
    frequency:
      batch: 750
      epoch: 10
    parameters: [tp]
    sample_idx: 0
    callbacks:
      - _target_: anemoi.training.diagnostics.callbacks.plot.PlotLoss
        parameter_groups:
          moisture: [tp, cp, tcw]
          sfc_wind: [10u, 10v]
      - _target_: anemoi.training.diagnostics.callbacks.plot.PlotSample
        sample_idx: 0
        per_sample: 6
        parameters: [tp]
        # 14 level boundaries, paired with the 14 colours listed below.
        accumulation_levels_plot: [
          0, 0.05, 0.1, 0.25, 0.5, 1, 1.5, 2, 3, 4, 5, 6, 7, 100,
        ]
        cmap_accumulation:
          - "#ffffff"
          - "#04e9e7"
          - "#019ff4"
          - "#0300f4"
          - "#02fd02"
          - "#01c501"
          - "#008e00"
          - "#fdf802"
          - "#e5bc00"
          - "#fd9500"
          - "#fd0000"
          - "#d40000"
          - "#bc0000"
          - "#f800fd"
        precip_and_related_fields: [tp, cp]
    # NOTE(review): these three keys came directly after the PlotSample
    # entry in the flattened source; they are assumed to belong to `plot`
    # rather than to that callback - confirm.
    enabled: True
    scatter: False
    mode: asyncio
  # NOTE(review): placed under `diagnostics` because `plot` already has a
  # `callbacks` key - confirm.
  callbacks: {}
  benchmark_profiler:
    memory:
      enabled: True
      steps: 5
      warmup: 2
      extra_plots: False
      trace_rank0_only: False
    time:
      enabled: True
      verbose: False
    speed:
      enabled: True
    system:
      enabled: True
    model_summary:
      enabled: True
    snapshot:
      enabled: True
      steps: 4
      warmup: 0
  debug:
    anomaly_detection: False
  profiler: False
  enable_checkpointing: True
  checkpoint:
    every_n_minutes:
      save_frequency: 30
      num_models_saved: 3
    every_n_epochs:
      save_frequency: 1
      num_models_saved: 3
    every_n_train_steps:
      save_frequency: null
      num_models_saved: 0
  log:
    wandb:
      enabled: False
    tensorboard:
      enabled: False
    mlflow:
      enabled: False
    interval: 100
  enable_progress_bar: True
  print_memory_summary: False

# Filesystem locations and compute layout.
hardware:
  paths:
    # Both roots are read from environment variables.
    data: ${oc.decode:${oc.env:DATASETS_PATH}}
    output: ${oc.decode:${oc.env:OUTPUT_DIR}}
    logs:
      base: ${hardware.paths.output}/logs
      wandb: ${hardware.paths.output}/logs/wandb
      mlflow: ${hardware.paths.output}/logs/mlflow
      tensorboard: ${hardware.paths.output}/logs/tensorboard
    checkpoints: ${hardware.paths.output}/checkpoint
    plots: ${hardware.paths.output}/plots
    profiler: ${hardware.paths.output}/profiler
    graph: ${hardware.paths.output}/graphs
  files:
    dataset: aifs-ea-an-oper-0001-mars-n320-1979-2022-6h-v6.zarr
    dataset_land: aifs-ea-an-oper-0001-mars-n320-1979-2023-6h-v1-land.zarr
    graph: graph_enc_proc_dec_n320.pt
    # Checkpoint file-name templates; the {...} fields are left for the
    # consumer to fill in and are not OmegaConf interpolations.
    checkpoint:
      every_n_epochs: aifs-by_epoch-epoch_{epoch:03d}-val_wmse_{val_wmse:.3e}
      every_n_train_steps: aifs-by_step-epoch_{epoch:03d}-step_{step:06d}
      every_n_minutes: aifs-by_time-epoch_{epoch:03d}-step_{step:06d}
    warm_start: null
  accelerator: auto
  num_gpus_per_node: 4
  num_nodes: 16
  num_gpus_per_model: 4

# Graph definition: data and hidden node sets plus the edges between them.
graph:
  overwrite: True
  data: data
  hidden: hidden
  nodes:
    data:
      node_builder:
        _target_: anemoi.graphs.nodes.ZarrDatasetNodes
        dataset: ${dataloader.dataset}
      attributes:
        area_weight:
          _target_: anemoi.graphs.nodes.attributes.AreaWeights
          norm: unit-max
    hidden:
      node_builder:
        _target_: anemoi.graphs.nodes.ReducedGaussianGridNodes
        grid: o96
  edges:
    # data -> hidden
    - source_name: data
      target_name: hidden
      edge_builder:
        _target_: anemoi.graphs.edges.CutOffEdges
        cutoff_factor: 0.6
      attributes:
        edge_length:
          _target_: anemoi.graphs.edges.attributes.EdgeLength
          norm: unit-std
        edge_dirs:
          _target_: anemoi.graphs.edges.attributes.EdgeDirection
          norm: unit-std
    # hidden -> data
    - source_name: hidden
      target_name: data
      edge_builder:
        _target_: anemoi.graphs.edges.KNNEdges
        num_nearest_neighbours: 3
      attributes:
        edge_length:
          _target_: anemoi.graphs.edges.attributes.EdgeLength
          norm: unit-std
        edge_dirs:
          _target_: anemoi.graphs.edges.attributes.EdgeDirection
          norm: unit-std

# Model architecture: encoder / processor / decoder and output bounding.
model:
  activation: GELU
  num_channels: 1024
  model:
    _target_: anemoi.models.models.encoder_processor_decoder.AnemoiModelEncProcDec
  processor:
    _target_: anemoi.models.layers.processor.TransformerProcessor
    _convert_: all
    activation: GELU
    num_layers: 16
    num_chunks: 2
    mlp_hidden_ratio: 4
    num_heads: 16
    window_size: 1120
    dropout_p: 0
  encoder:
    _target_: anemoi.models.layers.mapper.GraphTransformerForwardMapper
    _convert_: all
    trainable_size: 8
    sub_graph_edge_attributes: [edge_length, edge_dirs]
    activation: GELU
    num_chunks: 1
    mlp_hidden_ratio: 4
    num_heads: 16
  decoder:
    _target_: anemoi.models.layers.mapper.GraphTransformerBackwardMapper
    _convert_: all
    trainable_size: 8
    sub_graph_edge_attributes: [edge_length, edge_dirs]
    activation: GELU
    num_chunks: 1
    mlp_hidden_ratio: 4
    num_heads: 16
  trainable_parameters:
    data: 8
    hidden: 8
    data2hidden: 8
    hidden2data: 8
  attributes:
    edges: [edge_length, edge_dirs]
    nodes: []
  # Same name as the node attribute defined under graph.nodes.data.attributes.
  node_loss_weight: area_weight
  bounding:
    - _target_: anemoi.models.layers.bounding.ReluBounding
      variables:
        - tp
        - ro
        - tcw
        - ssrd
        - q_50
        - q_100
        - q_150
        - q_200
        - q_250
        - q_300
        - q_400
        - q_500
        - q_600
        - q_700
        - q_850
        - q_925
        - q_1000
    - _target_: anemoi.models.layers.bounding.HardtanhBounding
      variables: [tcc, swvl1, swvl2]
      min_val: 0
      max_val: 1
    - _target_: anemoi.models.layers.bounding.FractionBounding
      variables: [cp, sf]
      min_val: 0
      max_val: 1
      total_var: tp
    - _target_: anemoi.models.layers.bounding.FractionBounding
      variables: [lcc, mcc, hcc]
      min_val: 0
      max_val: 1
      total_var: tcc

# Optimisation schedule, loss definition and per-variable loss weights.
training:
  run_id: null
  fork_run_id: null
  load_weights_only: null
  deterministic: False
  precision: 16-mixed
  multistep_input: 2
  accum_grad_batches: 1
  num_sanity_val_steps: 6
  gradient_clip:
    val: 32
    algorithm: value
  swa:
    enabled: False
    lr: 0.0001
  zero_optimizer: False
  training_loss:
    _target_: anemoi.training.losses.mse.WeightedMSELoss
    scalars: [variable, loss_weights_mask]
    ignore_nans: False
  loss_gradient_scaling: False
  validation_metrics:
    - _target_: anemoi.training.losses.mse.WeightedMSELoss
      scalars: []
      ignore_nans: True
  rollout:
    start: 1
    epoch_increment: 0
    max: 1
  max_epochs: null
  max_steps: 260000
  lr:
    rate: 0.00003125
    # Equal to max_steps above.
    iterations: 260000
    min: 3.0e-7
  variable_loss_scaling:
    default: 1
    pl:
      q: 0.6
      t: 6
      u: 0.8
      v: 0.5
      w: 0.001
      z: 12
    sfc:
      sp: 10
      10u: 0.5
      10v: 0.5
      100u: 0.1
      100v: 0.1
      2d: 0.5
      tp: 0.025
      cp: 0.0025
      ro: 0.005
      sf: 0.025
      tcc: 0.1
      mcc: 0.1
      lcc: 0.1
      hcc: 0.1
      swvl2: 200
      swvl1: 100
      stl2: 10
      stl1: 1
      ssrd: 0.05
      strd: 0.1
  metrics: [z_500, t_850, u_850, v_850]
  pressure_level_scaler:
    _target_: anemoi.training.data.scaling.ReluPressureLevelScaler
    minimum: 0.2
    slope: 0.001