sociolome/config/basic/rt.jsonnet
Gosse Minnema
Initial commit
05922fb
local env = import "../env.jsonnet";
local base = import "basic.jsonnet";
# Passed straight through to the model's `debug` field in the object below.
local debug = false;
# Re-training settings. Both are looked up through helpers from ../env.jsonnet
# using a key and a fallback value (presumably environment variables -- confirm
# in env.jsonnet).
# Archive from which the `_pretrained` model sections below load their modules.
local pretrained_path = env.str("PRETRAINED_PATH", "cache/fn/best");
# Learning rate used for the `_span_finder.*` and `_span_extractor.*`
# optimizer parameter groups below.
local rt_lr = env.json("RT_LR", 5e-5);
# Device list inherited from the base config. Its length selects between the
# `cuda_device`, `distributed` and `evaluate_on_test` fields further down.
local cuda_devices = base.cuda_devices;
# Builds the `_pretrained` stanza that loads one sub-module from the archive at
# `pretrained_path`. Every stanza sets `freeze: false`.
local from_archive(path_in_archive) = {
  _pretrained: {
    archive_file: pretrained_path,
    module_path: path_in_archive,
    freeze: false,
  },
};

# Device-count conditions that decide which optional fields are emitted.
local one_device = std.length(cuda_devices) == 1;
local many_devices = std.length(cuda_devices) > 1;

# Module-name patterns whose parameter groups use the re-training rate `rt_lr`.
local rt_patterns = ["_span_finder.*", "_span_extractor.*"];

# Re-training configuration: data settings and most hyper-parameters are taken
# from `base`; three model sub-modules are loaded from the pretrained archive.
{
  # Data pipeline, reused unchanged from the base config.
  dataset_reader: base.dataset_reader,
  train_data_path: base.train_data_path,
  validation_data_path: base.validation_data_path,
  test_data_path: base.test_data_path,
  datasets_for_vocab_creation: ["train"],
  data_loader: base.data_loader,
  validation_data_loader: base.validation_data_loader,

  model: {
    type: "span",
    # Sub-modules loaded from the archive. Note that the archive paths differ
    # in their leading underscore; they are reproduced exactly as given.
    word_embedding: from_archive("word_embedding"),
    span_extractor: from_archive("_span_extractor"),
    span_finder: from_archive("_span_finder"),
    # The typing head is configured inline rather than loaded from the archive;
    # only its hidden sizes come from the base config.
    span_typing: {
      type: "mlp",
      hidden_dims: base.model.span_typing.hidden_dims,
    },
    metrics: [{ type: "srl" }],
    typing_loss_factor: base.model.typing_loss_factor,
    label_dim: base.model.label_dim,
    max_decoding_spans: 128,
    max_recursion_depth: 2,
    debug: debug,
  },

  trainer: {
    num_epochs: base.trainer.num_epochs,
    patience: base.trainer.patience,
    # A computed field name that evaluates to null drops the field, so
    # `cuda_device` is only present when exactly one device is listed.
    [if one_device then "cuda_device"]: cuda_devices[0],
    validation_metric: "+arg-c_f",
    num_gradient_accumulation_steps: base.trainer.num_gradient_accumulation_steps,
    optimizer: {
      type: "transformer",
      base: {
        type: "adam",
        lr: base.trainer.optimizer.base.lr,
      },
      embeddings_lr: 0.0,
      encoder_lr: 1e-5,
      pooler_lr: 1e-5,
      layer_fix: base.trainer.optimizer.layer_fix,
      # One `[[pattern], {lr}]` entry per re-trained module pattern.
      parameter_groups: [
        [[pattern], { lr: rt_lr }]
        for pattern in rt_patterns
      ],
    },
  },

  # Emitted only when more than one device is listed.
  [if many_devices then "distributed"]: {
    cuda_devices: cuda_devices,
  },
  # Emitted only when exactly one device is listed.
  [if one_device then "evaluate_on_test"]: true,
}