sociolome / config /fn-evalita /evalita_plus_fn.vanilla_xlmr.freeze.jsonnet
Gosse Minnema
Initial commit
05922fb
local env = import "../env.jsonnet";
# ---------------------------------------------------------------------------
# Tunable settings. Values obtained through `env.str` / `env.json` take a
# setting name and a default; how an override is resolved is defined in
# ../env.jsonnet (not visible here).
# ---------------------------------------------------------------------------

# Directory holding the EVALITA JSONL splits. Routed through `env` like the
# other settings so the config is not tied to one machine; the default is the
# path this config previously hard-coded, so default behaviour is unchanged.
local dataset_path = env.str("DATA_PATH", "/home/p289731/cloned/lome/preproc/evalita_jsonl");
# NOTE(review): not referenced by the config object below (the model sets
# `ontology_path: null`) -- confirm whether this is intentional.
local ontology_path = "data/framenet/ontology.tsv";
# When true, the dataset reader is additionally given `max_instances: 128`.
local debug = false;

# reader
local pretrained_model = env.str("ENCODER", "xlm-roberta-large");
# Used for both `event_smoothing_factor` and `arg_smoothing_factor`.
local smoothing_factor = env.json("SMOOTHING", "0.1");

# model
local label_dim = env.json("LABEL_DIM", "64");
local dropout = env.json("DROPOUT", "0.2");
local bio_dim = env.json("BIO_DIM", "512");
local bio_layers = env.json("BIO_LAYER", "2");
local span_typing_dims = env.json("TYPING_DIMS", "[256, 256]");
local typing_loss_factor = env.json("LOSS_FACTOR", "8.0");

# loader
# NOTE(review): not referenced by the config object below (the sampling ratios
# there are literal values) -- confirm whether this is intentional.
local exemplar_ratio = env.json("EXEMPLAR_RATIO", "0.05");
# Token budgets passed as `max_tokens` to the training / validation samplers.
local max_training_tokens = 512;
local max_inference_tokens = 1024;

# training
local layer_fix = env.json("LAYER_FIX", "0");
local grad_acc = env.json("GRAD_ACC", "1");
# List of device ids. Exactly one entry selects `trainer.cuda_device` and
# enables `evaluate_on_test`; more than one adds a `distributed` section.
# The default matches the previously hard-coded `[0]`.
local cuda_devices = env.json("CUDA_DEVICES", "[0]");
local patience = 32;
# Device-count conditions, named once and reused for the conditional fields.
local single_device = std.length(cuda_devices) == 1;
local multi_device = std.length(cuda_devices) > 1;

# Experiment configuration: reader, data paths, loaders, model and trainer.
{
  # ---- data ---------------------------------------------------------------
  dataset_reader: {
    type: "semantic_role_labeling",
    debug: debug,
    pretrained_model: pretrained_model,
    ignore_label: false,
    # Only present when `debug` is true.
    [if debug then "max_instances"]: 128,
    # The same smoothing factor is applied to events and arguments.
    event_smoothing_factor: smoothing_factor,
    arg_smoothing_factor: smoothing_factor,
  },

  # Training uses the combined "evalita_plus_fn" split; validation and test
  # use the EVALITA-only files.
  train_data_path: dataset_path + "/evalita_plus_fn_train.jsonl",
  validation_data_path: dataset_path + "/evalita_dev.jsonl",
  test_data_path: dataset_path + "/evalita_test.jsonl",
  datasets_for_vocab_creation: ["train"],

  # ---- loaders ------------------------------------------------------------
  data_loader: {
    batch_sampler: {
      type: "mix_sampler",
      max_tokens: max_training_tokens,
      sorting_keys: ["tokens"],
      sampling_ratios: {
        "exemplar": 1.0,
        "full text": 0.0,
      },
    },
  },

  validation_data_loader: {
    batch_sampler: {
      type: "max_tokens_sampler",
      max_tokens: max_inference_tokens,
      sorting_keys: ["tokens"],
    },
  },

  # ---- model --------------------------------------------------------------
  model: {
    type: "span",

    word_embedding: {
      token_embedders: {
        pieces: {
          type: "pretrained_transformer",
          model_name: pretrained_model,
          # Transformer weights are not trained in this ("freeze") variant.
          train_parameters: false,
        },
      },
    },

    span_extractor: {
      type: "combo",
      sub_extractors: [
        { type: "self_attentive" },
        { type: "bidirectional_endpoint" },
      ],
    },

    span_finder: {
      type: "bio",
      bio_encoder: {
        type: "lstm",
        hidden_size: bio_dim,
        num_layers: bio_layers,
        bidirectional: true,
        dropout: dropout,
      },
      no_label: false,
    },

    span_typing: {
      type: "mlp",
      hidden_dims: span_typing_dims,
    },

    metrics: [
      { type: "srl" },
    ],

    typing_loss_factor: typing_loss_factor,
    # No ontology file is passed to the model in this configuration.
    ontology_path: null,
    label_dim: label_dim,
    max_decoding_spans: 128,
    max_recursion_depth: 2,
    debug: debug,
  },

  # ---- training -----------------------------------------------------------
  trainer: {
    num_epochs: 128,
    patience: patience,
    # Only present when exactly one device is listed.
    [if single_device then "cuda_device"]: cuda_devices[0],
    validation_metric: "+em_f",
    grad_norm: 10,
    grad_clipping: 10,
    num_gradient_accumulation_steps: grad_acc,
    optimizer: {
      type: "transformer",
      base: {
        type: "adam",
        lr: 1e-3,
      },
      embeddings_lr: 0.0,
      encoder_lr: 1e-5,
      pooler_lr: 1e-5,
      layer_fix: layer_fix,
    },
  },

  # Hidden field (`::`): kept on the object but not emitted in the output.
  cuda_devices:: cuda_devices,

  # Only present when more than one device is listed.
  [if multi_device then "distributed"]: {
    cuda_devices: cuda_devices,
  },

  # Only present when exactly one device is listed.
  [if single_device then "evaluate_on_test"]: true,
}