File size: 1,537 Bytes
05922fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
local env = import "../env.jsonnet";
local base = import "basic.jsonnet";

local pretrained_path = env.str("PRETRAINED_PATH", "cache/basic/best");
local lr = env.json("FT_LR", 5e-5);

# training
local cuda_devices = base.cuda_devices;

{
    dataset_reader: base.dataset_reader,
    train_data_path: base.train_data_path,
    validation_data_path: base.validation_data_path,
    test_data_path: base.test_data_path,
    datasets_for_vocab_creation: ["train"],
    data_loader: base.data_loader,
    validation_data_loader: base.validation_data_loader,

    model: {
        type: "from_archive",
        archive_file: pretrained_path
    },
    vocabulary: {
        type: "from_files",
        directory: pretrained_path + "/vocabulary"
    },

    trainer: {
        num_epochs: base.trainer.num_epochs,
        patience: base.trainer.patience,
        [if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
        validation_metric: "+arg-c_f",
        num_gradient_accumulation_steps: base.trainer.num_gradient_accumulation_steps,
        optimizer: {
            type: "transformer",
            base: {
                type: "adam",
                lr: lr,
            },
            embeddings_lr: 0.0,
            encoder_lr: 1e-5,
            pooler_lr: 1e-5,
            layer_fix: base.trainer.optimizer.layer_fix,
        }
    },

    [if std.length(cuda_devices) > 1 then "distributed"]: {
        "cuda_devices": cuda_devices
    },
    [if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
}