# Provenance (from the Hugging Face file view; not part of the configuration):
# user "luismsgomes", commit 8adb2f8 ("added model"), original size 924 bytes.
# Training configuration for the model named in `model_name`.
# NOTE(review): the meaning of individual keys is defined by the training
# script that loads this file, which is not visible here; comments below that
# describe semantics are assumptions to confirm against that script.
trainer: "gist"
model_name: "bertimbau-335m-mmarco-pairs-gist1-v1"
base_model_name: "bertimbau-335m-europarl-eubookshop-ted2020-tatoeba-ct1-nli-gist10-sts-angle20-v3"
# Presumably the guide model used by the "gist" trainer -- TODO confirm.
guide_model_name: "bertimbau-100m-europarl-eubookshop-ted2020-tatoeba-ct1-nli-gist10-sts-cosent20-v1"

validation_ir: true
validation_ir_corpus_size: 50000
# validation_ir_corpus_size: 500

# The path/name/split keys of each entry appear to mirror the arguments of
# datasets.load_dataset(); see
# https://huggingface.co/docs/datasets/v2.18.0/en/about_dataset_load
train_dataset_configs:
  - alias: "mmarco"
    path: "unicamp-dl/mmarco"
    name: "portuguese"
    split: "train"
    # split: "train[1000:2000]"
# NOTE(review): kept as top-level keys; if the training script expects these
# flags per dataset, move them under the entry above.
examples_are_triples: false
examples_are_labelled: false

seed: 1
# Written with an explicit mantissa dot: YAML 1.1 loaders (e.g. PyYAML) parse a
# bare `1e-5` as the string "1e-5" instead of a float.
learning_rate: 1.0e-5
warmup_ratio: 0.1
weight_decay: 0.01
# batch_size: 100  # 100 fits very tightly (40GB used), could crash on batches of longer texts
batch_size: 85  # 85 uses up to 37.5GB out of 40GB
use_amp: true
epochs: 1
# validations_per_epoch: 1
validations_per_epoch: 100