## dataset specific
dataset: GraphMutationDataset
data_file_train: /share/vault/Users/gz2294/Data/DMS/ClinVar.HGMD.PrimateAI.syn/training.csv
data_file_train_ddp_prefix: /share/vault/Users/gz2294/Data/DMS/ClinVar.HGMD.PrimateAI.syn/training
data_file_test: /share/vault/Users/gz2294/Data/DMS/ClinVar.HGMD.PrimateAI.syn/testing.csv
data_type: ClinVar
loop: true # add self-loops or not
node_embedding_type: esm # esm, one-hot, one-hot-idx, or aa-5dim
graph_type: af2 # af2 or 1d-neighbor
add_plddt: true # add plddt or not
add_conservation: true # add conservation or not
add_position: true # add positional embeddings or not
add_sidechain: true # add side chain or not
use_cb: true
loaded_msa: false
add_msa: true # add msa or not
add_dssp: true # add dssp or not
alt_type: concat # concat or alt
computed_graph: true
max_len: 251
radius: 50 # radius for the KNN graph; must be larger than cutoff_upper
## model specific
load_model: null
model_class: PreMode_Star_CON
model: equivariant-transformer-star2-softmax
neighbor_embedding: true
cutoff_lower: 0.0 # graph related
cutoff_upper: 36.0 # graph related
max_num_neighbors: 36 # graph related
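# consistency check: cutoff_upper (36.0) is below the KNN graph radius (50) set above, as required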
x_in_channels: 1313 # x input size, only used if different from x_channels, 1280 + 1 + 20 + 12
alt_projector: 2593 # alt input size, 1280 + 1 + 20 + 12 + 1280
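# presumed breakdown: 1280 (ESM embedding) + 1 (pLDDT) + 20 (conservation) + 12 (DSSP) = 1313;
# alt_projector presumably appends a second 1280-dim ESM embedding for the alternate sequence: 1313 + 1280 = 2593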
x_in_embedding_type: Linear_gelu # x input embedding type, only used if x_in_channels is not None
x_channels: 512 # x embedding size
x_hidden_channels: 512 # x hidden size
vec_in_channels: 35 # vector embedding size
vec_channels: 32 # vector hidden size
vec_hidden_channels: 512 # vector hidden size; must equal x_channels (see the model page for why)
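# here vec_hidden_channels (512) equals x_channels (512), satisfying the constraint above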
distance_influence: both
share_kv: false
num_heads: 16 # number of attention heads
num_layers: 2
num_edge_attr: 444 # edge attribute dimension, from msa_contacts
num_nodes: 1
num_rbf: 32 # number of radial basis functions, use a small size for quicker training
rbf_type: expnormunlim
trainable_rbf: true
num_workers: 10
output_model: EquivariantBinaryClassificationStarPoolScalar
reduce_op: mean
output_dim: 1
activation: silu
attn_activation: silu
# aggr: mean # must be mean because proteins differ in size; removed and left at the default (note: the previous default was add)
drop_out: 0.6
## training specific
trainer_fn: PreMode_trainer
seed: 0
lr: 1e-4 # important
lr_factor: 0.8 # important
weight_decay: 0.0
lr_min: 1e-6 # important
lr_patience: 2 # important
num_steps_update: 8 # important; number of steps to accumulate before updating the model, use a large number for a large effective batch size
lr_warmup_steps: 4000 # important
batch_size: 8
ngpus: 4
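# effective batch size per update is roughly batch_size * num_steps_update * ngpus = 8 * 8 * 4 = 256 (assuming gradients accumulate across all GPUs)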
num_epochs: 20
loss_fn: weighted_loss_pretrain
data_split_fn: ""
y_weight: 1.0
contrastive_loss_fn: null
reset_train_dataloader_each_epoch: true
test_size: null
train_size: 0.95
val_size: 0.05
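# train_size + val_size = 1.0; with test_size null, evaluation presumably relies on data_file_test above rather than a random split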
## log specific
num_save_epochs: 1
num_save_batches: 2000 # save every 2000 batches; this also controls the validation frequency
log_dir: /share/vault/Users/gz2294/PreMode.final/CHPs.v4.noPretrain.retrain.seed.0/