Upload AI_models/config.py with huggingface_hub
AI_models/config.py +85 -0
AI_models/config.py
ADDED
@@ -0,0 +1,85 @@
+import configargparse
+import pathlib
+import torch
+import logging
+import sys
+
+def get_config(config_file=None):
+    default_config_files = ['config.ini']
+    if config_file:
+        default_config_files = default_config_files + [config_file]
+
+    parser = configargparse.ArgumentParser(
+        description="arguments for CRISPR DL models",
+        default_config_files=default_config_files
+    )
+    parser.add_argument("--output_dir", type=pathlib.Path, default="./CRISPR_results", help="output directory")
+    parser.add_argument("--seed", type=int, default=63036, help="random seed")
+    parser.add_argument("--device", type=str, default="cuda" if torch.cuda.is_available() else "cpu", help="device")
+    parser.add_argument("--log", type=str, default="WARNING", choices=['CRITICAL', 'FATAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG', 'NOTSET'], help="set logging level")
+
+    parser_dataset = parser.add_argument_group(title="dataset", description="parameters for loading and splitting the dataset")
+    parser_dataset.add_argument("--owner", type=str, default="ljw20180420", help="huggingface user name")
+    parser_dataset.add_argument("--data_name", type=str, default="SX_spcas9", choices=["SX_spcas9", "SX_spymac", "SX_ispymac"])
+    parser_dataset.add_argument("--test_ratio", type=float, default=0.05, help="proportion of test samples")
+    parser_dataset.add_argument("--validation_ratio", type=float, default=0.05, help="proportion of validation samples")
+
+    parser_dataset.add_argument("--ref1len", type=int, default=127, help="length of reference 1")
+    parser_dataset.add_argument("--ref2len", type=int, default=127, help="length of reference 2")
+    parser_dataset.add_argument("--cut1", type=int, default=100, help="cut position of reference 1")
+    parser_dataset.add_argument("--cut2", type=int, default=27, help="cut position of reference 2")
+
+    parser_dataloader = parser.add_argument_group(title="data loader", description="parameters for data loader")
+    parser_dataloader.add_argument("--batch_size", type=int, default=100, help="batch size")
+
+    parser_optimizer = parser.add_argument_group(title="optimizer", description="parameters for optimizer")
+    parser_optimizer.add_argument("--optimizer", type=str, default="adamw_torch", choices=["adamw_hf", "adamw_torch", "adamw_torch_fused", "adamw_apex_fused", "adamw_anyprecision", "adafactor"], help="name of optimizer")
+    parser_optimizer.add_argument("--learning_rate", type=float, default=0.001, help="learning rate of the optimizer")
+
+    parser_scheduler = parser.add_argument_group(title="scheduler", description="parameters for learning rate scheduler")
+    parser_scheduler.add_argument("--scheduler", type=str, default="linear", choices=["linear", "cosine", "cosine_with_restarts", "polynomial", "constant", "constant_with_warmup", "inverse_sqrt", "reduce_lr_on_plateau", "cosine_with_min_lr", "warmup_stable_decay"], help="The scheduler type to use.")
+    parser_scheduler.add_argument("--num_epochs", type=float, default=30.0, help="Total number of training epochs to perform (if not an integer, the decimal part is the fraction of the last epoch performed before stopping training).")
+    parser_scheduler.add_argument("--warmup_ratio", type=float, default=0.05, help="Ratio of total training steps used for a linear warmup from 0 to learning_rate")
+
+    parser_CRISPR_transformer = parser.add_argument_group(title="CRISPR transformer", description="parameters for CRISPR transformer")
+    parser_CRISPR_transformer.add_argument("--hidden_size", type=int, default=256, help="model embedding dimension")
+    parser_CRISPR_transformer.add_argument("--num_hidden_layers", type=int, default=3, help="number of EncoderLayers")
+    parser_CRISPR_transformer.add_argument("--num_attention_heads", type=int, default=4, help="number of attention heads")
+    parser_CRISPR_transformer.add_argument("--intermediate_size", type=int, default=1024, help="FeedForward intermediate dimension size")
+    parser_CRISPR_transformer.add_argument("--hidden_dropout_prob", type=float, default=0.1, help="The dropout probability for all fully connected layers in the embeddings, encoder, and pooler")
+    parser_CRISPR_transformer.add_argument("--attention_probs_dropout_prob", type=float, default=0.1, help="The dropout ratio for the attention probabilities")
+
+    parser_CRISPR_diffuser = parser.add_argument_group(title="CRISPR diffuser", description="parameters for CRISPR diffuser")
+    parser_CRISPR_diffuser.add_argument("--max_micro_homology", type=int, default=7, help="Clip micro-homology strength to (0, max_micro_homology).")
+    parser_CRISPR_diffuser.add_argument("--MCMC_corrector_factor", nargs='+', type=float, default=[1., 0., 0.001], help="weight of the MCMC corrector term")
+    parser_CRISPR_diffuser.add_argument("--unet_channels", nargs='+', type=int, default=[32, 64, 96, 64, 32], help="the output channels of Unet")
+    parser_CRISPR_diffuser.add_argument("--noise_scheduler", type=str, default="exp", choices=["linear", "cosine", "exp", "uniform"], help="noise scheduler used for the diffuser model")
+    parser_CRISPR_diffuser.add_argument("--noise_timesteps", type=int, default=20, help="number of noise scheduler time steps")
+    parser_CRISPR_diffuser.add_argument("--cosine_factor", type=float, default=0.008, help="parameter controlling the cosine noise scheduler")
+    parser_CRISPR_diffuser.add_argument("--exp_scale", type=float, default=5.0, help="scale factor of the exponential noise scheduler")
+    parser_CRISPR_diffuser.add_argument("--exp_base", type=float, default=5.0, help="base parameter of the exponential noise scheduler")
+    parser_CRISPR_diffuser.add_argument("--uniform_scale", type=float, default=1.0, help="scale parameter for the uniform scheduler")
+    parser_CRISPR_diffuser.add_argument("--display_scale_factor", type=float, default=0.1, help="exponential scale of the distribution image")
+
+    parser_inDelphi = parser.add_argument_group(title="inDelphi", description="parameters for inDelphi")
+    parser_inDelphi.add_argument("--DELLEN_LIMIT", type=int, default=60, help="deletion length upper limit of the inDelphi model")
+
+    parser_Lindel = parser.add_argument_group(title="Lindel", description="parameters for Lindel")
+    parser_Lindel.add_argument("--Lindel_dlen", type=int, default=30, help="the upper limit of deletion length (strictly less than dlen)")
+    parser_Lindel.add_argument("--Lindel_mh_len", type=int, default=4, help="the upper limit of micro-homology length")
+    parser_Lindel.add_argument("--Lindel_reg_const", type=float, default=0.01, help="regularization coefficient")
+    parser_Lindel.add_argument("--Lindel_reg_mode", type=str, default="l2", choices=["l2", "l1"], help="regularization method")
+
+    parser_FOREcasT = parser.add_argument_group(title="FOREcasT", description="parameters for FOREcasT")
+    parser_FOREcasT.add_argument("--FOREcasT_MAX_DEL_SIZE", type=int, default=30, help="max deletion size")
+    parser_FOREcasT.add_argument("--FOREcasT_reg_const", type=float, default=0.01, help="regularization coefficient for deletion")
+    parser_FOREcasT.add_argument("--FOREcasT_i1_reg_const", type=float, default=0.01, help="regularization coefficient for insertion")
+
+    return parser.parse_args()
+
+def get_logger(args):
+    logger = logging.getLogger("logger")
+    handler = logging.StreamHandler(stream=sys.stdout)
+    handler.setLevel(args.log)
+    logger.addHandler(handler)
+    return logger
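For context, a minimal usage sketch of the two helpers follows. It is not part of the commit: it assumes AI_models is importable as a package, that an optional config.ini sits in the working directory, and the entry-point code itself is hypothetical. Because get_logger sets only the handler level (and the logger itself inherits the root default of WARNING), the sketch also sets the logger level so that --log values below WARNING take effect.

    import torch
    from AI_models.config import get_config, get_logger

    # A hypothetical config.ini, picked up automatically by get_config(), could contain e.g.:
    #   batch_size = 50
    #   learning_rate = 0.0005
    #   data_name = SX_spymac
    args = get_config()
    logger = get_logger(args)
    logger.setLevel(args.log)  # get_logger only sets the handler level

    torch.manual_seed(args.seed)
    logger.info("writing results to %s on %s", args.output_dir, args.device)

With configargparse, command-line flags override values read from config.ini, which in turn override the hard-coded defaults, so the same entry point serves both quick experiments and reproducible config-file runs.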