-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathConfig.py
executable file
·204 lines (162 loc) · 15.5 KB
/
Config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
import argparse
import yaml
import os,sys
def _build_parser():
    """Build the command-line parser that defines every EGAT option.

    Each argument's destination becomes one key of the generated YAML file,
    so the option names and defaults below are the configuration schema.

    NOTE(review): several help strings look copy-pasted between unrelated
    options (e.g. ``--npoints``/``--split`` say "Loss used for Training.").
    They are kept verbatim because the intended wording cannot be determined
    from this file alone.

    Returns:
        argparse.ArgumentParser: the fully populated parser.
    """
    parser = argparse.ArgumentParser(
        description='Create a YAML configuration file for EGAT to use. '
                    'Also runs the EGAT with all steps in one go.')
    add = parser.add_argument

    # Help texts that the original script shares between many options.
    path_help = 'Path where the data is stored'
    fingerprint_help = 'Check if need to obtain the learned fingerprint.'
    radical_help = 'Method by which we can grab radical electron counts. (RDKit or YARP)'
    element_help = ('Does not look at element info. '
                    'This is useful for sensitivity analysis of variables.')
    reactive_help = ('Does not look at distance from reacting atoms. '
                     'This is useful for sensitivity analysis of variables.')
    rotatable_help = 'Check if we need to get the location of rotatable bonds.'
    lr_decay_help = 'Learning Rate Decay Rate.'
    validation_help = 'Only look at the validation set for evaulation in Training.'

    # Output file, purpose of the run and live monitoring.
    add('-output', default='EGAT.yaml', help='Output YAML file name')
    add('-purpose', type=str, default='Train', help='What the goal of the run is.')
    add('--weightsandbiases', action='store_true',
        help='Check if need to live monitor things with weights and biases.')
    add('--wandbproject', type=str, default=None,
        help='Name of project to save for live monitoring.')
    add('--wandbrun', type=str, default=None,
        help='Name of run to save for live monitoring.')
    add('--startpoint', type=str, default=None,
        help='Kind of Starting Point to use for Training.')
    add('--base_model', type=str, default=None,
        help='Trained model used for Transfer Learning and Prediction')
    add('--gpu', type=int, default=1, help='Number of gpus involved.')

    # Data locations and data selection.
    add('--input', type=str, default=None, help=path_help)
    add('--data_path', type=str, default=None, help=path_help)
    add('--save_path', type=str, default=None, help=path_help)
    add('--exclude', type=str, default='exclude.txt', help='Location of the Exclude file.')
    add('--class_choice', type=str, default=None, help='Types of reactions to look at.')
    # Was type=str with an int default, so the YAML type depended on whether
    # the flag was given; parse as int so both paths agree.
    add('--npoints', type=int, default=33, help='Loss used for Training.')
    add('--split', type=str, default=None, help='Loss used for Training.')
    add('--randomize', action='store_true', help='Types of reactions to look at.')
    add('--fold', type=int, default=None, help='Fold to look at.')
    add('--foldtype', type=str, default=None, help='Fold type to look at')
    add('--size', type=int, default=None, help='Look at the first N samples.')
    add('--batch_size', type=int, default=50, help='Batch size of data.')
    add('--shuffle', action='store_true', help='shuffles the data at every iteration.')
    add('--datastorage', type=str, default=None,
        help='How to store data. Default is a folder full of .json files.')

    # Optimizer settings.
    add('--optimizer', type=str, default='Adam', help='Kind of Optimizer.')
    add('--learning_rate', type=float, default=1e-05, help='Initial Learning Rate.')
    add('--betas', type=float, default=[.9, .999], nargs=2,
        help='Initial Learning Rate Betas.')
    add('--lr_min', type=float, default=2e-05, help='Minimum Learning Rate.')
    add('--lr_decay', type=float, default=.6, help=lr_decay_help)
    add('--exp_decay', type=float, default=5,
        help='Learning Rate Decay Rate for exponential scheduler.')
    add('--weight_decay', type=float, default=1e-04,
        help='Learning Rate Decay Rate for weights.')
    add('--epsilon', type=float, default=1e-08, help='Learning Rate Decay Rate factor.')
    add('--momentum', type=float, default=.9, help='Learning Rate Decay Rate momentum.')
    add('--step_size', type=int, default=20, help='Learning Rate Decay Rate step size.')
    # Was type=int, which rejected every fractional value although the
    # default itself (.1) is fractional.
    add('--momentum_orig', type=float, default=.1,
        help='Learning Rate Decay Rate original momentum.')

    # Prediction targets and extra input columns.
    # NOTE(review): the default is a plain string while CLI values arrive as a
    # list (nargs='+'); left unchanged so the default YAML output stays the same.
    add('--target', type=str, default='DE', nargs='+',
        help=' Target column or set of columns to predict')
    # Was type=str, which stored CLI weights as strings next to float defaults.
    add('--tweights', type=float, default=[.50, .50], nargs='+',
        help='Weights of the Target column or set of columns to predict')
    add('--additionals', type=str, default=None, nargs='+',
        help='Column or set of columns to use for predictions.')
    add('--hasaddons', action='store_true', help='Check if RDKit Features are needed.')
    add('--hasnormedaddons', action='store_true',
        help='Check if NormalizedRDKit Features are needed.')
    add('--drop_list', action='store_true',
        help='Check if NormalizedRDKit Features are needed.')
    add('--split_type', type=str, default='random',
        help='Check if NormalizedRDKit Features are needed.')

    # Model selection and evaluation.
    add('--model', type=str, help='Directory of the model being used')
    add('--model_type', type=str, default='direct', help='Type of model being used')
    add('--destination', type=str, help='Where to save the Prediction Results.')
    add('--test_only', action='store_true', help=validation_help)
    add('--test_split', type=float, help=validation_help)
    add('--EGAT_layers', type=int, default=4, help=fingerprint_help)
    add('--Aggregate', action='store_true',
        help='Check if we want to aggregate R and P features after EGAT '
             'instead of getting the difference.')
    add('--AddOnAgg', action='store_true',
        help='Check if we want to aggregate R and P RDKit features after EGAT '
             'instead of getting the difference.')
    add('--Norm', action='store_true', help='Add Normalization to Columns.')
    add('--molecular', action='store_true', help='Types of reactions to look at.')
    add('--Residual', type=str, default=None, help=fingerprint_help)
    for flag in ('--SA', '--AttnMaps', '--Embed'):
        add(flag, action='store_true', help=fingerprint_help)
    add('--UMAP', action='store_true', help='Run UMAP on the embeddings.')

    # Input columns, atom mapping and feature toggles.
    add('--smiles', type=str, default='smiles', help='Column with the smiles strings.')
    add('--folders', type=str, default='Molecularity', help='Way to make the folders.')
    add('--atom_map', action='store_true',
        help='Tells EGAT if we need to make our own atom-mapping.')
    add('--method_mapping', type=str,
        help='Way to Atom-Map Reactions. Do not use for running molecular prediction.')
    add('--getradical', type=str, default=None, help=radical_help)
    for flag in ('--getspiro', '--getbridgehead', '--gethbinfo', '--geteneg'):
        add(flag, action='store_true', help=radical_help)
    add('--onlyH', action='store_true',
        help='Check if we only need to look at neighboring H.')
    add('--removeelementinfo', action='store_true', help=element_help)
    for flag in ('--removereactiveinfo', '--removeringinfo',
                 '--removeformalchargeinfo', '--removearomaticity',
                 '--removechiralinfo', '--removehybridinfo',
                 '--removeneighborcount'):
        add(flag, action='store_true', help=reactive_help)
    add('--getbondrot', action='store_true', help=rotatable_help)
    add('--getbondpolarity', action='store_true', help=rotatable_help)
    add('--removebondorderinfo', action='store_true', help=element_help)
    for flag in ('--removebondtypeinfo', '--removeconjinfo', '--removestereoinfo'):
        add(flag, action='store_true', help=reactive_help)

    # Training schedule and losses.
    add('--epoch', type=int, default=30, help='Epochs with changing learning rate.')
    add('--epoch_const', type=int, default=10, help='Epochs with constant learning rate.')
    add('--warmup', type=int, default=2, help=lr_decay_help)
    add('--patience', type=int, default=2, help=lr_decay_help)
    add('--scheduler', type=str, default='cos', help=lr_decay_help)
    add('--train_loss', type=str, default='MAE', help='Loss used for Training.')
    add('--pred_loss', type=str, default='MAE', nargs='+',
        help='Loss used in model prediction')

    # UMAP options.
    add('--UMAP_model', type=str, default=None, help='Input CSV file of reactions')
    add('--umap_input', type=str, default=None, help='Input CSV file of reactions')
    add('--umap_outfile', type=str, default=None, help='Output file')
    add('--n_neighbors', type=int, default=32, help='Number of neighbors to look at')
    add('--dist', type=float, default=.1, help='Overlap')
    add('--n_components', type=int, default=1, help='Output Dimensions')
    # NOTE(review): 'euclidian' looks like a misspelling of 'euclidean'; kept
    # as-is because the consumer of this value is not visible here - confirm.
    add('--metric', type=str, default='euclidian', help='Method to find distance')

    # Worker and job-resource settings.
    add('--num_workers', type=int, default=1, help='Loss used in model prediction')
    add('--nodes', dest='nodes', default=1, help='Nodes to run on')
    add('--days', dest='days', default='00', help='Time length (Day)')
    add('--hours', dest='hours', default='23', help='Time length (Hr.)')
    add('--minutes', dest='minutes', default='50', help='Time length (Min.)')
    add('--cpus', dest='cpus', default=10, help='CPUs to run on')
    add('--user', dest='user', default=None, help='username')
    add('--partition', dest='partition', default='standby', help='username')
    return parser


def _model_config_name(model_type, molecular, hasaddons):
    """Return the model config name for a model type, or None if unknown.

    The name is ``EGAT_3MLP_<head>`` followed by ``_MOLEC`` when *molecular*
    is set and ``_RDKIT`` when *hasaddons* is set, which reproduces all twelve
    names of the original nested if/else ladder.
    """
    heads = {
        'direct': '1OUT',
        'multi': '2OUT',
        'BEP': '2OUT',
        'multitask': '2OUT',
        'Hr': 'Hr',
    }
    head = heads.get(model_type)
    if head is None:
        return None
    name = f'EGAT_3MLP_{head}'
    if molecular:
        name += '_MOLEC'
    if hasaddons:
        name += '_RDKIT'
    return name


def main(argv):
    """Parse EGAT options and write them to ``config/<output>`` as YAML.

    Besides the parsed options, the file receives a ``defaults`` entry naming
    the model config (when ``--model_type`` is recognised) and a ``hydra``
    entry with the run and sweep directories.

    Args:
        argv: list of command-line arguments without the program name.
            ``None`` makes argparse fall back to ``sys.argv[1:]``.

    Raises:
        SystemExit: raised by argparse on invalid arguments, on ``--help``,
            or when ``--save_path`` is combined with an unknown
            ``--model_type`` (previously an opaque ``KeyError``).
    """
    parser = _build_parser()
    # The original called parse_args() with no argument, silently ignoring argv.
    args = parser.parse_args(argv)

    # Copy so that adding the extra sections does not mutate the namespace.
    yaml_data = dict(vars(args))

    model_name = _model_config_name(args.model_type, args.molecular, args.hasaddons)
    if model_name is not None:
        yaml_data['defaults'] = {'model': model_name}

    if args.save_path is not None:
        if model_name is None:
            parser.error(f'unknown --model_type {args.model_type!r}; '
                         'cannot derive the model directory under --save_path')
        run_dir = os.path.join(args.save_path, model_name)
        # The original tested the truthiness of os.path.join(...), a no-op;
        # create the save path and the per-model run directory for real.
        os.makedirs(run_dir, exist_ok=True)
        yaml_data['hydra'] = {
            'run': {'dir': run_dir},
            'sweep': {'dir': args.save_path, 'subdir': model_name},
        }
    else:
        yaml_data['hydra'] = {
            'run': {'dir': f'log/{args.model_type}/{args.model}'},
            'sweep': {'dir': f'log/{args.model_type}', 'subdir': args.model},
        }

    output_yaml_file = f'config/{args.output}'
    # Make sure the target directory exists instead of failing in open().
    os.makedirs(os.path.dirname(output_yaml_file), exist_ok=True)
    # A single write replaces the original dump / reload / dump sequence.
    with open(output_yaml_file, 'w') as yaml_file:
        yaml.dump(yaml_data, yaml_file, default_flow_style=False)
if __name__ == "__main__":
    # Run as a script: hand main() the command-line arguments that follow
    # the program name.
    main(sys.argv[1:])