-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
73 lines (65 loc) · 3.1 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from GenesLists import *
import argparse
import csv
# Fallback locations: each one is the default of the matching command line
# option and is only used when that option is omitted.
metabolicListDirectory = '/Users/jim/Desktop/enrichment_maps_pathways/kegg_some'
completeListPath = '/Users/jim/Desktop/enrichment_maps_pathways/our_expressed.csv'
targetListPath = '/Users/jim/Desktop/enrichment_maps_pathways/target_68'
# Suffix joined to the target list name (with '_') to form the default output path.
outFileName = 'result.csv'
# File handed to `convert_ids_to_names` for every metabolic list.
conversionMapPath = 'resources/gene_id_name_map.csv'

# Column titles of the result file, in the same order as the values of each row.
csvHeaderRow = [
    "metabolic_name", "complete_count", "metabolic_clean_count",
    "target_count", "intersection_count", "p_val_hypergeometric_score",
    "gene_names",
]
def main():
    """Score every metabolic list against the target list and write a CSV report.

    Paths are taken from the command line (``-m``, ``-c``, ``-t``, ``-o``);
    each omitted option falls back to the module-level preset. When ``-o`` is
    not given, the output path is the target list path with its extension
    replaced by ``_<outFileName>``.

    One row is written per file found directly inside the metabolic list
    folder, with values in the order given by ``csvHeaderRow``.

    Exits via ``parser.error`` (status 2) when the metabolic list folder
    cannot be scanned.
    """
    # Imported locally so `os` no longer has to leak in through
    # `from GenesLists import *`.
    import os

    # command line arguments parsing
    parser = argparse.ArgumentParser(description='Specify command line parameters for metabolic, complete and target '
                                                 'gene id lists or preset correct paths in `main.py`. From each file '
                                                 'only the strings with integer numbers are used.')
    parser.add_argument("-metabolics", "-m", dest="metabolicListDirectory",
                        help="path to folder with metabolic lists", default=metabolicListDirectory,
                        metavar="folder_path")
    parser.add_argument("-complete", "-c", dest="completeListPath",
                        help="path to file with complete list", default=completeListPath, metavar="file_path")
    parser.add_argument("-target", "-t", dest="targetListPath",
                        help="path to file with target list", default=targetListPath, metavar="file_path")
    parser.add_argument("-out", "-o", dest="outFilePath",
                        help="path to file with result, defaults to `%%target_list_name%%_"+outFileName+"` "
                             "in target list folder", metavar="file_path")
    args = parser.parse_args()
    args.outFilePath = args.outFilePath or os.path.splitext(args.targetListPath)[0]+'_'+outFileName
    # todo: make convert_ids_to_names optional

    # main body
    complete = GeneList(args.completeListPath)
    target = GeneList(args.targetListPath)

    # The context manager guarantees the report is flushed and closed, also
    # when one of the steps below raises.
    # NOTE(review): on Python 3 the csv docs recommend opening with newline='';
    # not added here because the targeted Python version is not visible.
    with open(args.outFilePath, 'w') as out_file:
        result = csv.writer(out_file)
        result.writerow(csvHeaderRow)

        # os.walk swallows scan errors and then yields nothing, so a missing
        # folder used to surface as a bare StopIteration.
        top_level = next(os.walk(args.metabolicListDirectory), None)
        if top_level is None:
            parser.error("cannot read metabolic list folder: %s" % args.metabolicListDirectory)

        # Only the files directly inside the folder are used (no recursion).
        # All lists are loaded up front, before any data row is written.
        metabolics = [
            MetabolicList(os.path.join(args.metabolicListDirectory, fileName))
            for fileName in top_level[2]
        ]

        for metabolic in metabolics:
            # Order matters: narrow to the complete list first, then to the
            # target list; the score is computed on the narrowed list.
            metabolic.intersect_with(complete)
            metabolic.intersect_with(target)
            metabolic.compute_hypergeometric_score(complete, target)
            metabolic.convert_ids_to_names(conversionMapPath, delimiter_char=';')
            result.writerow([
                metabolic.name,
                complete.initialLength,
                metabolic.afterIntersectionLength[0],   # after the first intersection
                target.initialLength,
                metabolic.afterIntersectionLength[-1],  # after the last intersection
                "%.20f" % metabolic.hypergeometricScore,
                ' | '.join(metabolic.geneNames),
            ])
# Script entry point: run the analysis only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()