-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgwasCatalogApiDownloader
executable file
·72 lines (58 loc) · 1.85 KB
/
gwasCatalogApiDownloader
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#! /usr/bin/env python3
# Vivek Rai
# vivekrai@umich.edu
# Parker Lab
#
# Feb 22, 2021
import httpx
import sys
import argparse
import pandas as pd
# URL template for the associations of a single study; the "{}" placeholder is
# filled with the study accession id via str.format() in fetch_associations.
URI = "https://www.ebi.ac.uk/gwas/summary-statistics/api/studies/{}/associations"
def fetch_associations(accession_id, output):
    """Download the associations of one study and write them as a TSV table.

    Requests ``URI`` formatted with *accession_id*, keeps a fixed set of
    fields from every association in the response plus the first entry of its
    ``trait`` value, and writes the resulting table to *output* as
    tab-separated text. Fields that are absent or null are written as "NA".

    Args:
        accession_id: Study accession substituted into ``URI``.
        output: Path or writable text handle handed to ``DataFrame.to_csv``.

    Exits the process with status 1, after printing a message to stderr, when
    the response status code is not 200.

    NOTE(review): only the single response returned by this one request is
    processed; if the endpoint paginates its results, later pages are not
    fetched -- confirm against the API documentation.
    """
    response = httpx.get(URI.format(accession_id))
    if response.status_code != 200:
        print(f"Error: {response.status_code}. Check study id.", file=sys.stderr)
        sys.exit(1)

    associations = response.json()["_embedded"]["associations"]
    print(f"Progress: Found {len(associations)} associations..", file=sys.stderr)

    keys_of_interest = [
        "variant_id",
        "chromosome",
        "base_pair_location",
        "effect_allele",
        "other_allele",
        "effect_allele_frequency",
        "odds_ratio",
        "beta",
        "p_value",
        "ci_upper",
        "ci_lower",
        "study_accession",
    ]

    rows = []
    # The mapping's keys are not needed, only the association records.
    for record in associations.values():
        row = {}
        for key in keys_of_interest:
            value = record.get(key)
            # Absent keys and explicit nulls are both reported as "NA".
            row[key] = "NA" if value is None else value
        # Fall back to a one-element list so a missing, null or empty trait
        # yields "NA" instead of a stray character or an IndexError.
        traits = record.get("trait") or ["NA"]
        row["trait"] = traits if isinstance(traits, str) else traits[0]
        rows.append(row)

    pd.json_normalize(rows).to_csv(output, index=False, encoding="utf-8", sep="\t")
def build_parser():
    """Return the argument parser for the command-line interface.

    The parser takes one positional accession id and an optional
    ``-o/--output`` destination that defaults to ``sys.stdout``.
    """
    parser = argparse.ArgumentParser(
        prog="gwasCatalogApiDownloader",
        # Passed as ``description`` rather than ``usage`` so that argparse
        # still generates the real usage line for -h and for error messages.
        description="""Download summary statistics from GWAS Catalog (ebi.ac.uk) for given accession id.""",
    )
    parser.add_argument(
        "accession_id", metavar="ID", help="Accession ID (GCSTXXXXXX for example)"
    )
    parser.add_argument(
        "-o", "--output", default=sys.stdout, help="output file (default: stdout)"
    )
    return parser


def main():
    """Parse the command line, download the associations, and exit with 0."""
    args = build_parser().parse_args()
    fetch_associations(args.accession_id, args.output)
    sys.exit(0)


if __name__ == "__main__":
    main()