-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGS_to_R_sort.py
executable file
·66 lines (56 loc) · 2.11 KB
/
GS_to_R_sort.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/python2.6
"""script to manipulate Genome Studio output to fitTetra compatible
Remove the first column (the GS ID number) before using this tool! The input must contain only the marker name and each sample's X and Y columns; remove all other columns.
Writes a single output file of ratios for tetraploid data; diploid data is not handled."""
import sys
import argparse
def parse_sample_names(header_line):
    """Return the unique sample names found in a header line, in column order.

    The first tab-separated column (the marker-name column) is ignored.
    Every remaining column is expected to look like ``<sample>.<suffix>``
    (for example ``sample.X`` / ``sample.Y``); the sample name is everything
    before the LAST dot, so sample names that themselves contain dots are
    kept intact instead of being truncated at their first dot.
    """
    # Drop the line ending first so it cannot leak into the last column name.
    columns = header_line.rstrip("\r\n").split("\t")[1:]
    unique_names = []
    seen = set()
    for column in columns:
        sample = column.rsplit(".", 1)[0]
        if sample not in seen:
            seen.add(sample)
            unique_names.append(sample)
    return unique_names


def marker_ratios(samples, values):
    """Compute x / (x + y) for each sample of one marker row.

    ``values`` holds the row's score fields as strings, laid out as
    consecutive (X, Y) pairs in the same order as ``samples``.  Pairing stops
    at whichever runs out first: the sample names or the complete pairs.

    Returns a list of ``(sample, ratio)`` tuples sorted case-insensitively by
    sample name.  Raises ``ValueError`` when a score is not numeric.
    """
    pairs = []
    for sample, x_text, y_text in zip(samples, values[0::2], values[1::2]):
        x = float(x_text)
        y = float(y_text)
        if x == 0:
            # No X signal: report 0 instead of dividing (covers x == y == 0).
            ratio = 0.0
        else:
            ratio = x / (x + y)
        pairs.append((sample, ratio))
    pairs.sort(key=lambda pair: pair[0].lower())
    return pairs


def convert(in_file, out_file):
    """Translate Genome Studio rows from ``in_file`` into fitTetra rows.

    ``in_file`` is any iterable of text lines and ``out_file`` any object
    with a ``write`` method.  A line starting with ``Name`` is treated as the
    header that defines the sample names; every other non-blank line is a
    marker row whose whitespace-separated fields are the marker name followed
    by X/Y score pairs.  One ``marker<TAB>sample<TAB>ratio`` line is written
    per sample and marker, after a fixed header line.
    """
    out_file.write("MarkerName\tSampleName\tratio\n")
    samples = []
    for line in in_file:
        if line.startswith("Name"):
            # Accumulate names so a repeated header never duplicates a sample.
            for sample in parse_sample_names(line):
                if sample not in samples:
                    samples.append(sample)
            continue
        fields = line.split()
        if not fields:
            # Blank line (e.g. trailing newline at end of file): nothing to do.
            continue
        marker = fields[0]
        for sample, ratio in marker_ratios(samples, fields[1:]):
            # "%04f" is kept as-is to preserve the existing output format; the
            # width of 4 never pads because "%f" output is always longer.
            out_file.write("%s\t%s\t%04f\n" % (marker, sample, ratio))


def main():
    """Parse the command line and write the ratios to the file ``tet_ratio``."""
    parser = argparse.ArgumentParser(description='GS output to fitTetra')
    parser.add_argument('-i', type=argparse.FileType('r'), help="input Genome Studio file,in format tab delimited with columns: Name(marker), sample.X(values), sample.Y(values), required", dest='in_file', required=True)
    args = parser.parse_args()
    try:
        # The output name is fixed and is created in the current directory.
        with open("tet_ratio", 'w') as tet:
            convert(args.in_file, tet)
    finally:
        # Close the input even when conversion fails part-way through.
        args.in_file.close()


if __name__ == "__main__":
    main()