forked from keflavich/flask_project
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathingest_datasets_better.py
109 lines (98 loc) · 3.52 KB
/
ingest_datasets_better.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import numpy as np
from astropy import table
from astropy.table import Table,Column
from astropy import units as u
def fix_logical(t):
"""
Convert a boolean column from string to boolean
"""
newcols = []
for col in t.columns.values():
if col.dtype.str.endswith('S5') or col.dtype.str.endswith('S4'):
falses = col == 'False'
trues = col == 'True'
if np.all(falses | trues):
col = t.ColumnClass(trues, name=col.name)
newcols.append(col)
return Table(newcols)
def reorder_columns(tbl, order):
"""
Sort the columns into an order set by the order list
"""
cols = [tbl[colname] for colname in order]
return Table(cols)
def rename_columns(tbl, mapping = {'name':'Names', 'id':'IDs',
'surfdens':'SurfaceDensity',
'vdisp':'VelocityDispersion',
'radius':'Radius','is_sim':'IsSimulated'},
remove_column='Ignore'):
"""
Rename table columns inplace
"""
for k,v in mapping.items():
if k in tbl.colnames:
if v == remove_column:
tbl.remove_column(k)
elif k != v:
tbl.rename_column(k,v)
def fix_bad_types(tbl):
"""
For all columns that *can* be converted to float, convert them to float
"""
columns = []
for columnname, column in tbl.columns.items():
try:
col = Column(data=column.astype('float'), name=column.name)
columns.append(col)
except:
columns.append(column)
return Table(columns)
def set_units(tbl, units={'SurfaceDensity':u.M_sun/u.pc**2,
'VelocityDispersion':u.km/u.s,
'Radius':u.pc}):
"""
Set the units of the table to the specified units.
WARNING: this *overwrites* existing units, it does not convert them!
"""
for k,v in units.items():
if k not in tbl.colnames:
raise KeyError("{0} not in table: run `rename_columns` first.".format(k))
#DEBUG print 'BEFORE unit for',k,":",tbl[k].unit
if v:
# only set units if there is a unit to be specified
tbl[k].unit = v
#DEBUG print 'AFTER unit for',k,":",tbl[k].unit
def convert_units(tbl, units={'SurfaceDensity':u.M_sun/u.pc**2,
'VelocityDispersion':u.km/u.s,
'Radius':u.pc}):
"""
Set the units of the table to the specified units.
WARNING: this *overwrites* existing units, it does not convert them!
"""
for k,v in units.items():
if k not in tbl.colnames:
raise KeyError("{0} not in table: run `rename_columns` first.".format(k))
tbl[k] = tbl[k].to(v)
def add_name_column(tbl, name):
"""
Add the person's name as a column
"""
tbl.add_column(table.Column(name='Names', data=[name]*len(tbl)), index=0)
def append_table(merged_table, table_to_add):
"""
Append a new table to the original
"""
for row in table_to_add:
merged_table.add_row(row)
def add_generic_ids_if_needed(tbl):
"""
Add numbered IDs if no IDs column is provided
"""
if 'IDs' not in tbl.colnames:
tbl.add_column(table.Column(data=np.arange(len(tbl)), name='IDs'))
def add_is_sim_if_needed(tbl, is_sim=True):
"""
Add is_sim if no is_sim column is provided
"""
if 'IsSimulated' not in tbl.colnames:
tbl.add_column(table.Column(data=[is_sim]*(len(tbl)), name='IsSimulated'))