-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathseed.py
402 lines (275 loc) · 11.9 KB
/
seed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
"""add star and constellation data to the stars db. """
# Copyright (c) 2017 Bonnie Schulkin
# This file is part of My Heavens.
# My Heavens is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
# My Heavens is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
# for more details.
# You should have received a copy of the GNU Affero General Public License
# along with My Heavens. If not, see <http://www.gnu.org/licenses/>.
import os
import sys
import csv
import re
from sqlalchemy.orm.exc import NoResultFound, MultipleResultsFound
from model import db, connect_to_db, Star, Constellation, ConstLineVertex, \
ConstLineGroup, BoundVertex, ConstBoundVertex
from colors import COLOR_BY_SPECTRAL_CLASS
# Toggles progress/diagnostic printing throughout this module. False by
# default; the __main__ block at the bottom of the file switches it on.
DEBUG = False
# Default directory holding the seed data files. Loaders take the directory
# as an argument so that tests can point at a different data dir.
DATADIR = 'seed_data'
# Parses a spectral class string. Group 1 is one of the letters OBAFGKM plus
# a digit (e.g. "G2"); group 2 is a possibly-empty run of V/I characters
# (presumably the luminosity class, e.g. "V" or "III"), optionally preceded
# by a space and wrapped in parentheses.
SC_RE = re.compile(r'([OBAFGKM]\d) ?\(?([VI]*)\)?')
# For cleaning up BayerFlamsteed names: matches leading digits plus any
# whitespace that follows them.
# NOTE(review): no use of BF_RE is visible in this file -- confirm it is
# still needed.
BF_RE = re.compile(r'^\d+\s*')
# Hex color (white) used for stars with unrecognizable spectral classes.
DEFAULT_COLOR = "#ffffff"
def open_datafile(datadir, file_type):
    """Open the seed data file for file_type, looking for it inside datadir.

    Taking the directory as an argument is handy for testing with a
    different datadir.

    file_type is one of 'stars', 'consts', 'bounds' or 'lines'.

    Returns an open file object (not a path), or None when file_type is not
    one of the recognized types."""

    known_files = (
        ('stars', 'hygfull.csv'),
        ('consts', 'const_abbrevs.csv'),
        ('bounds', 'constellation_boundaries.txt'),
        ('lines', 'constellation_lines.csv'),
    )

    for kind, filename in known_files:
        if file_type == kind:
            return open(os.path.join(datadir, filename))

    # unrecognized file type
    return None
def announce(action):
    """Give feedback on where in the script we are.

    Prints a blank line followed by action framed between two rows of
    twenty asterisks. Does nothing unless the module-level DEBUG flag is set.

    action is the text describing the step that is about to run."""

    if not DEBUG:
        return

    banner = '*' * 20

    # a bare `print` is a no-op expression in Python 3; it has to be called
    # for the blank separator line to actually be emitted
    print()
    print(banner)
    print(action)
    print(banner)
def get_degrees_from_hours_and_invert(ra_in_hrs):
    """Convert a right ascension in hours to degrees, then invert it.

    The inversion (subtracting from 360) simulates looking at the *inside*
    of the celestial sphere in d3, instead of the outside.

    ra_in_hrs may be anything float() accepts (e.g. a string or Decimal).

    Returns a float."""

    # 24 hours of right ascension span the full 360 degrees
    ra_in_deg = float(ra_in_hrs) * 360 / 24
    return 360 - ra_in_deg
def get_color(spectral_class):
    """Get the hex color for a spectral class string.

    The string is parsed with SC_RE into a primary part (group 1) and a
    secondary part (group 2), which are used as the outer and inner keys of
    COLOR_BY_SPECTRAL_CLASS.

    Returns DEFAULT_COLOR when the string can't be parsed or the primary
    part has no entry in the color table."""

    parsed = SC_RE.search(spectral_class)
    if not parsed:
        # we've got ourselves a white star!
        return DEFAULT_COLOR

    primary = parsed.group(1)
    secondary = parsed.group(2)

    # un-matchable spectrum; e.g. F1
    if primary not in COLOR_BY_SPECTRAL_CLASS:
        return DEFAULT_COLOR

    colors = COLOR_BY_SPECTRAL_CLASS[primary]

    # if the secondary part is present and known, all's well
    if secondary and secondary in colors:
        return colors.get(secondary)

    # missing or unknown secondary part: fall back to the first color listed
    # for this primary class
    first_key = list(colors.keys())[0]
    return colors[first_key]
def get_name_and_constellation(star_info):
    """Get the name and constellation from one row of the star data file.

    star_info is a mapping with 'ProperName' and 'BayerFlamsteed' string
    entries.

    Returns a (name, constellation) tuple:
      * name is the stripped ProperName if there is one; otherwise the
        cleaned-up BayerFlamsteed value when it is longer than 3 characters;
        otherwise None.
      * constellation is the upper-cased last 3 characters of the cleaned-up
        BayerFlamsteed value, or None when that value is empty."""

    proper_name = star_info['ProperName'].strip()

    # strip unnecessary BayerFlamsteed cruft: collapse runs of spaces, then
    # drop any leading digits and spaces
    designation = re.sub(r' +', ' ', star_info['BayerFlamsteed'].strip())
    designation = re.sub(r'^[\d ]+', '', designation)

    if proper_name:
        name = proper_name
    elif len(designation) > 3:
        name = designation
    else:
        # a designation of just 3 characters is only the constellation
        name = None

    # the constellation is the last 3 characters of BayerFlamsteed
    constellation = designation[-3:].upper() if designation else None

    return name, constellation
def load_constellations(datadir):
    """Load constellation names and abbreviations from csv into the db.

    Reads the 'consts' data file found in datadir and adds one Constellation
    per row, taking const_code from the 'Abbrev' column and name from the
    'Name' column. Everything is committed once at the end."""

    announce('loading constellations')

    with open_datafile(datadir, 'consts') as csvfile:
        # a generator keeps object creation and session adds interleaved,
        # one row at a time
        db.session.add_all(
            Constellation(const_code=row['Abbrev'], name=row['Name'])
            for row in csv.DictReader(csvfile))

    db.session.commit()
def get_bounds_vertex(ra_in_deg, dec_in_deg):
    """Find the boundary vertex matching the input, creating it if needed.

    ra_in_deg and dec_in_deg are floats.

    The lookup uses the coordinates truncated to three decimal places; a
    newly created vertex is built from the coordinates exactly as given.

    Returns a BoundVertex object."""

    # the input file has greater precision than what's stored in the db, so
    # search on values cut down to three decimal places
    lookup_ra = int(ra_in_deg * 1000) / 1000.0
    lookup_dec = int(dec_in_deg * 1000) / 1000.0
    existing = BoundVertex.query.filter_by(ra=lookup_ra, dec=lookup_dec)

    try:
        found = existing.one()
    except NoResultFound:
        # first time we've seen this vertex; create it
        found = BoundVertex(ra=ra_in_deg, dec=dec_in_deg)
        db.session.add(found)

        # flush so the new vertex gets an id and is visible to the queries
        # made on later calls
        db.session.flush()

    return found
def load_const_boundaries(datadir):
    """Add the boundary vertices for each constellation into the db.

    Each line of the 'bounds' data file holds three whitespace-separated
    fields: RA in hours, dec, and the constellation code. Every line becomes
    a ConstBoundVertex whose index counts up from 0 and restarts whenever the
    constellation code differs from the previous line's."""

    announce('loading constellation boundaries')

    with open_datafile(datadir, 'bounds') as boundfile:

        # the constellation seen on the previous line, so we know when to
        # restart the numbering
        current_const = None

        for raw_line in boundfile:
            ra_hours, dec_text, const_code = raw_line.strip().split()

            # translate ra into degrees and invert for d3
            ra = get_degrees_from_hours_and_invert(ra_hours)
            dec = float(dec_text)

            # new constellation: restart the numbering
            if const_code != current_const:
                position = 0
                current_const = const_code

            shared_vertex = get_bounds_vertex(ra, dec)

            # link the vertex to this constellation's boundary
            db.session.add(ConstBoundVertex(const_code=const_code,
                                            vertex_id=shared_vertex.vertex_id,
                                            index=position))
            position += 1

    db.session.commit()
def load_stars(datadir):
    """Load star data from csv into the database.

    Reads the 'stars' data file found in datadir and adds a Star for every
    row whose magnitude ('Mag' column) is 7 or less. Everything is committed
    once at the end. When DEBUG is set, a progress line is printed every
    5000 rows read."""

    announce('loading stars')

    with open_datafile(datadir, 'stars') as csvfile:
        for row_count, row in enumerate(csv.DictReader(csvfile), 1):

            # display progress
            if DEBUG and row_count % 5000 == 0:
                print('{} stars'.format(row_count))

            # skip really dim stars
            magnitude = float(row['Mag'].strip())
            if magnitude > 7:
                continue

            # translate ra into degrees and invert for d3
            ra = get_degrees_from_hours_and_invert(row['RA'])
            dec = float(row['Dec'])

            # sometimes the color index is a bunch of space characters (or
            # empty). re.match only looks at the first character, so a value
            # that merely *starts* with whitespace is treated as missing too
            raw_color_index = row['ColorIndex']
            color_index = raw_color_index if re.match(r"\S", raw_color_index) else None

            # get color from spectral class
            spectrum = row['Spectrum'].strip()
            color = get_color(spectrum)

            # get name from the best available column
            name, const = get_name_and_constellation(row)

            db.session.add(Star(name=name,
                                const_code=const,
                                ra=ra,
                                dec=dec,
                                distance=row['Distance'],
                                magnitude=magnitude,
                                absolute_magnitude=row['AbsMag'],
                                spectrum=spectrum,
                                color_index=color_index,
                                color=color))

    db.session.commit()
def get_matching_star(ra_in_deg, dec_in_deg, mag, const=None, name=None):
    """Get the closest star matching the input values.

    Matching happens in up to three steps:
      1. stars within 0.02 of both ra and dec whose magnitude (compared by
         absolute value) is within 0.5 of mag;
      2. if that finds nothing, the same search ignoring magnitude;
      3. if either search finds more than one star, the first star from the
         position-only search ordered by Star.magnitude.

    ra_in_deg, dec_in_deg and mag are numbers. const and name are strings
    used only for debugging output.

    Returns a Star object.

    Raises SystemExit (exit status 1) when no star matches at all."""

    # find the star matching this constellation line point
    by_position = Star.query.filter(
        db.func.abs(Star.ra - ra_in_deg) < 0.02,
        db.func.abs(Star.dec - dec_in_deg) < 0.02)
    by_position_and_mag = by_position.filter(
        db.func.abs(db.func.abs(Star.magnitude) - db.func.abs(mag)) < 0.5)

    try:
        try:
            star = by_position_and_mag.one()

        except NoResultFound:
            # some of the magnitudes are way off (variable stars?). Try
            # without the magnitude
            try:
                star = by_position.one()

            except NoResultFound:
                if DEBUG:
                    error = "couldn't find a star match for {} {} ra {} dec {} mag {}"
                    print(error.format(const, name, ra_in_deg, dec_in_deg, mag))
                    print("exiting...")

                # always bail out here, DEBUG or not, so we never fall
                # through to the return with `star` unbound. sys.exit is used
                # rather than the exit() helper (which only exists when the
                # site module is loaded), with a non-zero status because the
                # seed did not complete
                sys.exit(1)

            else:
                if DEBUG:
                    print("matched {} {} without magnitude".format(const, name))

    except MultipleResultsFound:
        # just go with the brightest star that matches the coordinates
        star = by_position.order_by(Star.magnitude).first()

        if DEBUG:
            print("matched {} {} with brightest star in region".format(const, name))

    return star
def load_constellation_lines(datadir):
    """Add the constellation lines into the db.

    * Each continuous line gets its own line group. A row with an empty 'RA'
      column marks the end of the current line.
    * Rows are matched to existing stars in the db using ra, dec, and
      magnitude, so the stars must already be loaded.
    * Vertices within a group are numbered 0, 1, 2, ... in file order.

    Reads the 'lines' data file found in datadir (columns used: 'RA', 'DEC',
    'MAG', 'CON', 'NAME'). Everything is committed once at the end."""

    announce('loading constellation lines')

    # to track whether it's time for a new group
    group_break = True

    with open_datafile(datadir, 'lines') as csvfile:
        for constpoint in csv.DictReader(csvfile):

            # time to make a new group?
            if not constpoint['RA']:
                group_break = True
                continue

            # translate ra from hours to degrees and invert
            ra_in_deg = get_degrees_from_hours_and_invert(constpoint['RA'])

            # get data into proper format
            dec_in_deg = float(constpoint['DEC'])
            mag = float(constpoint['MAG'])

            # find the matching star in the db
            star = get_matching_star(ra_in_deg, dec_in_deg, mag,
                                     constpoint['CON'], constpoint['NAME'])

            # make a new group if necessary
            if group_break:
                group = ConstLineGroup(const_code=constpoint['CON'])
                db.session.add(group)

                # flush so the group gets an id for its vertices to refer to
                db.session.flush()

                # reset running vars
                index = 0
                group_break = False

            # add this vertex to the db and the group
            vert = ConstLineVertex(const_line_group_id=group.const_line_group_id,
                                   star_id=star.star_id,
                                   index=index)
            db.session.add(vert)

            # increment the index so each vertex records its position within
            # the group (without this every vertex was stored with index 0)
            index += 1

    db.session.commit()
def load_seed_data(ddir):
    """Run all the functions to load the seed data.

    For ease in seeding a test database with one line of code.

    ddir is the directory holding the data files; it is handed to every
    loader."""

    # stars go in before constellation lines, because the lines are matched
    # against Star rows already in the db
    loaders = (
        load_constellations,
        load_const_boundaries,
        load_stars,
        load_constellation_lines,
    )

    for loader in loaders:
        loader(ddir)
if __name__ == '__main__':

    # build a bare Flask app here instead of importing app from server; we
    # don't want to have to wait for the tzwhere instance
    from flask import Flask

    app = Flask(__name__)
    connect_to_db(app)

    # if we're running it directly, we probably want to see debug
    DEBUG = True

    # rebuild the schema from scratch, announcing each step before it runs
    for message, rebuild_step in (('dropping tables...', db.drop_all),
                                  ('creating tables...', db.create_all)):
        print(message)
        rebuild_step()

    load_seed_data(DATADIR)