# -*- coding: utf-8 -*-
"""
spcconvert - batch conversion and webpage building for SPC images
"""
import cvtools
import cv2
import os
import sys
import glob
import time
import datetime
import json
import shutil
import math
import tarfile
from multiprocessing import Process, Queue
import multiprocessing
from operator import itemgetter
import pystache
import numpy as np
import xmlsettings
from scipy import stats
import pandas
from collections import OrderedDict
def lmap(f, l):
return list(map(f,l))
def lzip(a,b):
return list(zip(a,b))
flow_frames = True
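# Process-safe integer counter backed by multiprocessing.Value; increments
# are serialized with the value's lock so worker processes cannot race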
class Counter(object):
def __init__(self):
self.val = multiprocessing.Value('i', 0)
def increment(self, n=1):
with self.val.get_lock():
self.val.value += n
@property
def value(self):
return self.val.value
def process_image(bundle):
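    # bundle is a dict built in run() with keys: image_path, image (preloaded
    # array), data_path, image_dir (absolute output dir), reldir (relative URL
    # dir), cfg, and total_images. Returns {} for frames with bad timestamps,
    # otherwise a dict with 'entry', 'image_path', 'prefix', and 'features'.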
image_path = bundle['image_path']
image = bundle['image']
data_path = bundle['data_path']
image_dir = bundle['image_dir']
cfg = bundle['cfg']
total_images = bundle['total_images']
filename = os.path.basename(image_path)
    # Patch a bug in PCAM where the timestamp string is sometimes incorrectly
    # set to 0 or a small value. Use the file creation time instead.
    #
    # This is okay so long as the queue in PCAM is mostly empty. We can adapt
    # the frame counter to fix this in the future.
timestamp = 0
for substr in filename.split('-'):
try:
timestamp = int(substr)
            break
except ValueError:
pass
# Range check the timestamp
if timestamp < 100000:
print ("" + filename + " strange timestamp.")
#timestamp = os.path.getctime(image_path)
output = {}
return output
prefix = filename.split('.')[0]
# image is preloaded so no need to load here
#image = cvtools.import_image(data_path,filename,bayer_pattern=cv2.COLOR_BAYER_BG2RGB)
img_c_8bit = cvtools.convert_to_8bit(image)
# images will be saved out later so set save_to_disk to False
features = cvtools.quick_features(
img_c_8bit,
save_to_disk=False,
abs_path=image_dir,
file_prefix=prefix,
cfg=cfg
)
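    # quick_features returns morphology and intensity measurements along with
    # the processed, binary, and raw-color images that are written to disk
    # later by the parent process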
    use_jpeg = cfg.get("UseJpeg").lower() == 'true'
if use_jpeg:
filename = os.path.basename(image_path).split('.')[0] + '.jpeg'
else:
filename = os.path.basename(image_path).split('.')[0] + '.png'
    # Handle the newer file format with unixtime in microseconds; the threshold
    # corresponds to roughly 2017-06-22 UTC
if timestamp > 1498093400000000:
timestamp = timestamp/1000000
# Range check the timestamp
if timestamp < 100000 or timestamp > time.time():
print ("" + filename + " strange timestamp.")
#timestamp = os.path.getctime(image_path)
output = {}
return output
# print "Timestamp: " + str(timestamp)
timestring = datetime.datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')
entry = {}
entry['maj_axis_len'] = features['major_axis_length']
entry['min_axis_len'] = features['minor_axis_length']
entry['aspect_ratio'] = features['aspect_ratio']
entry['area'] = features['area']
entry['clipped_fraction'] = features['clipped_fraction']
entry['orientation'] = features['orientation']*180/math.pi
entry['eccentricity'] = features['eccentricity']
entry['solidity'] = features['solidity']
entry['estimated_volume'] = features['estimated_volume']
entry['intensity_gray'] = features['intensity_gray']
entry['intensity_red'] = features['intensity_red']
entry['intensity_green'] = features['intensity_green']
entry['intensity_blue'] = features['intensity_blue']
entry['timestring'] = timestring
entry['timestamp'] = timestamp
entry['width'] = img_c_8bit.shape[1]
entry['height'] = img_c_8bit.shape[0]
entry['url'] = bundle['reldir'] + '/' + filename
entry['file_size'] = os.path.getsize(image_path)
output = {}
output['entry'] = entry
output['image_path'] = image_dir
output['prefix'] = prefix
output['features'] = features
return output
# threaded function for each process to call
# queues are used to sync processes
def process_bundle_list(bundle_queue,output_queue):
    while True:
        try:
            output_queue.put(process_image(bundle_queue.get()))
        except Exception:
            # process_image may raise on a malformed frame; back off briefly
            # and keep consuming (the parent terminates workers when done)
            time.sleep(0.02*np.random.rand())
# Split a list into sublists
def chunks(l, n):
n = max(1, n)
    return [l[i:i+n] for i in range(0, len(l), n)]
# Process a directory of images
def run(data_path,cfg):
print ("Running SPC image conversion...")
# get the base name of the directory
base_dir_name = os.path.basename(os.path.abspath(data_path))
# list the directory for tif images
print ("Listing directory " + base_dir_name + "...")
image_list = []
if cfg.get('MergeSubDirs',"false").lower() == "true":
sub_directory_list = sorted(glob.glob(os.path.join(data_path,"[0-9]"*10)))
for sub_directory in sub_directory_list:
print ("Listing sub directory " + sub_directory + "...")
image_list += glob.glob(os.path.join(sub_directory,"*.tif"))
else:
image_list += glob.glob(os.path.join(data_path,"*.tif"))
image_list = sorted(image_list)
# skip if no images were found
if len(image_list) == 0:
print ("No images were found. skipping this directory.")
return
# Get the total number of images in the directory
total_images = len(image_list)
# Create the output directories for the images and web app files
subdir = os.path.join(data_path,'..',base_dir_name + '_static_html')
if not os.path.exists(subdir):
os.makedirs(subdir)
image_dir = os.path.join(subdir,'images')
if not os.path.exists(image_dir):
os.makedirs(image_dir)
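    # Output layout: <data>_static_html/ holds spcdata.html, css/, js/, and
    # images/ split into zero-padded subdirectories of ImagesPerDir files each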
print ("Starting image conversion and page generation...")
# loop over the images and do the processing
images_per_dir = cfg.get('ImagesPerDir',2000)
if cfg.get("BayerPattern").lower() == "rg":
bayer_conv = cv2.COLOR_BAYER_RG2RGB
if cfg.get("BayerPattern").lower() == "bg":
bayer_conv = cv2.COLOR_BAYER_BG2RGB
print ("Loading images...\n",)
bundle_queue = Queue()
for index, image in enumerate(image_list):
reldir = 'images/' + str(images_per_dir*int(index/images_per_dir)).zfill(5)
absdir = os.path.join(image_dir,str(images_per_dir*int(index/images_per_dir)).zfill(5))
filename = os.path.basename(image)
if not os.path.exists(absdir):
os.makedirs(absdir)
bundle = {}
bundle['image_path'] = image
bundle['image'] = cvtools.import_image(os.path.dirname(image),filename,bayer_pattern=bayer_conv)
bundle['data_path'] = data_path
bundle['image_dir'] = absdir
bundle['reldir'] = reldir
bundle['cfg'] = cfg
bundle['total_images'] = total_images
bundle_queue.put(bundle)
print ("Loading images... (" + str(index) + " of " + str(total_images) + ")\n"),
#if index > 2000:
# total_images = index
# break
    # Use one worker process per CPU, leaving one core free for the main process
    n_threads = max(1, multiprocessing.cpu_count() - 1)
# Create the set of processes and start them
start_time = time.time()
output_queue = Queue()
processes = []
for i in range(0,n_threads):
p = Process(target=process_bundle_list, args=(bundle_queue,output_queue))
p.start()
processes.append(p)
# Monitor processing of the images and save processed images to disk as they become available
print ("\nProcessing Images...\r"),
counter = 0
entry_list = []
    use_jpeg = cfg.get("UseJpeg").lower() == 'true'
raw_color = cfg.get("SaveRawColor").lower() == 'true'
while True:
print ("Processing and saving images... (" + str(counter).zfill(5) + " of " + str(total_images).zfill(5) + ")\r",)
if counter >= total_images:
break
        try:
            output = output_queue.get()
if output:
entry_list.append(output['entry'])
output_path = os.path.join(output['image_path'],output['prefix'])
if use_jpeg:
if raw_color:
cv2.imwrite(os.path.join(output_path+"_rawcolor.jpeg"),output['features']['rawcolor'])
cv2.imwrite(os.path.join(output_path+".jpeg"),output['features']['image'])
else:
if raw_color:
cv2.imwrite(os.path.join(output_path+"_rawcolor.png"),output['features']['rawcolor'])
cv2.imwrite(os.path.join(output_path+".png"),output['features']['image'])
cv2.imwrite(os.path.join(output_path+"_binary.png"),output['features']['binary'])
counter = counter + 1
        except Exception:
            # No result ready yet or a worker hiccup; wait briefly and retry
            time.sleep(0.05)
# Record the total time for processing
proc_time = int(math.floor(time.time()-start_time))
# Terminate the processes in case they are stuck
for p in processes:
p.terminate()
print ("\nPostprocessing...")
# sort the entries by height and build the output
entry_list.sort(key=itemgetter('maj_axis_len'),reverse=True)
# Create histograms of several key features
# image resolution in mm/pixel
    image_res = cfg.get('PixelSize',22.1)/1000
    # print("Image resolution is set to: " + str(image_res) + " mm/pixel.")
# Get arrays from the dict of features
total_images = len(entry_list)
nbins = int(np.ceil(np.sqrt(total_images)))
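    # Scale pixel-based features into physical units: lengths in mm,
    # areas in mm^2, volumes in mm^3, and file sizes in kB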
maj_len = np.array(lmap(itemgetter('maj_axis_len'),entry_list))*image_res
min_len = np.array(lmap(itemgetter('min_axis_len'),entry_list))*image_res
volume = np.array(lmap(itemgetter('estimated_volume'),entry_list))*image_res*image_res*image_res
aspect_ratio = np.array(lmap(itemgetter('aspect_ratio'),entry_list))
orientation = np.array(lmap(itemgetter('orientation'),entry_list))
area = np.array(lmap(itemgetter('area'),entry_list))*image_res*image_res
unixtime = np.array(lmap(itemgetter('timestamp'),entry_list))
elapsed_seconds = unixtime - np.min(unixtime)
file_size = np.array(lmap(itemgetter('file_size'),entry_list))/1000.0
# Gather features scaled by the pixel size
entry_list_scaled = []
for i, e in enumerate(entry_list):
data_list = [
('url', e['url']),
('timestamp', e['timestring']),
('file_size', file_size[i]),
('aspect_ratio', aspect_ratio[i]),
('maj_axis_len' , maj_len[i]),
('min_axis_len', min_len[i]),
('orientation', orientation[i]),
('eccentricity', e['eccentricity']),
('solidity', e['solidity']),
('estimated_volume', volume[i]),
('area', area[i]),
]
for intensity_group in ['intensity_gray','intensity_red', 'intensity_green', 'intensity_blue']:
if intensity_group in e:
for k, v in e[intensity_group].items():
data_list.append((intensity_group + "_" + k, v))
data = OrderedDict(data_list)
entry_list_scaled.append(data)
total_seconds = max(elapsed_seconds)
print ("Total seconds recorded: " + str(total_seconds))
if total_seconds < 1:
total_seconds = 1
print ("\nComputing histograms...")
# Compute histograms
all_hists = {}
hist = np.histogram(area,nbins)
all_hists['area'] = json.dumps(lzip(hist[1].tolist(),hist[0].tolist()))
hist = np.histogram(maj_len,nbins)
all_hists['major_axis_length'] = json.dumps(lzip(hist[1].tolist(),hist[0].tolist()))
hist = np.histogram(min_len,nbins)
all_hists['minor_axis_length'] = json.dumps(lzip(hist[1].tolist(),hist[0].tolist()))
hist = np.histogram(aspect_ratio,nbins)
all_hists['aspect_ratio'] = json.dumps(lzip(hist[1].tolist(),hist[0].tolist()))
hist = np.histogram(elapsed_seconds,np.uint32(total_seconds))
all_hists['elapsed_seconds'] = json.dumps(lzip(hist[1].tolist(),hist[0].tolist()))
hist = np.histogram(orientation,nbins)
all_hists['orientation'] = json.dumps(lzip(hist[1].tolist(),hist[0].tolist()))
    hist = np.histogram(file_size,nbins)
    all_hists['file_size'] = json.dumps(lzip(hist[1].tolist(),hist[0].tolist()))
    print("\nComputing stats...")
# Compute general stats from features
all_stats = {}
all_stats['area'] = stats.describe(area)
all_stats['major_axis_length'] = stats.describe(maj_len)
all_stats['minor_axis_length'] = stats.describe(min_len)
all_stats['aspect_ratio'] = stats.describe(aspect_ratio)
all_stats['elapsed_seconds'] = stats.describe(elapsed_seconds)
all_stats['orientation'] = stats.describe(orientation)
all_stats['file_size'] = stats.describe(file_size)
print ("Exporting spreadsheet results...")
df = pandas.DataFrame(entry_list_scaled)
df.to_csv(os.path.join(subdir,'features.tsv'), index=False, sep='\t')
print ("Building web app...")
# Load html template for rendering
template = ""
with open(os.path.join('app','index.html'),"r") as fconv:
template = fconv.read()
# Define the render context from the processed histograms, images, and stats
context = {}
context['version'] = '1.0.1.05'
context['total_images'] = total_images
context['proc_time'] = proc_time
context['duration'] = total_seconds
context['compression_ratio'] = int((1000.0*24*total_images)/np.sum(file_size))
context['rois_per_second'] = total_images/context['duration']
context['kb_per_second'] = int(np.sum(file_size)/context['duration'])
context['recording_started'] = datetime.datetime.fromtimestamp(np.min(unixtime)).strftime('%Y-%m-%d %H:%M:%S')
context['app_title'] = "SPC Convert: " + base_dir_name
context['dir_name'] = base_dir_name
context['raw_color'] = raw_color
context['image_res'] = image_res
if use_jpeg:
context['image_ext'] = '.jpeg'
else:
context['image_ext'] = '.png'
context['stats_names'] = [{"name":"Min"},{"name":"Max"},{"name":"Mean"},{"name":"Standard Deviation"},{"name":"Skewness"},{"name":"Kurtosis"}]
    # define the charts to display from the histogram data
charts = []
for chart_name, data_values in all_hists.items():
chart = {}
chart['source'] = 'js/' + chart_name + '.js'
chart['name'] = chart_name
units = ""
if chart_name == 'area':
units = " (mm*mm)"
if chart_name == 'major_axis_length' or chart_name == 'minor_axis_length':
units = " (mm)"
if chart_name == 'file_size':
units = " (kB)"
if chart_name == 'elapsed_seconds':
units = " (s)"
if chart_name == 'orientation':
units = " (deg)"
chart['title'] = 'Histogram of ' + chart_name + units
chart['x_title'] = chart_name + units
chart['y_title'] = 'counts'
chart['stats_title'] = chart_name
chart['data'] = data_values
chart['stats'] = []
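        # scipy.stats.describe returns (nobs, (min, max), mean, variance,
        # skewness, kurtosis), hence the tuple indexing below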
chart['stats'].append({"name":"Min","value":"{:10.3f}".format(all_stats[chart_name][1][0])})
chart['stats'].append({"name":"Max","value":"{:10.3f}".format(all_stats[chart_name][1][1])})
chart['stats'].append({"name":"Mean","value":"{:10.3f}".format(all_stats[chart_name][2])})
chart['stats'].append({"name":"Standard Deviation","value":"{:10.3f}".format(math.sqrt(all_stats[chart_name][3]))})
chart['stats'].append({"name":"Skewness","value":"{:10.3f}".format(all_stats[chart_name][4])})
chart['stats'].append({"name":"Kurtosis","value":"{:10.3f}".format(all_stats[chart_name][5])})
charts.append(chart)
context['charts'] = charts
# render the html page and save to disk
page = pystache.render(template,context)
with open(os.path.join(subdir,'spcdata.html'),"w") as fconv:
fconv.write(page)
# remove any old app files and try to copy over new ones
try:
shutil.rmtree(os.path.join(subdir,"css"),ignore_errors=True)
shutil.copytree("app/css",os.path.join(subdir,"css"))
shutil.rmtree(os.path.join(subdir,"js"),ignore_errors=True)
shutil.copytree("app/js",os.path.join(subdir,"js"))
    except Exception:
        print("Error copying supporting files for html.")
# Load roistore.js database for rendering
template = ""
with open(os.path.join('app','js','database-template.js'),"r") as fconv:
template = fconv.read()
context = {}
context['image_items'] = entry_list
context['table'] = base_dir_name
# render the javascript page and save to disk
page = pystache.render(template,context)
with open(os.path.join(subdir,'js','database.js'),"w") as fconv:
fconv.write(page)
print ("Done.")
# Return True if the given directory contains any tif images
def valid_image_dir(test_path):
    file_list = glob.glob(os.path.join(test_path,"*.tif"))
    return len(file_list) > 0
# Module multiprocessing is organized differently in Python 3.4+
try:
# Python 3.4+
if sys.platform.startswith('win'):
import multiprocessing.popen_spawn_win32 as forking
else:
import multiprocessing.popen_fork as forking
except ImportError:
import multiprocessing.forking as forking
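# On Windows, patch multiprocessing's Popen so PyInstaller --onefile builds can
# respawn child processes: _MEIPASS2 must point at the unpacked bundle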
if sys.platform.startswith('win'):
# First define a modified version of Popen.
class _Popen(forking.Popen):
def __init__(self, *args, **kw):
if hasattr(sys, 'frozen'):
# We have to set original _MEIPASS2 value from sys._MEIPASS
# to get --onefile mode working.
os.putenv('_MEIPASS2', sys._MEIPASS)
try:
super(_Popen, self).__init__(*args, **kw)
finally:
if hasattr(sys, 'frozen'):
# On some platforms (e.g. AIX) 'os.unsetenv()' is not
# available. In those cases we cannot delete the variable
# but only set it to the empty string. The bootloader
# can handle this case.
if hasattr(os, 'unsetenv'):
os.unsetenv('_MEIPASS2')
else:
os.putenv('_MEIPASS2', '')
# Second override 'Popen' class with our modified version.
forking.Popen = _Popen
if __name__ == '__main__':
multiprocessing.freeze_support()
if len(sys.argv) <= 1:
print ("Please input a dirtectory of data directories, aborting.")
else:
if len(sys.argv) <= 2:
data_path = sys.argv[1]
# load the config file
cfg = xmlsettings.XMLSettings(os.path.join(sys.path[0],'settings.xml'))
combine_subdirs = cfg.get('MergeSubDirs',"False").lower() == "true"
print ("Settings file: " + os.path.join(sys.path[0],'settings.xml'))
# If file given try to unpack
if os.path.isfile(data_path):
extracted_path = data_path + "_unpacked"
with tarfile.open(data_path) as archive:
archive.extractall(path=extracted_path)
data_path = extracted_path
to_clean = extracted_path
# If given directory is a single data directory, just process it
if valid_image_dir(data_path):
run(data_path,cfg)
sys.exit(0)
# Otherwise look for data directories in the given directory
# List data directories and process each one
# expect the directories to be in the unixtime format
directory_list = sorted(glob.glob(os.path.join(data_path,"[0-9]"*10)))
if len(directory_list) == 0:
print ("No data directories found.")
sys.exit(0)
# Process the data directories in order
print ('Processing each data directory...')
for directory in directory_list:
if os.path.isdir(directory):
if not combine_subdirs:
if valid_image_dir(directory):
run(directory,cfg)
else:
run(directory,cfg)