-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathprocess_Standard_pickedpdf.py
executable file
·274 lines (220 loc) · 13.3 KB
/
process_Standard_pickedpdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
################ This is the import statement required to reference scripts within the package
import os,sys,glob
# Candidate locations of the shared code directory on the machines this script
# is run from. Whichever of them exist are added to the module search path.
ndh_tools_path_opts = [
    '/mnt/data01/Code/',
    '/home/common/HolschuhLab/Code/',
]
for code_dir in ndh_tools_path_opts:
    # These entries are directories, so they must be tested with isdir():
    # isfile() is False for a directory, which meant nothing was ever added.
    if os.path.isdir(code_dir) and code_dir not in sys.path:
        sys.path.append(code_dir)
################################################################################################
def process_Standard_pickedpdf(picked_files,orig_radar_dir,layer_save, cresis_flag=1, layer_save_type=1, layer_load='', find_rows_from_fullimageset = 0):
    """
    % (C) Nick Holschuh - Amherst College -- 2022 (Nick.Holschuh@gmail.com)
    %
    % This function extracts annotations from nadir radargrams made on an iPad
    %
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % The inputs are:
    %
    %     picked_files - List of filenames of pdfs containing annotations. The last
    %                    '_'-separated token of each file name (before the first '.')
    %                    is the crop mode: 'maxbotplus25', 'maxbotplus100' or 'nocrop'.
    %                    For CReSIS files, tokens 1 and 2 form the day/segment string.
    %     orig_radar_dir - List of directories (one per pdf, same order) that contain
    %                    the original radar data files (.mat) shown in each pdf
    %     layer_save - The name of the directory you want to save layer output to
    %     cresis_flag=1 - If this is a CReSIS file, this should be set to 1, otherwise, 0.
    %     layer_save_type=1 - For most applications, this should be set to 1, which is, save files in your current dir.
    %                     0 - This allows you to save layer files within the cresis file_tree
    %                         (requires layer_load)
    %     layer_load='' - This is not fully implemented, but it would allow you to populate existing layer files.
    %                     It is ignored (reset to '') when layer_save_type is 1.
    %     find_rows_from_fullimageset - Setting this to 1 will search all images to figure out which rows
    %                                   of pixels are within the plot. For use when some of the bottom of radargrams
    %                                   is all white. If the search fails for one pdf, only that pdf falls
    %                                   back to the per-image row search.
    %
    %%%%%%%%%%%%%%%
    % The outputs are:
    %     saved files for annotations in each image (nothing is returned)
    %
    %%%%%%%%%%%%%%%
    % Raises:
    %     ValueError - for an unrecognised crop suffix, an unsupported layer_save_type,
    %                  or layer_save_type=0 used without layer_load
    %
    %%%%%%%%%%%%%%%
    % Notes:
    %     Each pdf is split into one PNG per page by the external `convert` command
    %     (presumably ImageMagick), written to ./Picked_Temp, which is removed again
    %     once the pdf has been handled -- also when processing fails part-way.
    %
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    """
    import glob
    import os
    import shutil
    import subprocess

    import numpy as np
    from PIL import Image
    from tqdm import tqdm
    import NDH_Tools as ndh

    deconstruct_dir = 'Picked_Temp'
    deconstruct_flag = 1
    delete_flag = 1

    # Number of rows kept below the deepest 'Bottom' sample for each cropped export
    crop_padding = {'maxbotplus25': 25, 'maxbotplus100': 100}

    if layer_save_type == 1:
        layer_load = ''

    ########## Here we actually do the image processing:
    for ind0, fn in enumerate(picked_files):

        ##########################################################################################################
        # Part 1 ##################################################################################################
        ######## Here we parse the name for the file information and the image specifications
        local_fn_whole = fn.split('/')[-1]
        fileparts = local_fn_whole.split('.')[0].split('_')
        crop = fileparts[-1]
        if crop not in crop_padding and crop != 'nocrop':
            # Previously this surfaced as an undefined (or stale) image height further down.
            raise ValueError("Unrecognised crop suffix '%s' in file name: %s" % (crop, local_fn_whole))

        ##########################################################################################################
        # Part 2 #################################################################################################
        ######## Here we identify the files we may need to load later
        if cresis_flag == 1:
            day_seg = '_'.join(fileparts[1:3])
            # NOTE(review): the season is not used below; the lookup is kept in case it is
            # relied on to reject unrecognised segments -- confirm and remove if not.
            season = ndh.cresis_season(day_seg)['season']
            standard_fns = sorted(glob.glob(orig_radar_dir[ind0]+'/Data_'+day_seg+'*.mat'))
        else:
            standard_fns = sorted(glob.glob(orig_radar_dir[ind0]+'/*.mat'))

        if not standard_fns:
            # Nothing to associate the pdf pages with; indexing standard_fns would fail below.
            print('No radar data files found for: '+local_fn_whole+'. Skipping this pdf.')
            continue

        layer_load_fns = []
        if layer_save_type == 0:
            save_dir = '/'.join(standard_fns[0].split('/')[0:-3])+'/'+layer_save
            layer_fns = []
            for temp_fn in standard_fns:
                path_parts = temp_fn.split('/')
                path_parts[-3] = layer_save
                layer_fns.append('/'.join(path_parts))
                if layer_load:
                    # Work on a copy and store a path string, as expected by ndh.loadmat
                    load_parts = list(path_parts)
                    load_parts[-3] = layer_load
                    layer_load_fns.append('/'.join(load_parts))
        elif layer_save_type == 1:
            save_dir = './'+layer_save
            layer_fns = [save_dir+'/'+temp_fn.split('/')[-1] for temp_fn in standard_fns]
        else:
            raise ValueError('layer_save_type must be 0 or 1, got: %r' % (layer_save_type,))

        ########### Preconstruct directories for use:
        comb_deconstruct_dir = './'+deconstruct_dir
        os.makedirs(comb_deconstruct_dir, exist_ok=True)
        os.makedirs(save_dir, exist_ok=True)

        try:
            ##########################################################################################################
            # Part 4 ###################################################################################################
            ########## The following converts a pdf to multiple images
            if deconstruct_flag == 1:
                print('Starting the pdf deconstruction for: '+local_fn_whole)
                # Argument list (no shell), so file names with spaces or shell
                # metacharacters are passed through untouched.
                convert_cmd = ['convert', '-quality', '20', '-density', '144', fn,
                               comb_deconstruct_dir+'/Frame_%03d.png']
                try:
                    convert_result = subprocess.run(convert_cmd, check=False)
                except OSError as err:
                    print('Could not run the convert command: %s' % err)
                else:
                    if convert_result.returncode != 0:
                        print('convert exited with status %d for: %s' % (convert_result.returncode, local_fn_whole))

            frame_list = sorted(glob.glob(comb_deconstruct_dir+'/*.png'))

            ##########################################################################################################
            # Part 5 ##################################################################################################
            print('Starting the information extraction.')

            ########## For images that have white at the bottom, we first look through all images to figure out the
            ########## appropriate row indices to use. The flag and the mask are local to this pdf so that neither a
            ########## failure nor the mask itself carries over to the next pdf.
            use_full_rows = (find_rows_from_fullimageset == 1)
            selected_rows = None
            if use_full_rows:
                row_mask = None
                for frame_fn in frame_list:
                    with Image.open(frame_fn) as im_handle:
                        np_frame = np.array(im_handle)
                    if np_frame.ndim < 3:
                        continue
                    channel_count = np_frame.shape[2]
                    if channel_count == 4:
                        ########## For PNGs with RGB+components, this works best
                        marked = np.array(np_frame[:,:,3] != 0).astype(float)
                    elif channel_count == 2:
                        ########## For greyscale+transparent, this is what you need
                        marked = np.array(np_frame[:,:,1] != 0).astype(float)
                    else:
                        continue
                    row_mask = marked if row_mask is None else row_mask + marked

                if row_mask is not None:
                    selected_rows = ndh.minmax(np.where(row_mask > 0)[0])
                else:
                    use_full_rows = False
                    print('Something went wrong with the full image-set. Defaulting to local row search.')

            ########## Here we actually load the images and extract pixel coordinate information
            error_frames = []
            good_frames = []
            empty_frames = []

            for ind1, frame_fn in enumerate(tqdm(frame_list)):
                ##### More pages than data files: there is nothing to pair this page with
                if ind1 >= len(standard_fns):
                    error_frames.append(ind1)
                    continue

                ##### Confirm that file is associated with the right frame
                data_fn = standard_fns[ind1]
                if cresis_flag == 1 and '_%0.3d' % (ind1+1) not in data_fn:
                    error_frames.append(ind1)
                    continue

                frame_data = ndh.loadmat(data_fn)
                times = frame_data['Time']
                original_width = len(frame_data['Bottom'])-1
                height_index = ndh.find_nearest(times, np.nanmax(frame_data['Bottom']))
                if crop == 'nocrop':
                    original_height = len(times)
                else:
                    original_height = height_index['index'][0]+crop_padding[crop]

                if use_full_rows:
                    picks = ndh.find_pixelcoords(frame_fn,original_width,original_height,im_pick_params=[[2,25,1,10,1]], predefined_row_inds=selected_rows)
                else:
                    picks = ndh.find_pixelcoords(frame_fn,original_width,original_height,im_pick_params=[[2,25,1,10,1]])

                ##########################################################################################################
                # Part 6 #################################################################################################
                ########## Here we put pixel information in its final objects
                if len(picks) == 0 or len(picks[0]) == 0:
                    empty_frames.append(ind1)
                    continue

                good_frames.append(ind1)
                surfaces = picks[0]

                ########## Layers are written in order of decreasing maximum travel time
                max_surf_depth = [np.max(times[np.array(surf).astype(int)[:,1]]) for surf in surfaces]
                surf_order = np.argsort(max_surf_depth)[::-1]

                ########## Load or construct the object
                if not layer_load:
                    if layer_save_type == 0:
                        # The layer_save_type=0 branch below needs the arrays of an existing layer file.
                        raise ValueError('layer_save_type=0 requires layer_load to name an existing layer directory')
                    layer_data = {'picks':[],'Latitude':frame_data['Latitude'],'Longitude':frame_data['Longitude'],
                                  'Elevation':frame_data['Elevation'],'Surface':frame_data['Surface'],'Bottom':frame_data['Bottom']}
                else:
                    layer_data = ndh.loadmat(layer_load_fns[ind1])
                    layer_ids = layer_data['id']
                    layer_quality = layer_data['quality']
                    layer_twtt = layer_data['twtt']
                    layer_type = layer_data['type']
                    basic_infill_object = np.ones(layer_twtt[0].shape)

                ########## Loop through the layers
                for layer_ind in surf_order:
                    pick_pixels = np.array(surfaces[layer_ind]).astype(int)
                    ki = pick_pixels[:,0]
                    layer_times = times[pick_pixels[:,1]]

                    if layer_save_type == 0:
                        ######## For some reason layer files and the image have different sizes.
                        ######## so we have to interpolate the pick indices onto the gpstime...
                        ki_times = np.squeeze(frame_data['GPS_time'][ki])
                        ki_layer, new_ki = np.unique(ndh.find_nearest(layer_data['gps_time'],np.squeeze(ki_times))['index'],return_index=True)

                        twtt_temp = basic_infill_object*np.nan
                        # Store the picked travel times (not the GPS times used for the
                        # matching), consistent with the layer_save_type=1 branch below.
                        twtt_temp[ki_layer] = np.ravel(layer_times)[new_ki]

                        layer_twtt = np.vstack([layer_twtt,twtt_temp])
                        layer_ids = np.append(layer_ids,layer_ids[-1]+1)
                        layer_type = np.vstack([layer_type,basic_infill_object*2])
                        layer_quality = np.vstack([layer_quality,basic_infill_object])

                    elif layer_save_type == 1:
                        twtt_temp = np.ones(layer_data['Latitude'].shape)*np.nan
                        twtt_temp[ki] = layer_times
                        layer_data['picks'].append(twtt_temp)

                if layer_save_type == 0:
                    layer_data['id'] = layer_ids
                    layer_data['quality'] = layer_quality
                    layer_data['twtt'] = layer_twtt
                    layer_data['type'] = layer_type

                ndh.savemat(layer_data,layer_fns[ind1])

            if len(error_frames) > 0:
                print('Some frames had errors: ',error_frames)
            print('These frames had picks: ',good_frames)
            print('These frames were empty: ',empty_frames)

        finally:
            ##########################################################################################################
            # Part 7 #################################################################################################
            ########## Here we clean up the temporary directory, so that frames from this pdf
            ########## can never be picked up while processing the next one
            if delete_flag == 1:
                shutil.rmtree(comb_deconstruct_dir, ignore_errors=True)