# wrf_qaqc_site_selector.R
library(tidyverse)
library(lubridate) # year() and ceiling_date(); attached by tidyverse >= 2.0 but loaded explicitly here
library(fs)
library(sf)
library(furrr)
library(checkmate)
library(worldmet)
library(openair)
library(rvest)
library(reticulate)
#Python modules accessed via reticulate
wrf = import('wrf')
ncpy = import('netCDF4')
xr = import('xarray')
ccrs = import('cartopy.crs')
metpy = import('metpy')
np = import('numpy')
source('emep_qaqc_funcs.R')
source('wrf_qaqc_user_input.R')
future::plan(multicore, workers = 8)
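#NOTE (sketch): future's multicore strategy is not supported on Windows or inside RStudio;
#a portable alternative is
# future::plan(multisession, workers = 8)
#and the pipeline can be run serially for debugging with
# future::plan(sequential)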
# MAIN --------------------------------------------------------------------
#extract modelled year based on input wrf files
wrf_files = WRF_DIR %>%
dir_ls() %>%
str_subset(str_c('wrfout_',WRF_DOMAIN))
#get modelled year from the first file
yr = ncpy$Dataset(wrf_files[1]) %>%
wrf$extract_times(timeidx = wrf$ALL_TIMES) %>%
year() %>%
unique() %>%
as.integer()
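#sanity check (sketch): the wrfout files are assumed to cover a single calendar year;
#checkmate is already attached, so this could be asserted with e.g.
# assert_int(yr)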
if (!file_exists(path(data_pth_out, paste0('All_sites_in_domain_', WRF_DOMAIN, '.rds')))) {
#collate met sites with available data for modelled year into a tibble
met_sites = get_isd_sites(yr)
#extract model domain x,y dimension sizes
sn_dim = ncpy$Dataset(wrf_files[1])$dimensions$south_north$size
we_dim = ncpy$Dataset(wrf_files[1])$dimensions$west_east$size
#extract site grid indices and filter out sites outside the model domain
met_indeces = wrf$ll_to_xy(ncpy$Dataset(wrf_files[1]), latitude = met_sites$latitude, longitude = met_sites$longitude, meta = F)
met_indeces = as_tibble(met_indeces, .name_repair = ~met_sites$code) %>%
mutate(index = c('i', 'j')) %>%
pivot_longer(-index, names_to = 'code', values_to = 'val') %>%
pivot_wider(id_cols = code, names_from = index, values_from = val) %>%
filter(between(i, 0, we_dim - 1), between(j, 0, sn_dim - 1)) # Python uses 0-based indexing
met_sites_in = met_sites %>%
filter(code %in% met_indeces$code)
#save a tibble with metadata for all met sites within the model domain that have available data
write_rds(met_sites_in, path(data_pth_out, paste0('All_sites_in_domain_', WRF_DOMAIN, '.rds')))
} else {
met_sites_in = read_rds(path(data_pth_out, paste0('All_sites_in_domain_', WRF_DOMAIN, '.rds')))
}
met_sites_sample = met_sites_in
#OPTIONAL: DOWNSAMPLE SITES BEFORE DOWNLOADING DATA
for (s in seq_along(SITES_SAMPLING_TYPE)) {
met_sites_sample = met_sites_sample %>%
sample_sites(type = SITES_SAMPLING_TYPE[[s]],
selector = SITES_SAMPLING_SELECTOR[[s]],
n = N_SITES[[s]])
}
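#illustrative values only (hypothetical): the actual SITES_SAMPLING_TYPE, SITES_SAMPLING_SELECTOR
#and N_SITES are defined in wrf_qaqc_user_input.R and are consumed pairwise by sample_sites()
#in the loop above, e.g.
# SITES_SAMPLING_TYPE     = list('random')
# SITES_SAMPLING_SELECTOR = list(NA)
# N_SITES                 = list(30)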
write_rds(met_sites_sample, path(data_pth_out, paste0('Sites_sample_in_domain_', WRF_DOMAIN, '.rds')))
#download observation data
future_pwalk(list(code = met_sites_sample$code, pth = path(dir_create(path(data_pth_out, 'Raw_obs')), str_c(met_sites_sample$code, '_raw_obs'), ext = 'rds')),
my_importNOAA, year = yr)
# processing downloaded noaa data -----------------------------------------
raw_obs = dir_ls(path(data_pth_out, 'Raw_obs')) %>%
str_subset('raw_obs')
noaa_summary = raw_obs %>%
future_map_dfr(~read_rds(.x) %>%
clean_noaa() %>%
assess_noaa())
#SELECT OBSERVATION STATIONS AS DESIRED - below is just a basic example
suitable = noaa_summary %>%
filter(ws_dc >= MOBS_THRESHOLD, air_temp_dc >= MOBS_THRESHOLD) %>%
distinct(code, report_type, obs_minute, .keep_all = T)
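#alternative selection (sketch, commented out): where a site reports under several
#report_type / obs_minute combinations, keep only the one with the best combined
#wind + temperature data capture
# suitable = noaa_summary %>%
#   filter(ws_dc >= MOBS_THRESHOLD, air_temp_dc >= MOBS_THRESHOLD) %>%
#   group_by(code) %>%
#   slice_max(ws_dc + air_temp_dc, n = 1, with_ties = FALSE) %>%
#   ungroup()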
# -------------------------------------------------------------------------
### if you're left with too many suitable sites at this stage you can downsample them here by uncommenting and running the chunk below:
# met_sites_sample = met_sites_sample %>%
# filter(code %in% suitable$code)
# for (s in seq_along(SITES_SAMPLING_TYPE)) {
# met_sites_sample = met_sites_sample %>%
# sample_sites(type = SITES_SAMPLING_TYPE[[s]],
# selector = SITES_SAMPLING_SELECTOR[[s]],
# n = N_SITES[[s]])
# }
#
# write_rds(met_sites_sample, path(data_pth_out, paste0('Sites_sample_in_domain_', WRF_DOMAIN, '.rds')))
# suitable = suitable %>%
#   filter(code %in% met_sites_sample$code)
# -------------------------------------------------------------------------
#write the suitable sites into a file
write_rds(suitable, path(data_pth_out, paste0('Suitable_sites_in_domain_', WRF_DOMAIN, '_summary'), ext = 'rds'))
#filter the selected observation data from the raw data files and save them in the Data directory for collation with modelled values
#this will depend on individual needs - below is just an example
#IMPORTANT: you MUST make sure that a) there is only ONE record per time stamp, and b) time stamps fall at the TOP OF THE HOUR only
noaa_list = map_chr(suitable$code, str_subset, string = raw_obs) %>%
future_map(read_rds) %>%
future_map(clean_noaa) %>%
future_map(~semi_join(.x, suitable, by = c('code', 'report_type', 'obs_minute'))) %>%
future_map(~mutate(.x, date = ceiling_date(date, unit = 'hour')))
#once filtered format and save the selected obs data
noaa_list %>%
future_map(format_noaa) %>%
future_map(~pivot_wider(.x, id_cols = c(date, code, scenario), names_from = var, values_from = value)) %>%
future_walk2(.y = path(data_pth_out, paste0(suitable$code, '_obs'), ext = 'rds'), write_rds)
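#OPTIONAL check (sketch, assumes one site and one scenario per saved file): re-read the
#saved obs files and confirm there is exactly one record per time stamp
# path(data_pth_out, paste0(unique(suitable$code), '_obs'), ext = 'rds') %>%
#   walk(~stopifnot(anyDuplicated(read_rds(.x)$date) == 0))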
#save geospatial metadata for the sites whose data will be used
sites_geo = met_sites_sample %>%
filter(code %in% suitable$code) %>%
st_as_sf(coords = c('longitude', 'latitude'), crs = 4326) %>%
mutate(longitude = st_coordinates(.)[ , 1],
latitude = st_coordinates(.)[ , 2]) %>%
write_rds(path(data_pth_out, '00Sites_used.rds'))
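#OPTIONAL (sketch): quick visual sanity check of the selected sites; ggplot2 is
#attached via tidyverse
# ggplot(sites_geo) +
#   geom_sf() +
#   ggtitle(paste('Selected observation sites, WRF domain', WRF_DOMAIN))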