# _targets.R
# This file defines the pipeline to do all data gathering, data processing, and
# computations for the CalEPA Pollution & Prejudice project. It also creates
# some output plots and maps, and puts them in a summary report.
#
# Follow the manual to check and run the pipeline:
# https://books.ropensci.org/targets/walkthrough.html#inspect-the-pipeline
# setup -------------------------------------------------------------------
## Load packages required to define the pipeline
library(targets)
library(tarchetypes)
library(here)
# Modification time of the summary report source file. The command of the
# `summary_report_html` target reads this value, so the html report is rendered
# again whenever the qmd file changes.
last_report_update <- file.mtime('03-3_output_reports/summary_report.qmd')
## Global target options ----
tar_option_set(
  # packages attached for every target when it runs (order kept as-is)
  packages = c(
    'tidyverse',
    'conflicted',
    'here',
    'glue',
    'janitor',
    'tools',
    'httr',
    'sf',
    'jsonlite',
    'geojsonsf',
    'tigris',
    'rmapshaper',
    'units',
    'scales',
    'cowplot',
    'tmap',
    'tmaptools',
    'ceramic',
    'rosm',
    'sp',
    'quarto',
    'gt',
    'knitr',
    'zip'
  ),
  # storage format used when a target does not set its own
  format = "rds",
  # fixed seed (keeps geom_jitter output consistent between runs)
  seed = 1234,
  # debugging aid: when a target errors, its workspace is saved - load it
  # with tar_workspace()
  workspace_on_error = TRUE,
  # names of targets whose workspaces should always be saved (useful for
  # development / debugging), e.g. c('sf_formatted_holc_data') - load them
  # with tar_workspace()
  workspaces = NULL
)

## Parallel options (set by targets) ----
### configuration for tar_make_clustermq()
options(clustermq.scheduler = "multiprocess")
### configuration for tar_make_future()
future::plan(future.callr::callr)

# define pipeline ---------------------------------------------------------
## load the R scripts found in the 'R' and '02_scripts' folders ----
tar_source(files = c('R', '02_scripts'))
## build pipeline ----
list(
### 01 - get / process data ------------------------------------------------
#### 01-1 - CES data ----
# raw CalEnviroScreen 4.0 shapefile archive (tracked as a file target)
tar_target(
  name = raw_ces_data_file,
  command = f_download_raw_ces_data(
    url_ces_shp = 'https://oehha.ca.gov/media/downloads/calenviroscreen/document/calenviroscreen40shpf2021shp.zip',
    download_directory = '01-2_data_raw/ces_data'
  ),
  format = 'file'
),
#### NOTE: the ces-4_names.csv file was created manually. It gives more
#### descriptive names to the fields in the CES 4.0 shapefile, and is based
#### on the 'Data Dictionary' tab in the excel workbook at:
#### https://oehha.ca.gov/media/downloads/calenviroscreen/document/calenviroscreen40resultsdatadictionaryf2021.zip
tar_target(
  name = ces_names_file,
  command = here('01-1_data_input_manual',
                 'ces-4_names.csv'),
  format = 'file'
),
# processed CES data, built from the raw file and the manual names file
tar_target(
  name = sf_formatted_ces_data,
  command = f_process_ces_data(
    raw_ces_data_file,
    ces_names_file,
    output_file_name = 'calenviroscreen_4-0_processed',
    output_directory = '01-3_data_processed/ces_data'
  )
),
#### 01-2 - HOLC (redline) data ----
tar_target(
  name = raw_holc_data_files,
  command = f_download_raw_holc_data(
    url_base = 'https://dsl.richmond.edu/panorama/redlining/static/downloads/',
    download_directory = '01-2_data_raw/holc_data'
  )
),
tar_target(
  name = holc_area_descriptions,
  command = f_parse_holc_descriptions(
    raw_holc_data_files,
    output_directory = '01-3_data_processed/holc_data/area_descriptions'
  )
),
# processed HOLC data, built from the raw files and the parsed descriptions
tar_target(
  name = sf_formatted_holc_data,
  command = f_process_holc_data(
    raw_holc_data_files,
    holc_area_descriptions,
    output_file_name = 'HOLC_maps_processed',
    output_directory = '01-3_data_processed/holc_data'
  )
),
### 02 - calculate CES scores & demographics -------------------------------
#### 02-1 - assign minimum CES coverage threshold ----
## The threshold is the minimum portion of a HOLC neighborhood's area that has
## to be covered by CES tracts with a CES score (for any given CES indicator)
## before a score is assigned to that HOLC neighborhood. It is needed because
## some tracts are missing scores for individual indicators or for the overall
## CES score.
tar_target(
  name = ces_coverage_threshold,
  command = {
    ces_coverage_threshold <- 0.5
  }
),
#### 02-2 - calculate CES scores (by HOLC neighborhood) ----
tar_target(
  name = df_holc_ces_scores_calculations,
  command = f_compute_HOLC_CES_scores(
    sf_formatted_ces_data,
    sf_formatted_holc_data,
    ## minimum portion of a HOLC polygon that must be covered by CES
    ## polygon(s) that have a score for the given CES measure
    ## (if coverage < threshold, the CES score is set to NA for that measure)
    ces_coverage_threshold
  )
),
tar_target(
  name = ces_scores_missing_check,
  command = f_check_missing_CES_scores(
    df_holc_ces_scores_calculations
  )
),
tar_target(
  name = df_holc_ces_scores_summary,
  command = f_summarize_HOLC_CES_scores(
    df_holc_ces_scores_calculations,
    sf_formatted_ces_data
  )
),
#### 02-3 - calculate demographics (by HOLC neighborhood) ----
tar_target(
  name = df_holc_demographics_calculations,
  command = f_compute_HOLC_demographics(
    sf_formatted_ces_data,
    sf_formatted_holc_data
  )
),
tar_target(
  name = df_holc_demographics_summary,
  command = f_summarize_HOLC_demographics(
    df_holc_demographics_calculations
  )
),
#### 02-4 - calculate / compare nearest centroid CES scores (by HOLC neighborhood) ----
tar_target(
  name = sf_holc_ces_scores_centroids,
  command = f_compute_HOLC_CES_scores_centroids(
    sf_formatted_ces_data,
    sf_formatted_holc_data,
    ces_measure_id = 'calenviroscreen_4_0_score',
    output_file_name = 'HOLC_CES_scores_centroids.gpkg',
    output_directory = '03-1_output_data'
  )
),
# combine the scores from the two calculation targets above for one measure
tar_target(
  name = df_holc_ces_scores_comparison,
  command = f_combine_HOLC_CES_score_methods(
    df_holc_ces_scores_calculations,
    sf_holc_ces_scores_centroids,
    ces_measure_id = 'calenviroscreen_4_0_score'
  )
),
tar_target(
  name = holc_ces_score_methods_correlation,
  command = f_HOLC_CES_score_methods_correlation(
    df_holc_ces_scores_comparison
  )
),
### 03 - combine data & create output file ---------------------------------
#### 03-0 - data dictionary file ----
# path to the data dictionary csv, tracked as a file target
tar_target(
  name = data_dictionary_file,
  command = here('03-1_output_data', 'data_dictionary_outputs.csv'),
  format = 'file'
),
#### 03-1 - combine data and create geopackage ----
tar_target(
  name = sf_combined_results,
  command = f_combine_computed_data(
    df_holc_ces_scores_summary,
    df_holc_demographics_summary,
    sf_formatted_holc_data,
    output_file_name = 'HOLC_CES_scores_demographics.gpkg',
    output_directory = '03-1_output_data'
  )
),
#### 03-2 - write shapefile ----
# built from the combined results and the data dictionary file
tar_target(
  name = write_shapefile,
  command = f_convert_to_shapefile(
    sf_combined_results,
    data_dictionary_file,
    output_file_name = 'HOLC_CES_scores_demographics.shp',
    output_directory = '03-1_output_data/HOLC_CES_scores_demographics_shp'
  ),
  format = 'file'
),
### 04 - create plots & maps -----------------------------------------------
#### 04-1 - map (showing analysis process) - 4 panes ----
#### NOTE: this plot can't be saved as an RDS file, so the target only stores
#### the path to the output png file - read the plot into R with:
#### magick::image_read(tar_read(plot_map_panels))
tar_target(
  name = plot_map_panels,
  command = f_plot_map_panels(
    sf_formatted_ces_data,
    sf_formatted_holc_data,
    sf_combined_results,
    city_selected = 'Stockton',
    ces_measure_id = 'calenviroscreen_4_0_score',
    # alternative title: 'CalEnviroScreen 4.0 Score'
    ces_measure_title = 'CES 4.0 Score',
    output_directory = '03-2_output_plots',
    output_file_name = '01_map-combined',
    # a (free) mapbox API key must be saved as an environment variable
    mapbox_api_key = Sys.getenv('mapbox_api_key'),
    # 'mapbox' or 'osm'
    basemap_type = 'mapbox'
  ),
  format = 'file'
),
#### 04-2 - CES scores - points - raw score (grouped by city / HOLC grade) ----
tar_target(
  name = plot_scores_points_raw,
  command = f_plot_scores_points_raw(
    sf_combined_results,
    ces_measure_id = 'calenviroscreen_4_0_score',
    # alternative title: 'CalEnviroScreen 4.0 Score'
    ces_measure_title = 'Estimated CES 4.0 Score',
    output_directory = '03-2_output_plots',
    output_file_name = '02_raw-score_point_by-city'
  )
),
#### 04-3 - CES scores - points - average score by city / HOLC grade ----
tar_target(
  name = plot_scores_points_average_by_grade,
  command = f_plot_scores_points_average_by_grade(
    sf_combined_results,
    ces_measure_id = 'calenviroscreen_4_0_score',
    # alternative title: 'CalEnviroScreen 4.0 Score'
    ces_measure_title = 'Estimated CES 4.0 Score',
    output_directory = '03-2_output_plots',
    output_file_name = '03_average-score_point_by-city'
  )
),
#### 04-4 - demographics (race) - bar plot ----
tar_target(
  name = plot_race_bars_by_group,
  command = f_plot_race_bars_by_group(
    sf_combined_results,
    output_directory = '03-2_output_plots',
    output_file_name = '04_race_bar_by-race'
  )
),
#### 04-5 - CES scores - points - departure score (grouped by city / HOLC grade) ----
tar_target(
  name = plot_scores_points_departure,
  command = f_plot_scores_points_departure(
    sf_combined_results,
    ces_measure_id = 'calenviroscreen_4_0_score',
    # alternative title: 'CalEnviroScreen 4.0 Score'
    ces_measure_title = 'Estimated CES 4.0 Score',
    output_directory = '03-2_output_plots',
    output_file_name = '99_departure-score_point_by-city'
  )
),
#### 04-6 - CES departure scores - box plot ----
tar_target(
  name = plot_scores_box_departure,
  command = f_plot_scores_box_departure(
    sf_combined_results,
    ces_measure_id = 'calenviroscreen_4_0_score',
    # alternative title: 'CalEnviroScreen 4.0 Score'
    ces_measure_title = 'Estimated CES 4.0 Score',
    output_directory = '03-2_output_plots',
    output_file_name = '99_departure-score_box_by-holc-grade'
  )
),
#### 04-7 - CES departure scores - box plot w/ legend ----
tar_target(
  name = plot_scores_box_departure_legend,
  command = f_plot_scores_box_departure_legend(
    sf_combined_results,
    ces_measure_id = 'calenviroscreen_4_0_score',
    # alternative title: 'CalEnviroScreen 4.0 Score'
    ces_measure_title = 'Estimated CES 4.0 Score',
    output_directory = '03-2_output_plots',
    output_file_name = '99_departure-score_box_by-holc-grade_with-legend',
    error_bar = TRUE,
    outer_point = FALSE
  )
),
#### 04-8 - CES scores method comparison ----
tar_target(
  name = plot_scores_method_comparison_scatter,
  command = f_plot_scores_method_comparison_scatter(
    df_holc_ces_scores_comparison,
    ces_measure_id = 'calenviroscreen_4_0_score',
    ces_measure_title = 'CES 4.0 Score',
    output_directory = '03-2_output_plots',
    output_file_name = '99_score_method_comparison_scatter'
  )
),
#### 04-9 - CES scores method comparison - faceted ----
tar_target(
  name = plot_scores_method_comparison_scatter_facet,
  command = f_plot_scores_method_comparison_scatter_facet(
    df_holc_ces_scores_comparison,
    ces_measure_id = 'calenviroscreen_4_0_score',
    ces_measure_title = 'CES 4.0 Score',
    output_directory = '03-2_output_plots',
    output_file_name = '99_score_method_comparison_scatter_facet'
  )
),
### 05 - create reports / presentations ------------------------------------
# render the summary report through tar_quarto()
tar_quarto(
  name = summary_report,
  path = '03-3_output_reports/summary_report.qmd'
),
# render the same qmd file explicitly to html
tar_target(
  name = summary_report_html,
  command = {
    # Naming `summary_report` makes this target depend on the tar_quarto()
    # target above, so the html render is scheduled after it. Without this
    # the command referenced no target at all: its position in the run order
    # was undefined, and with the parallel backends configured in this file
    # both renders of the same qmd file could run at the same time.
    summary_report
    # force this to re-run if the qmd file has changed
    last_report_update
    # render
    # NOTE(review): the value of quarto_render() is what targets records for
    # this format = 'file' target - confirm it identifies the rendered html.
    quarto_render(
      input = '03-3_output_reports/summary_report.qmd',
      output_format = 'html'
    )
  },
  format = 'file'
),
# notes on the targets workflow, rendered from targets_notes.qmd
tar_quarto(
  name = targets_notes_file,
  path = 'targets_notes.qmd'
),
### 06 - readme file -------------------------------------------------------
# project readme, rendered from README.qmd
tar_quarto(
  name = readme_file,
  path = 'README.qmd'
)
)