diff --git a/breakpoint_tasks.py b/breakpoint_tasks.py index 6ee05d40..6eb86148 100644 --- a/breakpoint_tasks.py +++ b/breakpoint_tasks.py @@ -101,7 +101,8 @@ def requires(self): 'climate': export_tasks.ClimateExportTask(), 'sweep': export_tasks.SweepExportTask(), 'hist': export_tasks.HistExportTask(), - 'summary': export_tasks.SummaryExportTask() + 'summary': export_tasks.SummaryExportTask(), + 'combined': export_tasks.CombinedTasksRecordTask() } def output(self): diff --git a/paper/fill_template.py b/paper/fill_template.py index 45208e48..1ec73fcc 100644 --- a/paper/fill_template.py +++ b/paper/fill_template.py @@ -20,7 +20,11 @@ def main(): source_contents = f.read() loader = jinja2.BaseLoader() - template = jinja2.Environment(loader=loader).from_string(source_contents) + template = jinja2.Environment( + loader=loader, + comment_start_string='{//', + comment_end_string='//}' + ).from_string(source_contents) with open(template_loc) as f: template_vals = json.load(f) diff --git a/paper/paper.bib b/paper/paper.bib index 35259b69..f6972502 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -1484,3 +1484,26 @@ @article{hunt_fossil_2020 langid = {english}, file = {Full Text:/home/sam/Zotero/storage/2T72GJT4/Hunt et al. - 2020 - Fossil Energy Use, Climate Change Impacts, and Air.pdf:application/pdf}, } + +@article{knight_developing_2010, + title = {Developing Variable Unit-Structure Premium Rate Differentials in Crop Insurance}, + volume = {92}, + issn = {00029092, 14678276}, + url = {http://www.jstor.org/stable/40647972}, + abstract = {Federal crop insurance programs offer producers the option of insuring farm units individually or as an aggregate unit. Existing programs offer a fixed 10\% discount for most growers taking coverage at the aggregate level. This article describes an analysis of risk changes when units are aggregated. The methods described here, which base unit aggregation discounts on observable farm characteristics, are approved for implementation into the Federal Crop Insurance Program.}, + pages = {141--151}, + number = {1}, + journaltitle = {American Journal of Agricultural Economics}, + author = {Knight, Thomas O. and Coble, Keith H. and Goodwin, Barry K. and Rejesus, Roderick M. and Seo, Sangtaek}, + urldate = {2024-07-11}, + date = {2010}, + note = {Publisher: [Agricultural \& Applied Economics Association, Oxford University Press]}, +} + +@misc{zulauf_importance_2023, + title = {The Importance of Insurance Unit in Crop Insurance Policy Debates}, + url = {https://farmdocdaily.illinois.edu/2023/06/the-importance-of-insurance-unit-in-crop-insurance-policy-debates.html}, + publisher = {University of Illinois}, + author = {Zulauf, Carl}, + date = {2023-06-12}, +} diff --git a/paper/paper.md b/paper/paper.md index 6dda0a0a..3ba593dc 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -40,9 +40,9 @@ Global warming threatens production of key staple crops, including maize [@rezae In the United States of America, the world’s largest maize producer and exporter [@ates_feed_2023], the Federal Crop Insurance Program (FCIP) covers a large share of this growing risk [@tsiboe_crop_2023]. The costs of crop insurance in the U.S. have already increased by 500% since the early 2000s with annual indemnities reaching $19B in 2022 [@schechinger_crop_2023]. 
Furthermore, retrospective analysis attributes 19% of "national-level crop insurance losses" between 1991 and 2017 to climate warming, an estimate rising to 47% during the drought-stricken 2012 growing season [@diffenbaugh_historical_2021]. Looking forward, @li_impact_2022 show progressively higher U.S. maize loss rates as warming elevates. -Modeling the possible changes in frequency and severity of crop loss events that trigger indemnity claims is an important step to prepare for the future impacts of global warming. Related studies have predicted changes in crop yields at county-level aggregation [@leng_predicting_2020] and have estimated climate change impacts to U.S. maize within whole-sector or whole-economy analysis [@hsiang_estimating_2017]. Even so, as insurance products may include elements operating at the producer level [@rma_crop_2008], often missing are more granular models of insurer-focused claims rate and loss severity at the level of the insured set of fields within a policy ("risk unit") across a large region. Such far-reaching but detailed data are prerequisite to designing proactive policy instruments benefiting both institutions and growers. +Modeling the possible changes in frequency and severity of crop loss events that trigger indemnity claims is an important step to prepare for the future impacts of global warming. Related studies have predicted changes in crop yields at county-level aggregation [@leng_predicting_2020] and have estimated climate change impacts to U.S. maize within whole-sector or whole-economy analysis [@hsiang_estimating_2017]. Even so, as insurance products may include elements operating at the producer level [@rma_crop_2008], often missing are more granular models of insurer-focused claims rate and loss severity at the level of the risk unit^[The "risk unit" refers to set of insured fields or an insured area within an individual policy.] across a large region. Such far-reaching but detailed data are prerequisite to designing proactive policy instruments benefiting both institutions and growers. -We address this need by predicting the probability and severity of maize loss within the U.S. Corn Belt at sub-county-level, probabilistically forecasting insurer-relevant outcome metrics under climate change. We find these projections using simulations of the Multiple Peril Crop Insurance Program, "the oldest and most common form of federal crop insurance" [@chite_agricultural_2006]. More precisely, we model changes to risk under the Yield Protection (YP) plan, which covers farmers in the event of yield losses due to an insured cause. Furthermore, by contrasting those simulations to a counterfactual which does not include further climate warming, we then quantitatively highlight the insurer-relevant effects of climate change in the 2030 and 2050 timeframes. Finally, we use these data to suggest possible policy changes to help mitigate and enable adaptation to the specific climate-fueled risks we observe in our results. +We address this need by predicting the probability and severity of maize loss within the U.S. Corn Belt at sub-county-level, probabilistically forecasting insurer-relevant outcome metrics under climate change. We find these projections using simulations of the Multiple Peril Crop Insurance Program, "the oldest and most common form of federal crop insurance" [@chite_agricultural_2006]. More precisely, we model changes to risk under the Yield Protection (YP) plan, which covers farmers in the event of yield losses due to an insured cause. 
Furthermore, by contrasting those simulations to a "counterfactual" which does not include further climate warming, we then quantitatively highlight the insurer-relevant effects of climate change in the 2030 and 2050 timeframes. Finally, we use these data to suggest possible policy changes to help mitigate and enable adaptation to the specific climate-fueled risks we observe in our results. \bigskip @@ -67,9 +67,9 @@ $s = \frac{l}{y_{expected}} = \max(c - \frac{y_{actual}}{y_{expected}}, 0) = \ma Note that we define severity from the insurer perspective, reporting the percentage points gap between actual yield and the covered portion of expected yield. ## Data -As APH operates at unit-level, modeling these formulations requires highly local yield and climate information. Therefore, we use the Scalable Crop Yield Mapper (SCYM) which provides remote sensed yield estimations from 1999 to 2022 at 30m resolution across the US Corn Belt [@lobell_scalable_2015; @deines_million_2021]. Meanwhile, in order to predict these differential outcomes, we use climate data from CHC-CMIP6 [@williams_high_2024] which, at daily 0.05 degree scale, offers both historic data from 1983 to 2016 as well as future projections in a 2030 and 2050 series. In choosing from its two available shared socioeconomic pathways, we use the “intermediate” SSP245 within CHC-CMIP6 over SSP585 per @hausfather_emissions_2020. This offers the following climate variables for modeling: precipitation, temperature (minimum and maximum), relative humidity (n, x, average), heat index, wet bulb temperature, VPD, and SVP. +As YP operates at unit-level, modeling these formulations requires highly local yield and climate information. Therefore, we use the Scalable Crop Yield Mapper (SCYM) which provides remote sensed yield estimations from 1999 to 2022 at 30m resolution across the US Corn Belt [@lobell_scalable_2015; @deines_million_2021]. Meanwhile, in order to predict these differential outcomes, we use climate data from CHC-CMIP6 [@williams_high_2024] which, at daily 0.05 degree scale, offers both historic data from 1983 to 2016 as well as future projections in a 2030 and 2050 series. In choosing from its two available shared socioeconomic pathways, we prefer the “intermediate” SSP245 within CHC-CMIP6 over SSP585 per @hausfather_emissions_2020. This offers the following climate variables for modeling: precipitation, temperature (minimum and maximum), relative humidity (n, x, average), heat index, wet bulb temperature, VPD, and SVP. -We align these variables to a common grid in order to create the discrete instances needed for model training and evaluation. To that end, we create "neighborhoods" [@manski_diversified_2024] of geographically proximate fields paired with climate data through 4 character geohashing [@niemeyer_geohashorg_2008], defining small populations in a grid of cells roughly 28 by 20 kilometers for use within statistical tests [@haugen_geohash_2020]. Having created these spatial groups, we model against observed distributions of changes from historic yield ($y_{expected}$) or "yield deltas" which we describe as mean and standard deviation, helping ensure dimensionality is appropriate for the size of the input dataset. Finally, we similarly describe climate variable deltas as min, max, mean and standard deviation per month. We also evaluate alternative neighborhood sizes in supplemental materials. +We align these variables to a common grid in order to create the discrete instances needed for model training and evaluation. 
More specifically, we create "neighborhoods" [@manski_diversified_2024] of geographically proximate fields paired with climate data through 4 character^[We also evaluate alternative neighborhood sizes in supplemental materials.] geohashing [@niemeyer_geohashorg_2008], defining small populations in a grid of cells roughly 28 by 20 kilometers for use within statistical tests [@haugen_geohash_2020]. Having created these spatial groups, we model against observed deviations from yield expectations which YP defines through historic yield ($y_{expected}$). This creates a distribution of changes or "yield deltas" which we summarize as neighborhood-level means and standard deviations, helping ensure dimensionality is appropriate for the size of the input dataset. Finally, we similarly describe climate variable deltas as min, max, mean and standard deviation per month. ## Regression With these data in mind, we next build predictive models for use in simulations of future insurance outcomes. Using machine learning per @leng_predicting_2020, we build regressors forecasting change in yields. Specifically, we use a feed forward artificial neural network [@baheti_essential_2021] as it: @@ -81,33 +81,35 @@ As described below, we fit ($f$) neighborhood-level climate variables ($C$) and $y_{\Delta\%}(x) = \frac{y_{actual} - y_{expected}}{y_{expected}} = \frac{y_{\Delta}}{y_{\mu-historic}} = f_{z}(C, x, z_{\mu-historic}, z_{\sigma-historic})$ -Many different kinds of neural network structures could meet these criteria. Therefore, building individual networks using the Adam optimizer [@kingma_adam_2014], we also try various combinations of "hyper-parameters" in a grid search sweep [@joseph_grid_2018]. Most of these options address "overfitting" in which regressors learn non-generalizable trends from input data. Specifically, this performance-optimizing algorithm permutes different numbers of layers, dropout rates [@srivastava_dropout_2014], L2 regularization strengths [@tewari_regularization_2021], and removal of input attributes. In total, we select a preferred configuration from 2,400 candidate models before retraining on all available data ahead of simulations. In this design, all non-output neurons use Leaky ReLU activation per @maas_rectifier_2013. +Many different kinds of neural network structures could meet these criteria. | **Parameter** | **Options** | **Description** | **Purpose** | -| ----------------------------------------------------- | -------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------ | +| ----------------------------------------------------- | ---------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------ | | Layers | 1 - 6 | Number of feed forward layers to include where 2 layers include 32 and then 8 nodes while 3 layers include 64, 32, and 8. Layer sizes are {512, 256, 128, 64, 32, 8}. | More layers might allow networks to learn more sophisticated behaviors but also might overfit to input data. | | Dropout | 0.00, 0.01, 0.05, 0.10, 0.50 | This dropout rate applies across all hidden layers. 
| Random disabling of neurons may address overfitting. | | L2 | 0.000, 0.001, 0.010, 0.100 | This L2 regularization strength applies across all hidden layer neuron connections. | Penalizing networks with edges that are "very strong" may confront overfitting without changing the structure of the network itself. | | Attr Drop | 10 | Retraining where the sweep individually drops each of the dozen input distributions or year or keeps all inputs. | Removing attributes helps determine if an input may be unhelpful. | | Count | Yes / No | Indicate if the model can access the count of observations in a geohash. | Determine if having information availability is helpful. | -## Simulation -After training machine learning models using historical data, predictions of future distributions feed into Monte Carlo simulations [@metropolis_beginning_1987; @kwiatkowski_monte_2022] in the 2030 and 2050 CHC-CMIP6 series [@williams_high_2024]. +Table: Parameters which try in different permutations to find an optimal configuration. {#tbl:sweepparam} + +Therefore, building individual networks using the Adam optimizer [@kingma_adam_2014], we also try various combinations of "hyper-parameters" in a grid search sweep [@joseph_grid_2018]. Most of these options address "overfitting" in which regressors learn non-generalizable trends from input data. Specifically, this performance-optimizing algorithm permutes the parameters in Table @tbl:sweepparam: different numbers of layers, dropout rates [@srivastava_dropout_2014], L2 regularization strengths [@tewari_regularization_2021], and removal of input attributes. In total, we select a preferred configuration from 2,400 candidate models before retraining on all available data ahead of simulations. In this design, all non-output neurons use Leaky ReLU activation per @maas_rectifier_2013. -![Model pipeline overview diagram. Code released as open source.](./img/pipeline.png "Model pipeline overview diagram. Code released as open source."){ width=80% } +## Simulation +After training machine learning models using historical data, predictions of future distributions feed into Monte Carlo simulations [@metropolis_beginning_1987; @kwiatkowski_monte_2022] in the 2030 and 2050 CHC-CMIP6 series [@williams_high_2024] as described in Figure @fig:pipeline. With trials consisting of sampling at the neighborhood scale, this approach allows us to consider many possible values to understand what the distribution of outcomes may look like in the future and make probability statements about insurance-relevant events. In addition to sampling climate variables and model error residuals to propagate uncertainty [@yanai_estimating_2010], we also draw multiple times from a neighborhood to approximate the size of an insured unit^[In this operation, we also draw the unit size itself randomly per trial from historic data [@rma_statecountycrop_2024].] as the exact geospatial risk unit structure is not publicly known. Altogether, this simulates a single unit per year for 5 years. -With trials consisting of sampling at the neighborhood scale, this approach allows us to consider many possible values to understand what the distribution of outcomes may look like in the future and make probability statements about insurance-relevant events. In addition to sampling climate variables, we also "draw" different model error residuals to propagate uncertainty [@yanai_estimating_2010]. 
Furthermore, as the exact geospatial risk unit structure is not publicly known, these trials also sample multiple times from a neighborhood to approximate the size of an insured unit. In this operation, we also draw the unit size itself randomly per trial from historic data [@rma_statecountycrop_2024]. Altogether, this simulates a single unit per year for 5 years.

+![Model pipeline overview diagram. Code released as open source.](./img/pipeline.png "Model pipeline overview diagram. Code released as open source."){ width=80% #fig:pipeline }

-Finally, to determine significance, we use Mann Whitney U [@mann_test_1947] with Bonferroni-correction [@bonferroni_il_1935] at neighborhood-level changes per year as variance may differ between the two expected and counterfactual sets [@mcdonald_handbook_2014]. Here observe that, though offering predictions at 30 meter scale, SYCM uses Daymet variables at 1 km resolution [@thornton_daymet_2014] and, thus, we more conservatively assume this 1km granularity in determining sample sizes.
+Finally, to determine significance, we use Mann Whitney U [@mann_test_1947] with Bonferroni-correction [@bonferroni_il_1935] on neighborhood-level changes per year as variance may differ between the expected and counterfactual sets [@mcdonald_handbook_2014]. Here we observe that, though offering predictions at 30 meter scale, SCYM uses Daymet variables at 1 km resolution [@thornton_daymet_2014] and, thus, we more conservatively assume this 1km granularity in determining sample sizes.

## Evaluation
-We choose our model using each candidate’s capability to predict into future years, a "sweep temporal displacement" task representative of the Monte Carlo simulations with three steps [@brownlee_what_2020].
+We choose our model using each candidate’s capability to predict into future years, a "sweep temporal displacement" task representative of the Monte Carlo simulations [@brownlee_what_2020]:

- Train on all data from 1999 to 2012 inclusive.
- Use 2014 and 2016 as a validation set to compare among the 2,400 candidate models.
- Test in which 2013 and 2015 serve as a fully hidden set in order to estimate how the chosen model may perform in the future.

-This "sweep" structure which involves trying different model parameter permutations like L2 and Dropout rates fits with a relatively small dataset. Therefore, having performed model selection, we further evaluate our chosen regressor through three additional tests which more practically estimate performance in different ways one may consider using this method (random assignment, temporal displacement, and spatial displacement) while using a larger training set.
+This involves trying different model parameter permutations like L2 and Dropout rates while fitting with a relatively small training set in order to offer a separate fully hidden test set. Therefore, having performed model selection, we further evaluate our chosen regressor through three additional tests which more practically estimate performance in different ways one may consider using this method (see Table @tbl:posthoc) while using a larger and, thus, more representative training set.
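To make the three post-hoc definitions in Table @tbl:posthoc concrete, the following is a minimal sketch of how such splits could be constructed; it is illustrative only (the released pipeline is the authoritative implementation) and the `geohash` and `year` column names are assumptions for this example.

```python
import pandas as pd


def make_posthoc_splits(instances: pd.DataFrame, seed: int = 42):
    """Illustrative construction of the random, temporal, and spatial splits.

    Assumes one row per neighborhood-year with hypothetical columns
    'geohash' (4 characters) and 'year'.
    """
    # Random assignment: hold out 25% of neighborhood-year combinations.
    test_random = instances.sample(frac=0.25, random_state=seed)
    train_random = instances.drop(test_random.index)

    # Temporal displacement: train on 1999-2013, test on 2014-2016.
    train_temporal = instances[instances['year'] <= 2013]
    test_temporal = instances[instances['year'] >= 2014]

    # Spatial displacement: split by 3-character geohash "regions" so that
    # every 4-character neighborhood in a held-out region stays unseen.
    regions = instances['geohash'].str[:3].drop_duplicates()
    train_regions = regions.sample(frac=0.75, random_state=seed)
    in_train = instances['geohash'].str[:3].isin(train_regions)

    return {
        'random': (train_random, test_random),
        'temporal': (train_temporal, test_temporal),
        'spatial': (instances[in_train], instances[~in_train]),
    }
```

In each case the model is fully retrained on the training portion with the unchanging sweep-chosen hyper-parameters before scoring the held-out portion.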
| **Trial** | **Purpose** | **Train** | **Test** | | ------------------------------------------------------------------------ | --------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------- | ------------------------------------------------ | @@ -115,6 +117,8 @@ This "sweep" structure which involves trying different model parameter permutati | Temporal Displacement | Evaluate ability to predict into future years. | All data from 1999 to 2013 inclusive. | All data 2014 to 2016 inclusive. | | Spatial Displacement | Evaluate ability to predict into unseen geographic areas. | All 4 character geohashes in a randomly chosen 75% of 3 character regions. | Remaining 25% of regions. | +Table: Overview of trials after model selection. {#tbl:posthoc} + These post-hoc trials use only training and test sets as we fully retrain models using unchanging sweep-chosen hyper-parameters. Note that some of these tests use "regions" which we define as all geohashes sharing the same first three characters. This two tier definition creates a grid of 109 x 156 km cells [@haugen_geohash_2020] each including all neighborhoods (4 character geohashes) found within that area. \bigskip @@ -123,68 +127,89 @@ These post-hoc trials use only training and test sets as we fully retrain models We project loss probabilities to more than double ({{experimentalProbability2050}} claims rate) under SSP245 at mid-century in comparison to the no additional warming counterfactual scenario ({{counterfactualProbability2050}} claims rate) even as the average yield sees only minor changes from current values under the climate change simulation. ## Neural network outcomes -With slight bias towards the mean prediction task, we select {{numLayers}} layers ({{layersDescription}}) using {{dropout}} dropout and {{l2}} L2 from our sweep (count information {{countInfoStr}}). As further explored in supplemental materials, we use all input variables. +With slight bias towards the mean prediction task, we select {{numLayers}} layers ({{layersDescription}}) using {{dropout}} dropout and {{l2}} L2 from our sweep (count information {{countInfoStr}}) with performance described in Table @tbl:sweep. As further explored in supplemental materials, we use all input variables. -| **Set** | **Mean Prediction MAE** | **Std Prediction MAE** | +| **Set** | **MAE for Mean Prediction** | **MAE for Std Prediction** | | ---------- | ----------------------- | ---------------------- | | Train | {{trainMeanMae}} | {{trainStdMae}} | | Validation | {{validationMeanMae}} | {{validationStdMae}} | | Test | {{testMeanMae}} | {{testStdMae}} | -The selected model sees some overfit but this trial fits with fewer data points than the simulations' model. Therefore, having chosen this set of hyper-parameters, we further evaluate regression performance through varied definitions of test sets representing different tasks with a larger training set. +Table: Results of chosen configuration during the "sweep" for model selection. {#tbl:sweep} + +The selected model sees some overfit but meta-parameter optimization uses fewer training data points than the simulations' model. Therefore, having chosen this set of hyper-parameters, we further evaluate regression performance through varied definitions of test sets representing different tasks with a larger training set. 
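Because each candidate regressor emits two outputs per neighborhood-year (the predicted mean and the predicted standard deviation of yield deltas), the error columns reported below are computed separately per output. A small sketch under an assumed two-column array layout (the actual pipeline's data structures may differ):

```python
from sklearn.metrics import mean_absolute_error


def summarize_errors(y_true, y_pred):
    """Report one MAE per predicted moment of the yield-delta distribution.

    Assumes column 0 holds the predicted mean and column 1 the predicted
    standard deviation; this layout is hypothetical.
    """
    return {
        'mean_mae': mean_absolute_error(y_true[:, 0], y_pred[:, 0]),
        'std_mae': mean_absolute_error(y_true[:, 1], y_pred[:, 1]),
    }
```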
| **Task** | **Test Mean Pred MAE** | **Test Std Pred MAE** | **% of Units in Test Set** |
| --------------------- | ---------------------- | --------------------- | -------------------------- |
-| Temporal Displacement | {{temporalMeanMae}} | {{temporalStdMae}} | {{temporalPercent}} |
-| Spatial Displacement | {{spatialMeanMae}} | {{spatialStdMae}} | {{spatialPercent}} |
+| Temporal | {{temporalMeanMae}} | {{temporalStdMae}} | {{temporalPercent}} |
+| Spatial | {{spatialMeanMae}} | {{spatialStdMae}} | {{spatialPercent}} |
| Random | {{randomMeanMae}} | {{randomStdMae}} | {{randomPercent}} |

-In these trials, the temporal displacement task best resembles expected error in simulations.
+Table: Results of tests after model selection. {#tbl:posthocresults}
+
+In these trials, as outlined in Table @tbl:posthocresults, the temporal displacement task best resembles expected error in simulations.

## Simulation outcomes
-{{percentSignificant}} of neighborhoods in SSP245 across both the 2030 and 2050 see significant changes to claim probability in at least one year ($p<0.05/n$). We observe that the remaining neighborhoods failing to meet that threshold often have less land dedicated to corn within their area and, thus, a smaller sample size in our simulations.
+Despite the conservative nature of the Bonferroni correction [@mcdonald_handbook_2014] and the 1km sample assumption, {{percentSignificant}} of neighborhoods in SSP245 see significant changes to claim probability ($p<0.05/n$), though we observe that some of the remaining neighborhoods failing to meet that threshold have less land dedicated to maize within their area and, thus, a smaller sample size in our simulations.

| **Scenario** | **Year** | **Unit mean yield change** | **Unit loss probability** | **Avg covered loss severity** |
| ---------------------------- | -------- | -------------------------- | --------------------------------- | ------------------------------ |
-| Counterfactual | 2030 | {{counterfactualMean2030}} | {{counterfactualProbability2030}} | {{counterfactualSeverity2030}} |
-| SSP245 | 2030 | {{experimentalMean2030}} | {{experimentalProbability2030}} | {{experimentalSeverity2030}} |
-| Counterfactual | 2050 | {{counterfactualMean2050}} | {{counterfactualProbability2050}} | {{counterfactualSeverity2050}} |
-| SSP245 | 2050 | {{experimentalMean2050}} | {{experimentalProbability2050}} | {{experimentalSeverity2050}} |
+| Counterfactual | 2030 | {% if counterfactualMean2030|float > 0 %}+{% endif %}{{counterfactualMean2030}} | {{counterfactualProbability2030}} | {{counterfactualSeverity2030}} |
+| SSP245 | 2030 | {% if experimentalMean2030|float > 0 %}+{% endif %}{{experimentalMean2030}} | {{experimentalProbability2030}} | {{experimentalSeverity2030}} |
+| Counterfactual | 2050 | {% if counterfactualMean2050|float > 0 %}+{% endif %}{{counterfactualMean2050}} | {{counterfactualProbability2050}} | {{counterfactualSeverity2050}} |
+| SSP245 | 2050 | {% if experimentalMean2050|float > 0 %}+{% endif %}{{experimentalMean2050}} | {{experimentalProbability2050}} | {{experimentalSeverity2050}} |
| | | $y_{\Delta \mu}$ | $p_{\mu}$ | $s_{\mu}$ |

-In highlighting these climate threats, these simulations suggest that warming disrupts historic trends of increasing average yield @nielsen_historical_2023]. However, while these SSP245 yield means remain similar to the historic baseline, the distribution tails differ more substantially, showing more than double the counterfactual loss probability in the 2050 series compared to counterfactual.
+Table: Overview of Monte Carlo simulation results. {#tbl:simresults}

-![One of our interactive tools showing 2050 outcomes distribution relative to $y_{expected}$ highlighting loss with and without climate change.](./img/hist.png "One of our interactive tools showing 2050 outcomes distribution relative to $y_{expected}$ highlighting loss with and without climate change."){ width=85% }
+With Table @tbl:simresults highlighting these climate threats, these simulations suggest that warming disrupts historic trends of increasing average yield [@nielsen_historical_2023].

-Note that, our open source pipeline allows for configuration of this simulation including switching to field instead of risk unit. As anticipated, we observe that simulation at field level increases the claims rate relative to the unit-level simulations in all scenarios, confirming the expected portfolio effect.
+![One of our interactive tools showing 2050 outcomes distribution relative to $y_{expected}$ highlighting loss with and without climate change.](./img/hist.png "One of our interactive tools showing 2050 outcomes distribution relative to $y_{expected}$ highlighting loss with and without climate change."){ width=95% #fig:hist }
+
+However, while these SSP245 yield means remain similar to the historic baseline, Figure @fig:hist reveals how the distribution tails differ more substantially, showing more than double the loss probability in the 2050 series compared to the counterfactual.

\bigskip

# Discussion
In addition to highlighting future work opportunities, we observe a number of policy-relevant dynamics within our simulations.

+## Historic yield improvements
+Prior work suggests that historic trends would anticipate continued increases in maize outputs [@nielsen_historical_2023], but our simulations predict climate change to wipe out the {{ counterfactualMean2050 }} yield increase that we would otherwise expect within the counterfactual simulation. In place of that growth, we find yields under SSP245 to slightly drop in 2050 relative to current levels. Notably, this decrease in average yields impacts the claims rate, which first increases to {{ experimentalProbability2030 }} at 2030 before decreasing to {{ experimentalProbability2050 }} at 2050. This evolving profile arises in part due to the non-linear impacts of climate change where the yield expectation ($y_{expected}$) may be lower in 2050 than in 2030. We note that this decrease may impact not just aggregate output but also grower usage of the trend adjustment option when reporting their yield expectations [@plastina_trend-adjusted_2014].
+
+## Risk unit size
+Prior work expects that larger insured units will reduce risk [@knight_developing_2010], and we similarly observe that the claims rate decreases as the acreage included in an insured unit grows. However, after attempting multiple insured unit sizes including removal of smaller Optional Units [@zulauf_importance_2023] in post-hoc simulations, a gap persists in claims rates between the counterfactual and expected climate change simulations, suggesting our concerns may remain relevant across different risk units.
+
## Stress
-First, we note that our neural network depresses yields during combined warmer and drier conditions potentially similar to 2012 which saw poor US maize outcomes [@ers_weather_2013]. Our predictions may add additional evidence to prior empirical studies such as @sinsawat_effect_2004 and @marouf_effects_2013 which describe the negative impacts of heat stress and water deficits.
Indeed, this prior work may explain why precipitation may serve as a protective factor: those with drier July conditions are more likely to see higher loss probability (p < 0.05 / 2) in both the 2030 and 2050 series via rank correlation [@spearman_proof_1904] though $\rho = -0.2$, suggesting many simultaneous factors at play..

+Our model shows depressed yields during combined warmer and drier conditions potentially similar to 2012, which saw poor US maize outcomes [@ers_weather_2013].

-![Screenshot of one of our interactive tools, showing precipitation and loss probability changes where precipitation may offer some protective benefit in which the x axis is precipitation (chrips) and the y axis is the change in claims rate (probability of covered loss).](./img/scatter.png "Screenshot of one of our interactive tools, showing precipitation and loss probability changes where precipitation may offer some protective benefit in which the x axis is precipitation (chrips) and the y axis is the change in claims rate (probability of covered loss)."){ width=85% }
+![Screenshot of one of our interactive tools showing precipitation and loss probability changes. The horizontal axis is precipitation and the vertical axis is the change in claims rate (probability of covered loss).](./img/scatter.png "Screenshot of one of our interactive tools showing precipitation and loss probability changes. The horizontal axis is precipitation and the vertical axis is the change in claims rate (probability of covered loss)."){ width=85% #fig:chirps }

-All that said, this concurrence between "top-down" remote sensing and "bottom-up" physical experimentation not only offers further confidence in these results but these specific results may reveal geographically and temporally specific outlines of these threats possibly useful for insurer and grower adaptation.
+In this context, precipitation may serve as a protective factor: neighborhoods with drier July conditions are more likely to see higher loss probability (p < 0.05 / 2) in both the 2030 and 2050 series via rank correlation [@spearman_proof_1904]. With that in mind, our predictions may add additional evidence to prior empirical studies such as @sinsawat_effect_2004 and @marouf_effects_2013 which describe the negative impacts of heat stress and water deficits. This possible concurrence between "top-down" remote sensing and "bottom-up" physical experimentation not only offers further confidence in these results but our model outputs may reveal geographically and temporally specific outlines of these threats, possibly useful for insurer and grower adaptation. Even so, as seen in Figure @fig:chirps, we caution that analysis finds $\rho = -0.2$, so other factors beyond temperature and precipitation alone may influence these outcomes.

-## Policy implications
-Adaptation to these adverse conditions is imperative for both farmers and insurers [@oconnor_covering_2017; @mcbride_redefining_2020.]. In order to confront this alarming increase in climate-driven risk, preparations may include altered planting dates [@mangani_projecting_2023], physically moving operations [@butler_adaptation_2013], employing stress-resistant varieties [@tian_genome_2023], modifying pesticide usage [@keronen_management_2023], and adopting risk-mitigating regenerative farming systems [@renwick_long-term_2021]. Indeed, such systems can reduce risks through both portfolio effects of diverse crop rotations and improvements to soil health while also providing other environmental benefits [@hunt_fossil_2020].
Yet significant structural and financial barriers inhibit adoption of such systems [@mcbride_redefining_2020]. In particular, though the magnitude remains the subject of empirical investigation [@connor_crop_2022], financial safety net programs like crop insurance may reduce adoption of regenerative systems @wang_warming_2021; @chemeris_insurance_2022] despite likely benefits for both farmers and insurers [@oconnor_covering_2017].

-In this context, these simulations reveal how this particular predicted combination of stable yield averages ($y_{expected}$) paired with higher loss probabilities ($l$) not only poses particularly difficult challenges to insurers but offers unique adaptation opportunities. Structurally, average-based production histories [@fcic_common_2020] reward increases in mean yield but may be poorly situated to confront reduced yield stability. Indeed, despite possibly guarding against this elevation in loss events [@bowles_long-term_2020; @renwick_long-term_2021], average-based production histories may discourage regenerative agriculture that increases yield stability but may not improve mean yields or even come at the cost of a slightly reduced average [@deines_recent_2023].
-
-If coverage levels were redefined from the current percentage based approach ($l_{\%}$) to variance ($l_{\sigma}$), then improvements both in average yield and stability could be rewarded.

+## Adaptation
+Adaptation to these adverse conditions is imperative for both farmers and insurers [@oconnor_covering_2017; @mcbride_redefining_2020]. In order to confront this alarming increase in climate-driven risk, preparations may include:
+
+ - Altered planting dates [@mangani_projecting_2023].
+ - Physically moving operations [@butler_adaptation_2013].
+ - Employing stress-resistant varieties [@tian_genome_2023].
+ - Modifying pesticide usage [@keronen_management_2023].
+ - Adopting risk-mitigating regenerative farming systems [@renwick_long-term_2021].
+
+Most notably, regenerative practices can reduce risks through portfolio effects of diverse crop rotations [@bowles_long-term_2020] and improvements to soil health [@renwick_long-term_2021]. Still, even though these important steps may provide output stability in addition to other environmental benefits [@hunt_fossil_2020], significant structural and financial barriers inhibit adoption of such systems [@mcbride_redefining_2020]. In particular, though the magnitude remains the subject of empirical investigation [@connor_crop_2022], financial safety net programs like crop insurance may reduce adoption [@wang_warming_2021; @chemeris_insurance_2022] despite likely benefits for both farmers and insurers [@oconnor_covering_2017].
+
+## Policy structure
+Our simulations reveal how this particular predicted combination of stable yield averages ($y_{expected}$) paired with higher loss probabilities ($l$) poses particularly difficult challenges to insurers and growers. Structurally, average-based production histories [@fcic_common_2020] reward increases in mean yield but may be poorly situated to confront reduced yield stability. Indeed, despite possibly guarding against this elevation in loss events [@bowles_long-term_2020; @renwick_long-term_2021], average-based production histories may discourage regenerative agriculture which may increase yield stability but not improve mean yields or even come at the cost of a slightly reduced average [@deines_recent_2023].
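The incentive gap described above can be made concrete with a small numeric sketch; the yields below are hypothetical, and the guarantee follows the current percentage-based formulation ($l_{\%}$) in which a covered loss occurs when actual yield falls below the covered share of the production-history average.

```python
def current_guarantee(history_mean: float, coverage: float = 0.75) -> float:
    """Yield guarantee under the percentage-based formulation: a covered
    loss occurs when actual yield falls below coverage * historic mean."""
    return coverage * history_mean


# Two hypothetical insured units with identical production-history means
# (bu/acre) but very different year-to-year stability.
stable_unit = {'mean': 200.0, 'std': 15.0}
volatile_unit = {'mean': 200.0, 'std': 45.0}

# Both receive the same 150 bu/acre guarantee because the formulation
# never consults the standard deviation.
print(current_guarantee(stable_unit['mean']))    # 150.0
print(current_guarantee(volatile_unit['mean']))  # 150.0
```

Because stability never enters the guarantee, a grower who meaningfully reduces yield variance sees no change in coverage, which is the gap the variance-based redefinition below aims to address.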
In contrast, if coverage levels were redefined from the current percentage based approach ($l_{\%}$) to variance ($l_{\sigma}$) as shown in Table @tbl:covformula, then improvements both in average yield and stability could be rewarded. | **Current formulation** | **Possible proposal** | -| ------------------------------------------------- | ---------------------------------------------------------------------------- | +| -------------------------------------------------------- | ---------------------------------------------------------------------------- | | $l_{\%} = \max(c_{\%} * y_{\mu} - y_{acutal}, 0)$ | $l_{\sigma} = \max(\frac{c_{\sigma} * y_{\mu}}{y_{\sigma}} - y_{acutal}, 0)$ | -For example, using historic values as guide, {{equivalentStd}} standard deviations ($c_\sigma$) would achieve the current system-wide coverage levels ($c_\% = 0.75$) but realign incentives towards a balance between a long-standing aggregate output incentive and a new resilience reward that could recognize regenerative systems and other similar practices, valuing the stability offered by some producers for the broader food system [@renwick_long-term_2021]. +Table: Change in coverage formulas. {#tbl:covformula} + +For example, using historic values as guide, {{equivalentStd}} standard deviations ($c_\sigma$) would achieve the current system-wide coverage levels ($c_\% = 0.75$) but realign incentives towards a balance between a long-standing aggregate output incentive and a new resilience reward that could recognize regenerative systems and other similar practices, valuing the stability offered by some producers for the broader food system [@renwick_long-term_2021]. Figure @fig:stdev further describes this translation between standard deviation and historic coverage levels. -![Histogram showing percent change from $y_{expected}$ for one standard deviation in each simulated unit.](./img/std.png "Histogram showing percent change from $y_{expected}$ for one standard deviation in each simulated unit."){ width=80% } +![Histogram showing percent change from $y_{expected}$ for one standard deviation in each simulated unit.](./img/std.png "Histogram showing percent change from $y_{expected}$ for one standard deviation in each simulated unit."){ width=80% #fig:stdev } Even so, federal statute caps coverage levels as percentages of production history [@cfr_crop_nodate]. Therefore, though regulators may make improvements like through 508h without congressional action, our simulations possibly suggest that the ability to incorporate climate adaptation variability may remain limited without statutory change. @@ -204,33 +229,35 @@ Note that, while we do anticipate changing historic yield averages in our simula In order to explore these simulated distributions geographically and under different scenarios, interactive open source web-based visualizations built alongside our experiments both aid in constructing our own conclusions and allow readers to consider possibilities and analysis beyond our own narrative. 
| **Simulator** | **Question** | **Loop** | **JG** | -| --------------------------------------------------------------------- | ------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------- | +| --------------------------------------------------------------------- | ------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- | | Hyper-Parameter | How do hyper-parameters impact regressor performance? | Iteratively change neural network hyper-parameters to see influence on validation set performance. | Improving on previous hyper-parameter hypotheses. | | Distributional | How do overall simulation results change under different simulation parameters? | Iterative manipulation of parameters (geohash size, event threshold) to change loss probability and severity. | Deviating from the study’s main results. | | Neighborhood | How do simulation results change across geography and climate conditions? | Inner loop changing simulation parameters to see changes in neighborhood outcomes. Outer loop of observing changes across different views. | Identifying neighborhood clusters of concern. | | Claims | How do different regulatory choices influence grower behavior? | Iteratively change production history to see which years result in claims under different regulatory schemes. | Redefining policy to improve yield stability. | -In crafting these "explorable explanations" [@victor_explorable_2011], we draw analogies to micro-apps [@bridgwater_what_2015] or mini-games [@dellafave_designing_2014] in which the user encounters a series of small experiences that, each with distinct interaction and objectives, can only provide minimal instruction [@brown_100_2024]. As these visualizations cannot take advantage of design techniques like Hayashida-style tutorials [@pottinger_pyafscgaporg_2023], they rely on simple "loops" [@brazie_designing_2024] for immediate "juxtaposition gratification" (JG) [@jm8_secret_2024], showing fast progression after minimal input. +Table: Overview of explorable explanations. {#tbl:apps} -In crafting these "explorable explanations" [@victor_explorable_2011], we draw analogies to micro-apps [@bridgwater_what_2015] or mini-games [@dellafave_designing_2014] in which the user encounters a series of small experiences that, each with distinct interaction and objectives, can only provide minimal instruction [@brown_100_2024]. As these visualizations cannot take advantage of design techniques like Hayashida-style tutorials ([@pottinger_pyafscgaporg_2023], they rely on simple "loops" [@brazie_designing_2024] for immediate "juxtaposition gratification" (JG) [@jm8_secret_2024], showing fast progression after minimal input. +In crafting the "explorable explanations" [@victor_explorable_2011] in Table @tbl:apps, we draw analogies to micro-apps [@bridgwater_what_2015] or mini-games [@dellafave_designing_2014] in which the user encounters a series of small experiences that, each with distinct interaction and objectives, can only provide minimal instruction [@brown_100_2024]. 
As these visualizations cannot take advantage of design techniques like Hayashida-style tutorials [@pottinger_pyafscgaporg_2023], they rely on simple "loops" [@brazie_designing_2024] for immediate "juxtaposition gratification" (JG) [@jm8_secret_2024], showing fast progression after minimal input. -![Example simulation in our interactive tool’s geographic view. Our projections vary across different geographic areas.](./img/map.png "Example simulation in our interactive tool’s geographic view. Our projections vary across different geographic areas."){ width=85% } +![Example simulation in our interactive tool’s geographic view. Our projections vary across different geographic areas.](./img/map.png "Example simulation in our interactive tool’s geographic view. Our projections vary across different geographic areas."){ width=85% #fig:geo } -Having broken this experience into multiple micro-apps, we follow the framework from @unwin_why_2020 and note that our custom tools first serve as internal "exploratory" graphics enabling the insights detailed in our results, outlining specific observations we attribute to the use of these tools. +Having broken this experience into multiple micro-apps, we follow the framework from @unwin_why_2020 and note that our custom tools first serve as internal "exploratory" graphics enabling the insights detailed in our results, outlining specific observations we attribute to the use of these tools (see Table @tbl:insights). | **Simulator** | **Observation** | | ---------------------------------------- | --------------------------------------------------------------------------------------------------------- | | Distributional | Dichotomy of larger changes to insurer-relevant tails contrasting smaller changes to mean yield | -| Claims | Issues of using average in APH [@fcic_common_2020] | +| Claims | Issues of using average for $y_{expected}$ [@fcic_common_2020] | | Neighborhood | Model output relationships with broader climate factors, highlighting the possible systemic protective value of increased precipitation | | Hyper-parameter | Model resilience to removing individual inputs. | -Continuing to "presentation" within @unwin_why_2020, we next release these tools into a public open source website at [https://ag-adaptation-study.pub](https://ag-adaptation-study.pub). We highlight that our interactive visualizations allow for further exploration of our modeling such as different loss thresholds for other insurance products, finding relationships of outcomes to different climate variables, answering geographically specific questions beyond the scope of this study, and modification of machine learning parameters to understand performance. In preparation for this open science release, our Supplemental Materials also report briefly on experiences from a 9 person "real-world" workshop co-exploring these results similar to @pottinger_combining_2023 which offered feedback^[We collect information about the tool only, falling under "quality assurance" activity. IRB questionnaire on file.] on and refined the designs of these tools. +Table: Observations we made from our own tools in the "exploratory" graphic context of @unwin_why_2020. {#tbl:insights} + +Continuing to "presentation" within @unwin_why_2020, we next release these tools into a open source website at [https://ag-adaptation-study.pub](https://ag-adaptation-study.pub). 
These public interactive visualizations allow for further exploration of our modeling such as different loss thresholds for other insurance products, finding relationships of outcomes to different climate variables, answering geographically specific questions beyond the scope of this study (see Figure @tbl:apps), and modification of machine learning parameters to understand performance. In preparation for this open science release, our Supplemental Materials also report briefly on experiences from a 9 person "real-world" workshop co-exploring these results similar to @pottinger_combining_2023, offering feedback^[We collect information about the tool only, falling under "quality assurance" activity. IRB questionnaire on file.] on and refining tool design. \bigskip # Conclusion -Maize production not only suffers from climate warming's effects [@jagermeyr_climate_2021] but also adds to future climate change [@kumar_assessment_2021]. Inside this cyclic relationship, agriculture could contribute to the solution for the same global crisis that ails it [@schon_cover_2024]. In dialogue with prior work [@wang_warming_2021; @chemeris_insurance_2022], we highlight how APH currently prioritizes increased mean yield over climate adaptations benefiting stability. Furthermore, we demonstrate how existing policy structures could fail to adjust to our predicted distributional changes and how inclusion of variance into this regulatory structure may positively influence adoption of mitigating practices such as regenerative systems. This responsive shift in coverage levels could reorient incentives: balancing overall yield with longitudinal stability and more comprehensively incorporating an understanding of risk. These changes may benefit both grower and insurer without requiring practice-specific regulation. Recognizing that these structural levers require modification by policy makers, we therefore encourage scientists to further study, regulators / lawmakers to further consider, and producers to further inform these revisions to APH. These essential multi-stakeholder efforts are crucial in preparing the US food system and its insurance program for a warmer future. +Maize production not only suffers from climate warming's effects [@jagermeyr_climate_2021] but also adds to future climate change [@kumar_assessment_2021]. Inside this cyclic relationship, agriculture could crucially contribute to the necessary solution for the same global crisis that ails it [@schon_cover_2024]. In dialogue with prior work [@wang_warming_2021; @chemeris_insurance_2022], we highlight how YP currently prioritizes increased mean yield over climate adaptations benefiting stability. Furthermore, we demonstrate how existing policy structures could fail to adjust to our predicted distributional changes and how inclusion of variance into this regulatory structure may positively influence adoption of mitigating practices such as regenerative systems. This responsive shift in coverage levels could reorient incentives: balancing overall yield with longitudinal stability and more comprehensively incorporating an understanding of risk. These resilience-promoting changes may benefit both grower and insurer without requiring practice-specific regulation. Recognizing that these structural levers require modification by policy makers, we therefore encourage scientists to further study, regulators / lawmakers to further consider, and producers to further inform these revisions. 
These essential multi-stakeholder efforts are crucial in preparing the US food system and its insurance program for a warmer future. \bigskip

diff --git a/paper/render.sh b/paper/render.sh
index 32c669b4..4a60894c 100644
--- a/paper/render.sh
+++ b/paper/render.sh
@@ -4,9 +4,9 @@
 cd ..
 
 python3 fill_template.py ./paper.md ./outputs/stats.json ./paper_filled.md
-pandoc -o paper_filled.pdf --citeproc --number-sections --template=default.tex paper_filled.md
-pandoc -o paper_filled.tex --citeproc --number-sections --template=default.tex paper_filled.md
-pandoc -o paper_filled.docx --citeproc --number-sections paper_filled.md
+pandoc -o paper_filled.pdf --citeproc --number-sections --filter pandoc-tablenos --filter pandoc-fignos --template=default.tex paper_filled.md
+pandoc -o paper_filled.tex --citeproc --number-sections --filter pandoc-tablenos --filter pandoc-fignos --template=default.tex paper_filled.md
+pandoc -o paper_filled.docx --citeproc --number-sections --filter pandoc-tablenos --filter pandoc-fignos paper_filled.md
 
 rm -r arxiv
 mkdir arxiv
diff --git a/paper/requirements.txt b/paper/requirements.txt
new file mode 100644
index 00000000..b5a8cd9d
--- /dev/null
+++ b/paper/requirements.txt
@@ -0,0 +1,2 @@
+pandoc-fignos==2.4.0
+pandoc-tablenos==2.3.0
\ No newline at end of file
diff --git a/paper/viz/const.py b/paper/viz/const.py
index 7ac58344..a1a27936 100644
--- a/paper/viz/const.py
+++ b/paper/viz/const.py
@@ -25,13 +25,13 @@
 BUTTON_WIDTH_COMPACT = 78
 BUTTON_WIDTH_NARROW = 60
 
-YIELD_MIN_VALUE = 0
-YIELD_MAX_VALUE = 3000
-YIELD_INCREMENT = 250
+YIELD_MIN_VALUE = -0.4
+YIELD_MAX_VALUE = 0.4
+YIELD_INCREMENT = 0.05
 
-YIELD_CHANGE_MIN_VALUE = -100
-YIELD_CHANGE_MAX_VALUE = 100
-YIELD_CHANGE_INCREMENT = 10
+YIELD_CHANGE_MIN_VALUE = -0.4
+YIELD_CHANGE_MAX_VALUE = 0.4
+YIELD_CHANGE_INCREMENT = 0.05
 
 RISK_MIN_VALUE = -30
 RISK_MAX_VALUE = 60
@@ -46,52 +46,52 @@
 ADAPT_INCREMENT = 10
 
 VAR_MINS = {
-    'chirps': -2,
-    'rhn': -4,
-    'rhx': -4,
-    'svp': -0.1,
-    'tmax': -5,
-    'tmin': -5,
-    'vpd': -0.1,
-    'wbgtmax': -5
+    'chirps': -3,
+    'rhn': -3,
+    'rhx': -3,
+    'svp': -3,
+    'tmax': -3,
+    'tmin': -3,
+    'vpd': -3,
+    'wbgtmax': -3
 }
 
 VAR_MAXS = {
-    'chirps': 2,
-    'rhn': 4,
-    'rhx': 4,
-    'svp': 0.5,
-    'tmax': 5,
-    'tmin': 5,
-    'vpd': 0.5,
-    'wbgtmax': 5
+    'chirps': 3,
+    'rhn': 3,
+    'rhx': 3,
+    'svp': 3,
+    'tmax': 3,
+    'tmin': 3,
+    'vpd': 3,
+    'wbgtmax': 3
 }
 
 VAR_INCREMENTS = {
     'chirps': 0.5,
     'rhn': 0.5,
     'rhx': 0.5,
-    'svp': 0.1,
-    'tmax': 1,
-    'tmin': 1,
-    'vpd': 0.1,
-    'wbgtmax': 1
+    'svp': 0.5,
+    'tmax': 0.5,
+    'tmin': 0.5,
+    'vpd': 0.5,
+    'wbgtmax': 0.5
 }
 
 MAX_COUNT = 0.1
 
 CATEGORY_COLORS = {
     'not significant': '#c0c0c0',
-    'decrease': '#d95f02',
-    'increase': '#1b9e77',
+    'decrease': '#a6cee3',
+    'increase': '#b2df8a',
     'increased risk, can adapt': '#1f78b4',
     'increased risk, cant adapt': '#a6cee3',
     'decreased risk, can adapt': '#33a02c',
     'decreased risk, cant adapt': '#b2df8a',
-    'increase risk, increase variability': '#1f78b4',
-    'increase risk, decrease variability': '#a6cee3',
-    'decrease risk, increase variability': '#33a02c',
-    'decrease risk, decrease variability': '#b2df8a'
+    'increase risk, increase yield': '#a6cee3',
+    'increase risk, decrease yield': '#1f78b4',
+    'decrease risk, increase yield': '#b2df8a',
+    'decrease risk, decrease yield': '#33a02c'
 }
 
 MONTH_NUMS = {
@@ -113,7 +113,7 @@
 MAP_SCALE_POSITIVE = ['#e6f5d0a0', '#b8e186a0', '#7fbc41a0', '#4d9221a0']
 
 SELECTION_INSTRUCTION = ' '.join([
-    'Click select to find fields (dots below) that would be good for a pilot.',
+    'Click select to find neighborhoods (dots below) that would be good for a pilot.',
     'Toggle yield, risk and adaptation or map vs scatter.',
     'Do different views change your answer?'
 ])
diff --git a/paper/viz/data_struct.py b/paper/viz/data_struct.py
index 72d87a2d..d8f41645 100644
--- a/paper/viz/data_struct.py
+++ b/paper/viz/data_struct.py
@@ -49,6 +49,11 @@ def combine(self, other):
         self_count = self.get_count()
         other_count = other.get_count()
 
+        if self_count == 0:
+            return other
+        elif other_count == 0:
+            return self
+
         self_weighted_p = self.get_p_value() * self_count
         other_weighted_p = other.get_p_value() * other_count
         weighted_p = (self_weighted_p + other_weighted_p) / (self_count + other_count)
@@ -99,6 +104,11 @@ def get_count(self):
     def combine(self, other):
         self_count = self.get_count()
         other_count = other.get_count()
+
+        if self_count == 0:
+            return other
+        elif other_count == 0:
+            return self
 
         self_weighted_mean = self.get_mean() * self_count
         other_weighted_mean = other.get_mean() * other_count
@@ -107,7 +117,11 @@ def combine(self, other):
         self_var_piece = (self_count - 1) * self.get_std()**2
         other_var_piece = (other_count - 1) * other.get_std()**2
         pooled_count = self_count + other_count - 2
-        new_std = math.sqrt((self_var_piece + other_var_piece) / pooled_count)
+
+        if pooled_count == 0:
+            new_std = (self.get_std() + other.get_std()) / 2
+        else:
+            new_std = math.sqrt((self_var_piece + other_var_piece) / pooled_count)
 
         new_count = self_count + other_count
         return YieldDistribution(new_mean, new_std, new_count)
@@ -132,6 +146,12 @@ def get_p_value(self):
     def combine(self, other):
         self_count = self.get_predicted().get_count()
         other_count = other.get_predicted().get_count()
+
+        if self_count == 0:
+            return other
+        elif other_count == 0:
+            return self
+
         self_weighted_p = self.get_p_value() * self_count
         other_weighted_p = other.get_p_value() * other_count
         weighted_p = (self_weighted_p + other_weighted_p) / (self_count + other_count)
diff --git a/paper/viz/hist_viz.py b/paper/viz/hist_viz.py
index 1d482c45..a5f6a5a9 100644
--- a/paper/viz/hist_viz.py
+++ b/paper/viz/hist_viz.py
@@ -537,7 +537,7 @@ def _draw_title(self):
         self._sketch.draw_text(
             SUB_CHART_WIDTH / 2 + 40,
             30,
-            'Histogram of Simulated Change in Risk Unit-Level Yields'
+            'Histogram of Change in Risk Unit-Level Yields Relative to Expected (Avg Yield)'
         )
 
         self._sketch.pop_style()
diff --git a/paper/viz/preprocess.py b/paper/viz/preprocess.py
index f14d98aa..ae3ce06f 100644
--- a/paper/viz/preprocess.py
+++ b/paper/viz/preprocess.py
@@ -182,14 +182,12 @@ def make_point_risk(record):
     risk_change = record.get_predicted_risk().get_risk_change()
     risk_p = record.get_predicted_risk().get_p_value()
 
-    std_before = record.get_yield_comparison().get_prior().get_std_percent()
-    std_after = record.get_yield_comparison().get_predicted().get_std_percent()
-    std_change = std_after - std_before
+    yield_change = record.get_yield_comparison().get_predicted().get_mean()
 
-    if std_change > 0:
-        var_str = 'increase variability'
+    if yield_change > 0:
+        var_str = 'increase yield'
     else:
-        var_str = 'decrease variability'
+        var_str = 'decrease yield'
 
     if risk_p > p_threshold:
         category = 'not significant'
@@ -199,7 +197,7 @@
         category = 'decrease risk, ' + var_str
 
     var_x = get_var_x(record)
-    effective_x = std_change * 100 if var_x is None else var_x
+    effective_x = yield_change if var_x is None else var_x
 
     return ScatterPoint(
         record.get_geohash(),
diff --git a/paper/viz/results_viz.py b/paper/viz/results_viz.py
index 081ec0ca..01d8bf9d 100644
--- a/paper/viz/results_viz.py
+++ b/paper/viz/results_viz.py
@@ -47,7 +47,7 @@ def __init__(self, target, loading_id, default_configuration=None, data_loc=None
             self._config = default_configuration
         else:
            self._config = buttons.Configuration(
-                '2030 conditions',
+                '2050 conditions',
                 '1 year',
                 'yield',
                 'scatter',
@@ -55,7 +55,7 @@ def __init__(self, target, loading_id, default_configuration=None, data_loc=None
                 'Bonferroni',
                 'significant only',
                 'no var',
-                'jun',
+                'jul',
                 '75% cov'
             )
 
diff --git a/paper/viz/scatter.py b/paper/viz/scatter.py
index 1b8fe8e7..8a7155f7 100644
--- a/paper/viz/scatter.py
+++ b/paper/viz/scatter.py
@@ -115,43 +115,43 @@ def _make_scatter_image(self, records, metric, variable):
             max_value_y = const.YIELD_MAX_VALUE
             increment_x = const.YIELD_INCREMENT
             increment_y = const.YIELD_INCREMENT
-            format_str_x = '%.0f'
-            format_str_y = '%.0f'
-            vert_title = 'Predicted Mean Yield'
-            horiz_title = 'Historic Mean Yield'
+            format_str_x = lambda x: '%+.0f%%' % (x * 100)
+            format_str_y = lambda x: '%+.0f%%' % (x * 100)
+            vert_title = 'Change from Yield Expectation (Climate Change)'
+            horiz_title = 'Change from Yield Expectation (Counterfactual)'
         elif metric == 'yieldVar':
             min_value_y = const.YIELD_CHANGE_MIN_VALUE
             max_value_y = const.YIELD_CHANGE_MAX_VALUE
             increment_y = const.YIELD_CHANGE_INCREMENT
-            format_str_y = '%+.0f'
-            vert_title = 'Mean Yield Change'
+            format_str_y = lambda x: '%+.0f%%' % (x * 100)
+            vert_title = 'Change from Yield Expectation (Climate Change)'
             min_value_x = const.VAR_MINS[variable]
             max_value_x = const.VAR_MAXS[variable]
             increment_x = const.VAR_INCREMENTS[variable]
-            horiz_title = 'Mean Change (%s)' % variable
-            format_str_x = '%+.1f'
+            horiz_title = 'Mean Change (%s, z)' % variable
+            format_str_x = lambda x: '%+.1f%%' % x
         elif metric == 'risk':
             min_value_y = const.RISK_MIN_VALUE
             max_value_y = const.RISK_MAX_VALUE
-            min_value_x = const.STD_MIN_VALUE
-            max_value_x = const.STD_MAX_VALUE
+            min_value_x = const.YIELD_CHANGE_MIN_VALUE
+            max_value_x = const.YIELD_CHANGE_MAX_VALUE
             increment_y = const.RISK_INCREMENT
-            increment_x = const.STD_INCREMENT
-            format_str_x = '%+.0f%%'
-            format_str_y = '%+.0f%%'
-            vert_title = 'Change in Loss Probabilty'
-            horiz_title = 'Change in Variability'
+            increment_x = const.YIELD_CHANGE_INCREMENT
+            format_str_x = lambda x: '%+.0f%%' % (x * 100)
+            format_str_y = lambda x: '%+.0f%%' % x
+            vert_title = 'Change in Claims Rate'
+            horiz_title = 'Change from Yield Expectation (Climate Change)'
         elif metric == 'riskVar':
             min_value_y = const.RISK_MIN_VALUE
             max_value_y = const.RISK_MAX_VALUE
             increment_y = const.RISK_INCREMENT
-            format_str_y = '%+.0f%%'
-            vert_title = 'Change in Loss Probabilty'
+            format_str_y = lambda x: '%+.0f%%' % x
+            vert_title = 'Change in Claims Rate'
             min_value_x = const.VAR_MINS[variable]
             max_value_x = const.VAR_MAXS[variable]
             increment_x = const.VAR_INCREMENTS[variable]
-            horiz_title = 'Mean Change (%s)' % variable
-            format_str_x = '%+.1f'
+            horiz_title = 'Mean Change (%s, z)' % variable
+            format_str_x = lambda x: '%+.1f%%' % x
         elif metric == 'adaptation':
             min_value_x = const.ADAPT_MIN_VALUE
             min_value_y = const.ADAPT_MIN_VALUE
@@ -159,21 +159,21 @@ def _make_scatter_image(self, records, metric, variable):
             max_value_y = const.ADAPT_MAX_VALUE
             increment_x = const.ADAPT_INCREMENT
             increment_y = const.ADAPT_INCREMENT
-            format_str_x = '%+.0f%%'
-            format_str_y = '%+.0f%%'
-            vert_title = 'Change in Loss Probabilty'
+            format_str_x = lambda x: '%+.0f%%' % x
+            format_str_y = lambda x: '%+.0f%%' % x
+            vert_title = 'Change in Claims Rate'
             horiz_title = 'Adaptation Effect'
         elif metric == 'adaptationVar':
             min_value_y = const.ADAPT_MIN_VALUE
             max_value_y = const.ADAPT_MAX_VALUE
             increment_y = const.ADAPT_INCREMENT
-            format_str_y = '%+.0f%%'
+            format_str_y = lambda x: '%+.0f%%' % x
             vert_title = 'Change Catastrophic Probabilty with Adapt'
             min_value_x = const.VAR_MINS[variable]
             max_value_x = const.VAR_MAXS[variable]
             increment_x = const.VAR_INCREMENTS[variable]
-            horiz_title = 'Mean Change (%s)' % variable
-            format_str_x = '%+.1f'
+            horiz_title = 'Mean Change (%s, z)' % variable
+            format_str_x = lambda x: '%+.1f' % x
         else:
             raise RuntimeError('Unknown metric ' + metric)
 
@@ -297,11 +297,16 @@ def draw_horiz_axis():
                self._sketch.draw_text(
                     x_pos,
                     effective_height + 12,
-                    format_str_x % current_value
+                    format_str_x(current_value)
                 )
 
                 self._sketch.set_fill(const.EMBEDDED_BAR_COLOR)
-                height = get_hist_size(counts.get('%.1f' % current_value, 0) / total)
+
+                if total == 0:
+                    height = 0
+                else:
+                    height = get_hist_size(counts.get('%.1f' % current_value, 0) / total)
+
                 if height > 0.1:
                     self._sketch.draw_rect(
                         x_pos - 5,
@@ -356,11 +361,16 @@ def draw_vert_axis():
                 self._sketch.draw_text(
                     -1,
                     y_pos,
-                    format_str_y % current_value
+                    format_str_y(current_value)
                 )
 
                 self._sketch.set_fill(const.EMBEDDED_BAR_COLOR)
-                width = get_hist_size(counts.get('%.1f' % current_value, 0) / total)
+
+                if total == 0:
+                    width = 0
+                else:
+                    width = get_hist_size(counts.get('%.1f' % current_value, 0) / total)
+
                 if width > 0.1:
                     self._sketch.draw_rect(
                         -1 - width,
diff --git a/paper/viz/sweep_viz.py b/paper/viz/sweep_viz.py
index 9e700e43..748d7b7d 100644
--- a/paper/viz/sweep_viz.py
+++ b/paper/viz/sweep_viz.py
@@ -211,7 +211,7 @@ def __init__(self, sketch, x, y, on_config_change):
            y,
            'Layers',
            ['1 layer', '2 layers', '3 layers', '4 layers', '5 layers'],
-            '5 layers',
+            '3 layers',
            lambda x: self._change_layers(x)
         )
 
@@ -223,17 +223,11 @@
            'L2',
            [
                'No L2',
-                '0.1',
-                '0.2',
-                '0.3',
-                '0.4',
-                '0.5',
-                '0.6',
-                '0.7',
-                '0.8',
-                '0.9'
+                '0.001',
+                '0.010',
+                '0.100'
            ],
-            'No L2',
+            '0.010',
            lambda x: self._change_l2(x)
         )
 
@@ -245,17 +239,12 @@
            'Dropout',
            [
                'No Dropout',
-                '0.1',
-                '0.2',
-                '0.3',
-                '0.4',
-                '0.5',
-                '0.6',
-                '0.7',
-                '0.8',
-                '0.9'
+                '0.01',
+                '0.05',
+                '0.10',
+                '0.50'
            ],
-            'No Dropout',
+            '0.05',
            lambda x: self._change_dropout(x)
         )
 
@@ -446,10 +435,14 @@ def _draw_contents(self):
         self._sketch.draw_buffer(0, 0, 'sweep-points')
 
     def _get_x(self, val):
-        return val / 2 * (self._width - 80 - 20) + 80
+        if val > 0.4:
+            val = 0.4
+        return val * 100 / 40 * (self._width - 80 - 20) + 80
 
     def _get_y(self, val):
-        offset = val / 2 * (self._height - 50 - 20) + 50
+        if val > 0.4:
+            val = 0.4
+        offset = val * 100 / 40 * (self._height - 50 - 20) + 50
         return self._height - offset
 
     def _draw_horiz_axis(self):
@@ -461,25 +454,25 @@
         self._sketch.set_text_font(const.FONT_SRC, 11)
         self._sketch.set_text_align('center', 'top')
 
-        tick_points_int = range(0, 225, 25)
+        tick_points_int = range(0, 45, 5)
         tick_points_float = map(lambda x: x / 100, tick_points_int)
         for val in tick_points_float:
             self._sketch.draw_text(
                 self._get_x(val),
                 self._height - 45,
-                '%.2f' % val
+                ('>' if val >= 0.399 else '') + ('%d%%' % round(val * 100))
             )
 
         self._sketch.set_text_align('center', 'center')
         self._sketch.push_transform()
         self._sketch.translate(
-            self._get_x(1),
+            self._get_x(0.20),
             self._height - 20
         )
         self._sketch.draw_text(
             0,
             0,
-            'Error predicting yield distribution mean (z, MAE)'
+            'Error predicting yield distribution mean (%, MAE)'
         )
 
         self._sketch.pop_transform()
@@ -495,13 +488,13 @@ def _draw_vert_axis(self):
         self._sketch.set_text_font(const.FONT_SRC, 11)
         self._sketch.set_text_align('right', 'center')
 
-        tick_points_int = range(0, 225, 25)
+        tick_points_int = range(0, 45, 5)
         tick_points_float = map(lambda x: x / 100, tick_points_int)
         for val in tick_points_float:
             self._sketch.draw_text(
                 48,
                 self._get_y(val),
-                '%.2f' % val
+                ('>' if val >= 0.399 else '') + ('%d%%' % round(val * 100))
             )
 
         self._sketch.set_text_align('center', 'center')
@@ -509,13 +502,13 @@
         self._sketch.set_angle_mode('degrees')
         self._sketch.translate(
             12,
-            self._get_y(1)
+            self._get_y(0.20)
         )
         self._sketch.rotate(-90)
         self._sketch.draw_text(
             0,
             0,
-            'Error predicting yield distribution std (z, MAE)'
+            'Error predicting yield distribution std (%, MAE)'
         )
 
         self._sketch.pop_transform()
diff --git a/refine/unit_size.json b/refine/unit_size.json
index aa8d0ac4..867e4acc 100644
--- a/refine/unit_size.json
+++ b/refine/unit_size.json
@@ -231,46 +231,6 @@
         },
         "description": "Remove rows"
     },
-    {
-        "op": "core/row-removal",
-        "engineConfig": {
-            "facets": [
-                {
-                    "type": "list",
-                    "name": "UnitType",
-                    "expression": "value",
-                    "columnName": "UnitType",
-                    "invert": true,
-                    "omitBlank": false,
-                    "omitError": false,
-                    "selection": [
-                        {
-                            "v": {
-                                "v": "Enterprise Unit",
-                                "l": "Enterprise Unit"
-                            }
-                        },
-                        {
-                            "v": {
-                                "v": "Whole Farm Unit",
-                                "l": "Whole Farm Unit"
-                            }
-                        },
-                        {
-                            "v": {
-                                "v": "Basic Unit",
-                                "l": "Basic Unit"
-                            }
-                        }
-                    ],
-                    "selectBlank": false,
-                    "selectError": false
-                }
-            ],
-            "mode": "row-based"
-        },
-        "description": "Remove rows"
-    },
     {
         "op": "core/row-removal",
         "engineConfig": {
diff --git a/run.sh b/run.sh
index 089e594e..54985d95 100644
--- a/run.sh
+++ b/run.sh
@@ -1 +1 @@
-python3 -m luigi --module export_tasks CombinedTasksRecordTask --local-scheduler
\ No newline at end of file
+python3 -m luigi --module breakpoint_tasks ExecuteSupplementalTasksWithCluster --local-scheduler
\ No newline at end of file
diff --git a/stats_tasks.py b/stats_tasks.py
index 9f5c0dd0..6f7686ea 100644
--- a/stats_tasks.py
+++ b/stats_tasks.py
@@ -15,7 +15,7 @@
 
 
 def format_percent(target):
-    return '%d\\%%' % round(target * 100)
+    return '%.1f\\%%' % (target * 100)
 
 
 def format_severity(target):