From bb847f3afdfe408e859cd034366f9dceeb99f99d Mon Sep 17 00:00:00 2001
From: ysrivas08
Date: Wed, 8 Jan 2025 14:39:32 -0500
Subject: [PATCH] Updated state GHGI scripts to include recent data release for years 2021, 2022

---
 .../data_source_scripts/EPA_StateGHGI_YS.py  | 235 ++++++++++++++++++
 .../EPA_StateGHGI_YS.yaml                    |  46 ++++
 2 files changed, 281 insertions(+)
 create mode 100644 flowsa/data_source_scripts/EPA_StateGHGI_YS.py
 create mode 100644 flowsa/methods/flowbyactivitymethods/EPA_StateGHGI_YS.yaml

diff --git a/flowsa/data_source_scripts/EPA_StateGHGI_YS.py b/flowsa/data_source_scripts/EPA_StateGHGI_YS.py
new file mode 100644
index 00000000..6a833a92
--- /dev/null
+++ b/flowsa/data_source_scripts/EPA_StateGHGI_YS.py
@@ -0,0 +1,235 @@
+# EPA_StateGHGI_YS.py (flowsa)
+# !/usr/bin/env python3
+# coding=utf-8
+"""
+Inventory of US GHGs from EPA disaggregated to States
+"""
+import pandas as pd
+import io
+from zipfile import ZipFile
+
+import flowsa.flowbyactivity
+from flowsa.flowbyactivity import FlowByActivity
+from flowsa.flowbysector import FlowBySector
+from flowsa.flowsa_log import log
+from flowsa.location import apply_county_FIPS
+from flowsa.flowbyfunctions import assign_fips_location_system
+import flowsa.exceptions
+
+
+def epa_state_ghgi_call(*, resp, config, **_):
+    """
+    Convert response for calling url to pandas dataframe
+    :param resp: response from url call
+    :param config: dictionary, items in FBA method yaml
+    :return: pandas dataframe of original source data
+    """
+    with ZipFile(io.BytesIO(resp.content)) as z:
+        df = pd.read_excel(z.open(config['file']),
+                           sheet_name=config['sheet'])
+    return df
+
+
+def epa_state_ghgi_parse(*, df_list, source, year, config, **_):
+    """
+    Combine, parse, and format the provided dataframes
+    :param df_list: list of dataframes to concat and format
+    :param source: source name
+    :param year: year
+    :param config: dictionary, items in FBA method yaml
+    :return: df, parsed and partially formatted to flowbyactivity
+        specifications
+    """
+    data_df = pd.concat(df_list)
+
+    activity_cols = ['econ_sector', 'econ_subsector', 'subsector',
+                     'category', 'fuel1', 'fuel2', 'sub_category_1',
+                     'sub_category_2', 'sub_category_3', 'sub_category_4',
+                     'sub_category_5']
+
+    states = data_df[['geo_ref']].drop_duplicates()
+    flows = data_df[['ghg_category']].drop_duplicates()
+
+    df = (data_df.melt(id_vars=activity_cols + ['geo_ref'] + ['ghg_category'],
+                       value_vars=f'Y{year}',
+                       var_name='Year',
+                       value_name='FlowAmount')
+          .assign(Year=year)
+          .assign(Unit='Tg')  # TODO: confirm units; source reports MMT CO2e
+          .assign(FlowType='ELEMENTARY_FLOW')
+          .assign(SourceName=source)
+          .assign(Class='Chemicals')
+          .assign(Compartment='air')
+          .rename(columns={'geo_ref': 'State',
+                           'ghg_category': 'FlowName'})
+          .assign(ActivityProducedBy=lambda x: x[activity_cols]
+                  .apply(lambda row: " - ".join(
+                      row.dropna().drop_duplicates().astype(str)),
+                      axis=1))
+          .drop(columns=activity_cols)
+          )
+
+    activities = df[['ActivityProducedBy']].drop_duplicates()
+
+    df = apply_county_FIPS(df)
+    df = assign_fips_location_system(df, '2015')
+    df.drop(columns=['County'], inplace=True)
+
+    return df
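+
+
+# Illustrative sketch (comments only, not executed) of the activity naming in
+# epa_state_ghgi_parse above: the hierarchical activity columns collapse into
+# a single "ActivityProducedBy" string by joining the non-null, de-duplicated
+# values with " - ". The column values here are hypothetical, not source data:
+#
+#     >>> import pandas as pd
+#     >>> row = pd.Series({'econ_sector': 'Industry', 'subsector': 'Chemicals',
+#     ...                  'fuel1': 'Coal', 'fuel2': None})
+#     >>> " - ".join(row.dropna().drop_duplicates().astype(str))
+#     'Industry - Chemicals - Coal'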
+
+
+def tag_biogenic_activities(fba, source_dict, **_):
+    """
+    clean_fba_before_mapping_df_fxn to tag emissions from passed activities
+    as biogenic. Activities are passed as a list in the parameter
+    'activity_list'.
+    """
+    a_list = source_dict.get('activity_list')
+    if a_list is None:
+        raise flowsa.exceptions.FBSMethodConstructionError(
+            message="Activities to tag must be passed in FBS parameter "
+                    "'activity_list'")
+    fba.loc[fba['ActivityProducedBy'].isin(a_list),
+            'FlowName'] = fba['FlowName'] + ' - biogenic'
+
+    return fba
+
+
+def allocate_flows_by_fuel(fba: FlowByActivity, **_) -> FlowByActivity:
+    """
+    clean_fba_before_activity_sets fxn to estimate CH4 and N2O emissions by
+    fuel type, using ratios derived from the national inventory as a proxy.
+
+    Returns an FBA whose length is increased by a factor of the number of
+    fuels; Fuel is added to the "Description" field; total FlowAmount remains
+    unchanged.
+    """
+    attributes_to_save = {
+        attr: getattr(fba, attr) for attr in fba._metadata + ['_metadata']
+    }
+
+    year = fba.config.get('year')
+    # combine lists of activities from CO2 activity set
+    alist = fba.config['clean_parameter']['flow_ratio_source']
+    if any(isinstance(i, list) for i in alist):
+        # pulled from !index, so list of lists
+        activity_list = sum(alist, [])
+    else:
+        activity_list = alist
+    source_fba = pd.concat([
+        flowsa.flowbyactivity.getFlowByActivity(x, year) for x in
+        fba.config['clean_parameter']['fba_source']
+    ], ignore_index=True)
+
+    sector = fba.config['clean_parameter']['sector']
+
+    # align fuel names from National GHGI (keys) with StateGHGI (values)
+    fuels = {'Natural Gas': 'Natural Gas',
+             'Coal': 'Coal',
+             'Fuel Oil': 'Petroleum'}
+
+    df_list = []
+    for f in fuels.keys():
+        df = (source_fba.query(f'ActivityProducedBy == "{f} {sector}"')
+              [['FlowName', 'FlowAmount']]
+              .assign(Fuel=f)
+              )
+        df_list.append(df)
+    # calculate ratio of flow to CO2 for each fuel (in CO2e)
+    ratios = (pd.concat(df_list, ignore_index=True)
+              .pivot_table(columns='FlowName',
+                           index='Fuel',
+                           values='FlowAmount')
+              .assign(CH4=lambda x: x['CH4'] / x['CO2'])
+              .assign(N2O=lambda x: x['N2O'] / x['CO2'])
+              .drop(columns='CO2')
+              .fillna(0)
+              )
+
+    # prepare dataframe from StateGHGI including CO2 flows by fuel type
+    fba1 = (pd.concat([(
+        flowsa.flowbyactivity.getFlowByActivity('EPA_StateGHGI', year)
+        .query('ActivityProducedBy in @activity_list')),
+        fba.copy()],
+        ignore_index=True)
+        .assign(Fuel=lambda x: x['ActivityProducedBy']
+                .str.rsplit(' - ', n=1, expand=True)[1])
+        )
+
+    # Derive state CH4 and N2O emissions by fuel type using fuel-specific ratios
+    fba2 = (fba1.query('FlowName == "CO2"')
+            .assign(Fuel=lambda x: x['Fuel'].replace(
+                dict((v, k) for k, v in fuels.items())))
+            .merge(ratios.reset_index())
+            .assign(CH4=lambda x: x['CH4'] * x['FlowAmount'])
+            .assign(N2O=lambda x: x['N2O'] * x['FlowAmount'])
+            .melt(id_vars=['Location', 'Fuel'],
+                  value_vars=['CH4', 'N2O'],
+                  var_name='FlowName')
+            .pivot_table(columns='Fuel',
+                         index=['Location', 'FlowName'],
+                         values='value')
+            )
+    fba2 = pd.DataFrame(fba2).div(fba2.sum(axis=1), axis=0)
+
+    # Maintain source flow amount, merge in state ratios by fuel type
+    fba3 = (fba1.merge(fba2.reset_index())
+            .melt(id_vars=[c for c in fba1 if c not in fuels.keys()],
+                  value_vars=fuels.keys())
+            .assign(Description=lambda x: x['variable'].replace(fuels))
+            .assign(FlowAmount=lambda x: x['FlowAmount'] * x['value'])
+            .drop(columns=['Fuel', 'variable', 'value'])
+            )
+
+    if round(fba3.FlowAmount.sum(), 6) != round(fba.FlowAmount.sum(), 6):
+        log.warning('Error: totals do not match when splitting CH4 and N2O by '
+                    'fuel type')
+
+    new_fba = FlowByActivity(fba3)
+    for attr in attributes_to_save:
+        setattr(new_fba, attr, attributes_to_save[attr])
+
+    return new_fba
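+
+
+# Illustrative sketch (comments only, not executed) of the ratio derivation in
+# allocate_flows_by_fuel above; the flow amounts are hypothetical, not
+# inventory values. Per fuel, CH4 and N2O are expressed as fractions of the
+# corresponding CO2 flow (all in CO2e) before being applied to state CO2:
+#
+#     >>> import pandas as pd
+#     >>> nat = pd.DataFrame({'Fuel': ['Coal'] * 3,
+#     ...                     'FlowName': ['CO2', 'CH4', 'N2O'],
+#     ...                     'FlowAmount': [100.0, 2.0, 1.0]})
+#     >>> (nat.pivot_table(columns='FlowName', index='Fuel', values='FlowAmount')
+#     ...     .assign(CH4=lambda x: x['CH4'] / x['CO2'])
+#     ...     .assign(N2O=lambda x: x['N2O'] / x['CO2'])
+#     ...     .drop(columns='CO2'))
+#     FlowName   CH4   N2O
+#     Fuel
+#     Coal      0.02  0.01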
+
+
+def allocate_industrial_combustion(fba: FlowByActivity, **_) -> FlowByActivity:
+    """
+    Split industrial combustion emissions into two buckets to be further
+    allocated.
+
+    clean_fba_before_activity_sets. Calculate the percentage of fuel
+    consumption captured in EIA MECS relative to the national GHGI. Create
+    new activities to distinguish those which use EIA MECS as the allocation
+    source from those that use an alternate source.
+    """
+    from flowsa.data_source_scripts.EPA_GHGI import get_manufacturing_energy_ratios
+    pct_dict = get_manufacturing_energy_ratios(fba.config.get('clean_parameter'))
+
+    # activities reflect flows in Tables A_14, 3_8, and 3_9
+    alist = fba.config.get('clean_parameter')['activities_to_split']
+    activities_to_split = {a: a.rsplit(' - ')[-1] for a in alist}
+
+    for activity, fuel in activities_to_split.items():
+        df_subset = fba.loc[fba['ActivityProducedBy'] == activity].reset_index(drop=True)
+        if len(df_subset) == 0:
+            continue
+        df_subset['FlowAmount'] = df_subset['FlowAmount'] * pct_dict[fuel]
+        df_subset['ActivityProducedBy'] = f"{activity} - Manufacturing"
+        fba.loc[fba['ActivityProducedBy'] == activity,
+                'FlowAmount'] = fba['FlowAmount'] * (1 - pct_dict[fuel])
+        fba = pd.concat([fba, df_subset], ignore_index=True)
+
+    return fba
+
+
+def drop_negative_values(fbs: FlowBySector, **_) -> FlowBySector:
+    """
+    In some cases, after handling adjustments for reassigning emissions in
+    the StateGHGI, sectors can have negative emissions after aggregating by
+    sector. Remove these negative values so that the state does not get any
+    emissions from that sector. clean_fbs_after_aggregation fxn
+    """
+    fbs = fbs.query('FlowAmount >= 0').reset_index(drop=True)
+
+    return fbs
+
+
+if __name__ == '__main__':
+    import flowsa
+    flowsa.generateflowbyactivity.main(source='EPA_StateGHGI_YS', year='2020')
+    fba = flowsa.flowbyactivity.getFlowByActivity('EPA_StateGHGI_YS', '2020')
\ No newline at end of file
diff --git a/flowsa/methods/flowbyactivitymethods/EPA_StateGHGI_YS.yaml b/flowsa/methods/flowbyactivitymethods/EPA_StateGHGI_YS.yaml
new file mode 100644
index 00000000..52da6400
--- /dev/null
+++ b/flowsa/methods/flowbyactivitymethods/EPA_StateGHGI_YS.yaml
@@ -0,0 +1,46 @@
+author: US Environmental Protection Agency
+source_name: 'State Greenhouse Gas Inventories'
+source_url: 'https://www.epa.gov/ghgemissions/state-ghg-emissions-and-removals'
+bib_id: ''
+format: zip  # .zip file with .xlsx file
+url:
+  base_url: 'https://www.epa.gov/system/files/other-files/2024-09/allstateghgdata90-22_v082924.zip'
+
+call_response_fxn: !script_function:EPA_StateGHGI_YS epa_state_ghgi_call
+parse_response_fxn: !script_function:EPA_StateGHGI_YS epa_state_ghgi_parse
+file: 'AllStateGHGData90-22_v082924.xlsx'
+sheet: 'Data by Economic Sectors'
+years:
+- 2022
+- 2021
+- 2020
+- 2019
+- 2018
+- 2017
+- 2016
+- 2015
+- 2014
+- 2013
+- 2012
+- 2011
+- 2010
+- 2009
+- 2008
+- 2007
+- 2006
+- 2005
+- 2004
+- 2003
+- 2002
+- 2001
+- 2000
+- 1999
+- 1998
+- 1997
+- 1996
+- 1995
+- 1994
+- 1993
+- 1992
+- 1991
+- 1990
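
A minimal usage sketch for the new source, mirroring the module's __main__
block (this assumes the two files above are installed in a local flowsa
environment; the calls below are the same ones the script itself uses, here
pointed at one of the newly added years):

    import flowsa
    import flowsa.flowbyactivity
    import flowsa.generateflowbyactivity

    # generate the flow-by-activity dataset for one of the newly added years
    flowsa.generateflowbyactivity.main(source='EPA_StateGHGI_YS', year='2022')

    # load the generated dataset for inspection
    fba = flowsa.flowbyactivity.getFlowByActivity('EPA_StateGHGI_YS', '2022')
    print(fba.head())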