Merge pull request #420 from USEPA/bea_pce
BEA Personal Consumption Expenditures by state
bl-young authored Oct 22, 2024
2 parents f18e7d8 + 76f5f96 commit 7aad90b
Showing 4 changed files with 148 additions and 0 deletions.
98 changes: 98 additions & 0 deletions flowsa/data_source_scripts/BEA_PCE.py
@@ -0,0 +1,98 @@
# BEA_PCE.py (flowsa)
#!/usr/bin/env python3
# coding=utf-8
"""
BEA Personal Consumption Expenditures (PCE) by state, pulled from the
BEA Regional API (tables SAPCE1-SAPCE4) and formatted as
Flow-By-Activity datasets.
"""

import json
import pandas as pd
import numpy as np
from flowsa.location import get_state_FIPS
from flowsa.flowbyfunctions import assign_fips_location_system


def bea_pce_url_helper(*, build_url, config, **_):
"""
This helper function uses the "build_url" input from generateflowbyactivity.py,
which is a base url for data imports that requires parts of the url text
string to be replaced with info specific to the data year. This function
does not parse the data, only modifies the urls from which data is
obtained.
:param build_url: string, base url
:param config: dictionary, items in FBA method yaml
:return: list, urls to call, concat, parse, format into Flow-By-Activity
format
"""
urls = []
for state in get_state_FIPS()['FIPS']:
url1 = build_url.replace('__stateFIPS__', state)
for table in config['tables']:
url = url1.replace('__table__', table)
urls.append(url)

return urls


def bea_pce_call(*, resp, **_):
"""
Convert response for calling url to pandas dataframe,
begin parsing df into FBA format
:param resp: df, response from url call
:return: pandas dataframe of original source data
"""
try:
json_load = json.loads(resp.text)
df = pd.DataFrame(data=json_load['BEAAPI']['Results']['Data'])
except:
df = pd.DataFrame()
finally:
return df


def bea_pce_parse(*, df_list, year, **_):
"""
Combine, parse, and format the provided dataframes
:param df_list: list of dataframes to concat and format
:param args: dictionary, used to run generateflowbyactivity.py
('year' and 'source')
:return: df, parsed and partially formatted to flowbyactivity
specifications
"""
# Concat dataframes
df = pd.concat(df_list, ignore_index=True)

df = (df.
rename(columns={'GeoFips': 'Location',
'TimePeriod': 'Year',
'CL_UNIT': 'Unit',
'Description': 'ActivityProducedBy',
'Code': 'Description',
})
.assign(FlowAmount = lambda x: x['DataValue'].astype(float))
.assign(FlowName = 'Personal consumption expenditures')
.drop(columns=['UNIT_MULT', 'GeoName', 'DataValue'], errors='ignore')
)

df['Unit'] = np.where(df['Description'].str.startswith('SAPCE2'),
'Dollars / p', df['Unit'])

# add location system based on year of data
df = assign_fips_location_system(df, year)
# add hard code data
df['SourceName'] = 'BEA_PCE'
df['Class'] = 'Money'
# Add tmp DQ scores
df['DataReliability'] = 5
df['DataCollection'] = 5
df['Compartment'] = None
df['FlowType'] = "ELEMENTARY_FLOW"

return df


if __name__ == "__main__":
    import flowsa
    flowsa.generateflowbyactivity.main(source='BEA_PCE', year=2023)
    fba = pd.DataFrame()
    for y in range(2023, 2024):
        fba = pd.concat([fba, flowsa.getFlowByActivity('BEA_PCE', y)],
                        ignore_index=True)
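
For orientation, here is a minimal sketch (not part of the diff) of what `bea_pce_url_helper` produces and the response shape `bea_pce_call` unpacks. The URL template mirrors `BEA_PCE.yaml` below; the FIPS code, API key, and sample record values are illustrative assumptions, not actual BEA output.

```python
import pandas as pd

# Template carrying the placeholders defined in BEA_PCE.yaml;
# YOUR-KEY stands in for a real BEA API key
build_url = (
    "https://apps.bea.gov/api/data/?method=GetData&DataSetName=Regional"
    "&TableName=__table__&GeoFIPS=__stateFIPS__&LineCode=ALL"
    "&ResultFormat=json&Year=2023&UserID=YOUR-KEY"
)

# one expanded url, assuming a 5-digit state FIPS code such as '01000'
url = (build_url
       .replace('__stateFIPS__', '01000')
       .replace('__table__', 'SAPCE1'))
print(url)

# the JSON body that bea_pce_call navigates; field values illustrative only
sample = {"BEAAPI": {"Results": {"Data": [
    {"GeoFips": "01000", "GeoName": "Alabama", "TimePeriod": "2023",
     "CL_UNIT": "USD", "UNIT_MULT": "6", "Code": "SAPCE1-1",
     "Description": "Personal consumption expenditures",
     "DataValue": "12345.6"}]}}}
df = pd.DataFrame(sample["BEAAPI"]["Results"]["Data"])
print(df[["GeoFips", "Code", "DataValue"]])
```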
3 changes: 3 additions & 0 deletions flowsa/generateflowbyactivity.py
@@ -11,6 +11,7 @@
import argparse
import pandas as pd
from urllib import parse
import time
import flowsa
from esupy.processed_data_mgmt import write_df_to_file
from esupy.remote import make_url_request
@@ -102,6 +103,7 @@ def call_urls(*, url_list, source, year, config):
    # identify if url request requires cookies set
    set_cookies = config.get('allow_http_request_cookies')
    confirm_gdrive = config.get('confirm_gdrive')
    pause = config.get('time_delay', 0)  # in seconds

    # create dataframes list by iterating through url list
    data_frames_list = []
@@ -123,6 +125,7 @@ def call_urls(*, url_list, source, year, config):
            data_frames_list.append(df)
        elif isinstance(df, list):
            data_frames_list.extend(df)
        time.sleep(pause)

    return data_frames_list

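A hedged sketch of the throttle added above: `time_delay` is read once per source and defaults to 0, so existing sources that do not set it are unaffected. `fetch()` below is a stand-in for the request-and-parse step, not flowsa's actual API.

```python
import time

def call_urls_sketch(url_list, config, fetch):
    """Simplified stand-in for call_urls; fetch() represents the
    url-request-plus-parse step (an assumption for illustration)."""
    pause = config.get('time_delay', 0)  # seconds; 0 when key is absent
    frames = []
    for url in url_list:
        frames.append(fetch(url))
        time.sleep(pause)  # no-op for sources without time_delay
    return frames
```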
46 changes: 46 additions & 0 deletions flowsa/methods/flowbyactivitymethods/BEA_PCE.yaml
@@ -0,0 +1,46 @@
author: US Bureau of Economic Analysis
source_name: Personal Consumption Expenditures by State
source_url: https://www.bea.gov/data/consumer-spending/state
bib_id: BEA
api_name: BEA
api_key_required: True
url:
  base_url: https://apps.bea.gov/api/data/?
  api_path: ''
  url_params:
    method: GetData
    DataSetName: Regional
    TableName: __table__
    GeoFIPS: __stateFIPS__ # STATE would select all states at once,
    LineCode: ALL # but ALL cannot be combined with all states, so each state is queried separately
    ResultFormat: json
    Year: __year__
    UserID: __apiKey__
## See Appendix N of https://apps.bea.gov/api/_pdf/bea_web_service_api_user_guide.pdf

url_replace_fxn: !script_function:BEA_PCE bea_pce_url_helper
call_response_fxn: !script_function:BEA_PCE bea_pce_call
parse_response_fxn: !script_function:BEA_PCE bea_pce_parse
time_delay: 1 # pause 1 second between requests
## BEA limits users to 100 requests per minute / 100 MB of data per minute
## before imposing a one-hour timeout.

tables:
- SAPCE1 # Personal consumption expenditures by major type of product
- SAPCE2 # Per capita personal consumption expenditures by major type of product
- SAPCE3 # Personal consumption expenditures by type of product
- SAPCE4 # Personal consumption expenditures by function

years: # 1997 - 2023
- 2012
- 2013
- 2014
- 2015
- 2016
- 2017
- 2018
- 2019
- 2020
- 2021
- 2022
- 2023
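
Rough request arithmetic behind the `time_delay: 1` choice (a sketch; the 51-state count is an assumption about `get_state_FIPS()`, not confirmed by this diff):

```python
# per year: one request per state per table (see bea_pce_url_helper)
n_states = 51            # assumption: 50 states + DC from get_state_FIPS()
n_tables = 4             # SAPCE1-SAPCE4
total = n_states * n_tables
print(total)             # 204 requests for one year of data
# Unthrottled, 204 back-to-back calls can exceed BEA's 100 requests/minute
# and trigger the one-hour timeout; at 1 s per request the rate is
# 60 requests/minute, safely under the limit.
```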
1 change: 1 addition & 0 deletions flowsa/methods/flowbyactivitymethods/README.md
@@ -23,6 +23,7 @@ url_replace_fxn: name of the source specific function that replaces the dynamic
call_response_fxn: name of the source specific function that specifies how data should be loaded
parse_response_fxn: name of the source specific function that parses and formats the dataframe
call_all_years: bool, allows the passing of a year range to generateflowbyactivity.main() while only calling and parsing the url a single time
time_delay: int (in seconds), allows pausing between requests
years:
#years of data as separate lines like - 2015
