Commit

simplify urls before 2012 by joining 10 counties at a time, extend back to 1997
bl-young committed Nov 7, 2024
1 parent 7c7eeff commit 1396a85
Showing 2 changed files with 44 additions and 45 deletions.
76 changes: 31 additions & 45 deletions flowsa/data_source_scripts/Census_CBP.py
@@ -15,6 +15,7 @@
import numpy as np
from flowsa.location import get_all_state_FIPS_2, get_county_FIPS
from flowsa.flowbyfunctions import assign_fips_location_system
from flowsa.flowsa_log import log


def Census_CBP_URL_helper(*, build_url, year, **_):
@@ -33,58 +34,39 @@ def Census_CBP_URL_helper(*, build_url, year, **_):
# This section gets the census data by county instead of by state.
# This is only for years 2010 and 2011. This is done because the State
# query that gets all counties returns too many results and errors out.
if year in ['2010', '2011']:
if int(year) <= 2012:
if 2008 <= int(year) <= 2011:
naics = "NAICS2007"
elif 2003 <= int(year) <= 2007:
naics = "NAICS2002"
elif 1997 <= int(year) <= 2002:
naics = "NAICS1997"
else:
raise flowsa.exceptions.FBANotAvailableError()
county_fips_df = get_county_FIPS('2010')
county_fips = county_fips_df.FIPS
for d in county_fips:
url = build_url
state_digit = str(d[0]) + str(d[1])
county_digit = str(d[2]) + str(d[3]) + str(d[4])
url = url.replace("__NAICS__", "NAICS2007")
url = url.replace("__stateFIPS__", state_digit)
url = url.replace("__countyFIPS__", county_digit)

if year == "2010":
# These are the counties where data is not available.
# s signifies state code and y indicates year.
s_02_y_10 = ["105", "195", "198", "230", "275"]
s_15_y_10 = ["005"]
s_48_y_10 = ["269"]

# There are specific counties in various states for the year
# 2010 that do not have data. For these counties a URL is not
# generated as if there is no data then an error occurs.
if state_digit == "02" and county_digit in s_02_y_10 or \
state_digit == "15" and county_digit in s_15_y_10 or \
state_digit == "48" and county_digit in s_48_y_10:
pass
else:
urls_census.append(url)
else:
# These are the counties where data is not available.
# s signifies state code and y indicates year.
s_02_y_11 = ["105", "195", "198", "230", "275"]
s_15_y_11 = ["005"]
s_48_y_11 = ["269", "301"]
county_fips = county_fips_df.groupby('State')
for state, counties_df in county_fips:
# grab first two digits for the state FIPS
state_digit = counties_df.FIPS.iloc[0][:2]
for i in range(0, len(counties_df), 20):
# Group counties in chunks of 20
chunk = counties_df.iloc[i:i+20]['FIPS']
url = build_url
county_digit = ",".join(chunk.str[2:5])
url = url.replace("__NAICS__", naics)
url = url.replace("__stateFIPS__", state_digit)
url = url.replace("__countyFIPS__", county_digit)
urls_census.append(url)

# There are specific counties in various states for the year
# 2011 that do not have data. For these counties a URL is
# not generated as if there is no data then an error occurs.
if state_digit == "02" and county_digit in s_02_y_11 or \
state_digit == "15" and county_digit in s_15_y_11 or \
state_digit == "48" and county_digit in s_48_y_11:
pass
else:
urls_census.append(url)
else:
FIPS_2 = get_all_state_FIPS_2()['FIPS_2']
for state in FIPS_2:
url = build_url
url = url.replace("__stateFIPS__", state)
# specified NAICS code year depends on year of data
if year in ['2012', '2013', '2014', '2015', '2016']:
if 2012 <= int(year) <= 2016:
url = url.replace("__NAICS__", "NAICS2012")
else:
elif int(year) >= 2017:
url = url.replace("__NAICS__", "NAICS2017")
url = url.replace("__countyFIPS__", "*")
urls_census.append(url)
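
The hunk above replaces the 2010-2011 per-county requests with grouped requests for every year through 2012, selecting the NAICS vintage from the data year and joining county FIPS codes into one query per chunk. Below is a minimal standalone sketch of that idea; the naics_vintage and chunked_urls names and the template URL are illustrative assumptions, not part of flowsa — only the __NAICS__/__stateFIPS__/__countyFIPS__ placeholders mirror the helper above.

# Sketch: one Census CBP URL per chunk of counties within a state.
import pandas as pd

def naics_vintage(year: int) -> str:
    # Map a data year to the NAICS vintage used by the CBP API, per the helper above.
    if 2008 <= year <= 2011:
        return "NAICS2007"
    if 2003 <= year <= 2007:
        return "NAICS2002"
    if 1997 <= year <= 2002:
        return "NAICS1997"
    raise ValueError(f"year {year} not handled in this sketch")

def chunked_urls(build_url, county_fips, year, chunk_size=20):
    # Group 5-digit county FIPS codes by their 2-digit state prefix and
    # emit one URL per chunk of counties.
    urls = []
    fips_df = pd.DataFrame({"FIPS": county_fips, "State": county_fips.str[:2]})
    for _, counties in fips_df.groupby("State"):
        state_digit = counties.FIPS.iloc[0][:2]
        for i in range(0, len(counties), chunk_size):
            chunk = counties.iloc[i:i + chunk_size]["FIPS"]
            url = (build_url
                   .replace("__NAICS__", naics_vintage(year))
                   .replace("__stateFIPS__", state_digit)
                   .replace("__countyFIPS__", ",".join(chunk.str[2:5])))
            urls.append(url)
    return urls

# Hypothetical template; the real one lives in the method YAML.
template = ("https://api.census.gov/data/2010/cbp?get=ESTAB"
            "&for=county:__countyFIPS__&in=state:__stateFIPS__&__NAICS__=*")
print(chunked_urls(template, pd.Series(["01001", "01003", "01005"]), 2010))
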
Expand All @@ -99,6 +81,10 @@ def census_cbp_call(*, resp, **_):
:param resp: df, response from url call
:return: pandas dataframe of original source data
"""
if resp.status_code == 204:
# No content returned; warn and return an empty dataframe
log.warning(f"No content found for {resp.url}")
return pd.DataFrame()
cbp_json = json.loads(resp.text)
# convert response to dataframe
df_census = pd.DataFrame(
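
The second hunk adds an early exit for empty API responses before the JSON is parsed. A minimal sketch of the same guard, calling the requests library directly rather than flowsa's own download path (the fetch_cbp_frame name and the commented usage are illustrative assumptions):

# Sketch: skip JSON parsing when the Census API returns 204 No Content,
# mirroring the guard added to census_cbp_call above.
import json
import logging

import pandas as pd
import requests

log = logging.getLogger(__name__)

def fetch_cbp_frame(url):
    resp = requests.get(url)
    if resp.status_code == 204:
        # Some state/county chunks have no CBP records; warn and return an
        # empty frame instead of letting json.loads() fail on an empty body.
        log.warning(f"No content found for {resp.url}")
        return pd.DataFrame()
    data = json.loads(resp.text)
    # The Census JSON payload is a list of rows; the first row is the header.
    return pd.DataFrame(columns=data[0], data=data[1:])

# Hypothetical usage over the chunked URLs from the earlier sketch:
# frames = [fetch_cbp_frame(u) for u in urls]
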
@@ -186,5 +172,5 @@ def census_cbp_parse(*, df_list, year, **_):

if __name__ == "__main__":
import flowsa
flowsa.generateflowbyactivity.main(source='Census_CBP', year='2016-2018')
fba = flowsa.getFlowByActivity('Census_CBP', 2017)
flowsa.generateflowbyactivity.main(source='Census_CBP', year='2002-2010')
fba = flowsa.getFlowByActivity('Census_CBP', 2001)
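
With the years list extended in the YAML below, flow-by-activity data can now be generated for the earlier vintages. A small usage sketch using the same calls as the __main__ block above (the specific years are illustrative):

# Sketch: build and load Census_CBP flow-by-activity data for pre-2012 years.
import flowsa

# Generate FBA files for a span of years.
flowsa.generateflowbyactivity.main(source='Census_CBP', year='1997-2001')

# Load a single year back as a dataframe.
fba = flowsa.getFlowByActivity('Census_CBP', 1998)
print(fba.head())
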
13 changes: 13 additions & 0 deletions flowsa/methods/flowbyactivitymethods/Census_CBP.yaml
@@ -20,6 +20,19 @@ url_replace_fxn: !script_function:Census_CBP Census_CBP_URL_helper
call_response_fxn: !script_function:Census_CBP census_cbp_call
parse_response_fxn: !script_function:Census_CBP census_cbp_parse
years:
- 1997
- 1998
- 1999
- 2000
- 2001
- 2002
- 2003
- 2004
- 2005
- 2006
- 2007
- 2008
- 2009
- 2010
- 2011
- 2012
