diff --git a/examples/recipe.py b/examples/recipe.py
index 3cc11e2..1dc6c65 100644
--- a/examples/recipe.py
+++ b/examples/recipe.py
@@ -11,14 +11,21 @@ def main():
     log.basicConfig(level=log.INFO)
 
     data = lciafmt.get_method(lciafmt.Method.RECIPE_2016)
-
+
+    #export lcia to csv before mapping
+    data.to_csv(outputpath+'Recipe_source.csv', index=False)
+
+    # make flowables case insensitive to handle lack of consistent structure in source file
+    data['Flowable'] = data['Flowable'].str.lower()
+
     # map the flows to the Fed.LCA commons flows
     # set preserve_unmapped=True if you want to keep unmapped
     # flows in the resulting data frame
-    mapped_data = lciafmt.map_flows(data, system="ReCiPe2016")
+    mapped_data = lciafmt.map_flows(data, system="ReCiPe2016", case_insensitive=True)
 
     # write the result to JSON-LD and CSV
-    mapped_data.to_csv(outputpath+"recipe_2016.csv", index=False)
+    for method in mapped_data['Method'].unique():
+        mapped_data[mapped_data['Method']==method].to_csv(outputpath+method.replace('/','_')+".csv", index=False)
     json_pack = outputpath+"recipe_2016_json.zip"
     if os.path.exists(json_pack):
         os.remove(json_pack)
diff --git a/lciafmt/__init__.py b/lciafmt/__init__.py
index b9d4c43..8d5889a 100644
--- a/lciafmt/__init__.py
+++ b/lciafmt/__init__.py
@@ -33,7 +33,7 @@ def get_method(method_id, add_factors_for_missing_contexts=True, file=None, url=
     if method_id == Method.TRACI.value or method_id == Method.TRACI:
         return traci.get(add_factors_for_missing_contexts, file=file, url=None)
     if method_id == Method.RECIPE_2016.value or method_id == Method.RECIPE_2016:
-        return recipe.get(file=file, url=url)
+        return recipe.get(add_factors_for_missing_contexts, file=file, url=url)
 
 
 def clear_cache():
@@ -47,11 +47,12 @@ def to_jsonld(df: pd.DataFrame, zip_file: str, description: str="", write_flows=
 
 
 def map_flows(df: pd.DataFrame, system=None, mapping=None,
-              preserve_unmapped=False) -> pd.DataFrame:
+              preserve_unmapped=False, case_insensitive=False) -> pd.DataFrame:
     """Maps the flows in the given data frame using the given target system.
       It returns a new data frame with the mapped flows."""
     mapper = fmap.Mapper(df, system=system, mapping=mapping,
-                         preserve_unmapped=preserve_unmapped)
+                         preserve_unmapped=preserve_unmapped,
+                         case_insensitive=case_insensitive)
     return mapper.run()
 
 
diff --git a/lciafmt/fmap.py b/lciafmt/fmap.py
index 4f8a742..1b53e4b 100644
--- a/lciafmt/fmap.py
+++ b/lciafmt/fmap.py
@@ -85,11 +85,11 @@ def _is_strv(val) -> bool:
 
 class _FlowInfo(object):
 
-    def __init__(self, uuid="", name="", category="", unit=""):
+    def __init__(self, uuid="", name="", category="", unit="", conversionfactor="1.0"):
         self.name = name
         self.category = category
         self.unit = "kg" if not _is_strv(unit) else unit
-
+        self.conversionfactor = str(conversionfactor)
         if not _is_strv(uuid):
             self.uuid = make_uuid(self.name, self.category, self.unit)
         else:
@@ -99,12 +99,15 @@ def __init__(self, uuid="", name="", category="", unit=""):
 
 class Mapper(object):
 
     def __init__(self, df: pandas.DataFrame, system=None,
-                 mapping=None, preserve_unmapped=False):
+                 mapping=None, preserve_unmapped=False, case_insensitive=False):
         self.__df = df
         self.__system = system
+        self.__case_insensitive = case_insensitive
         if mapping is None:
             log.info("load flow mapping v=%s from fed.elem.flows")
             mapping = flowlist.get_flowmapping(source=system)
+            if self.__case_insensitive:
+                mapping['SourceFlowName'] = mapping['SourceFlowName'].str.lower()
         self.__mapping = mapping  # type: pandas.DataFrame
         self.__preserve_unmapped = preserve_unmapped
@@ -137,6 +140,7 @@ def run(self) -> pandas.DataFrame:
                 r[6] = target.uuid
                 r[7] = target.category
                 r[8] = target.unit
+                r[12] = r[12]/float(target.conversionfactor)
                 records.append(r)
                 mapped += 1
         log.info("created %i factors for mapped flows; " +
@@ -166,6 +170,7 @@ def _build_map_index(self) -> dict:
                 name=row["TargetFlowName"],
                 category=row["TargetFlowContext"],
                 unit=row["TargetUnit"],
+                conversionfactor=row["ConversionFactor"]
             ))
 
         log.info("indexed %i mappings for %i flows",
diff --git a/lciafmt/recipe.py b/lciafmt/recipe.py
index f26b605..903180d 100644
--- a/lciafmt/recipe.py
+++ b/lciafmt/recipe.py
@@ -8,8 +8,24 @@
 import lciafmt.util as util
 import lciafmt.xls as xls
 
-
-def get(file=None, url=None) -> pandas.DataFrame:
+contexts = {
+    'urban air' : 'air/urban',
+    'Urban air' : 'air/urban',
+    'Rural air' : 'air/rural',
+    'rural air' : 'air/rural',
+    'agricultural soil' : 'soil/agricultural',
+    'Agricultural soil' : 'soil/agricultural',
+    'industrial soil' : 'soil/industrial',
+    'Industrial soil' : 'soil/industrial',
+    'freshwater' : 'water/freshwater',
+    'Freshwater' : 'water/freshwater',
+    'fresh water' : 'water/freshwater',
+    'seawater' : 'water/sea water',
+    'sea water' : 'water/sea water',
+    'Sea water' : 'water/sea water',
+    'marine water' : 'water/sea water'}
+
+def get(add_factors_for_missing_contexts=True, file=None, url=None) -> pandas.DataFrame:
     log.info("get method ReCiPe 2016")
     f = file
     if f is None:
@@ -18,7 +34,11 @@ def get(file=None, url=None) -> pandas.DataFrame:
         url = ("http://www.rivm.nl/sites/default/files/2018-11/"
                + "ReCiPe2016_CFs_v1.1_20180117.xlsx")
         f = cache.get_or_download(fname, url)
-    return _read(f)
+    df = _read(f)
+    if add_factors_for_missing_contexts:
+        log.info("Adding average factors for primary contexts")
+        df = util.aggregate_factors_for_primary_contexts(df)
+    return df
 
 
 def _read(file: str) -> pandas.DataFrame:
@@ -50,14 +70,15 @@ def _read_mid_points(sheet: xlrd.book.sheet, records: list):
 
     indicator_unit, flow_unit, unit_col = _determine_units(sheet)
     compartment, compartment_col = _determine_compartments(sheet)
+
+    perspectives = ["I", "H", "E"]
 
     factor_count = 0
     for row in range(start_row, sheet.nrows):
-        if xls.cell_f64(sheet, row, data_col) == 0.0:
-            continue
-
         if compartment_col > -1:
             compartment = xls.cell_str(sheet, row, compartment_col)
+            if compartment in contexts:
+                compartment = contexts[compartment]
         if unit_col > -1:
             flow_unit = xls.cell_str(sheet, row, unit_col)
             if "/" in flow_unit:
@@ -217,13 +238,16 @@ def _determine_compartments(sheet: xlrd.book.sheet) -> (str, int):
             or _containstr(sheet.name, "ozone") \
             or _containstr(sheet.name, "particulate") \
             or _containstr(sheet.name, "acidification"):
-        log.warning("no compartment column; assuming 'emission/air'")
-        return "emission/air", -1
+        log.warning("no compartment column; assuming 'air'")
+        return "air", -1
 
-    elif _containstr(sheet.name, "mineral", "resource", "scarcity") \
-            or _containstr(sheet.name, "fossil", "resource", "scarcity"):
+    elif _containstr(sheet.name, "mineral", "resource", "scarcity"):
         log.warning("no compartment column; assuming 'resource/ground'")
         return "resource/ground", -1
+
+    elif _containstr(sheet.name, "fossil", "resource", "scarcity"):
+        log.warning("no compartment column; assuming 'resource'")
+        return "resource", -1
 
     if _containstr(sheet.name, "water", "consumption"):
         log.warning("no compartment column; assuming 'resource/fresh water'")
diff --git a/lciafmt/util.py b/lciafmt/util.py
index 3288a75..30eb9f1 100644
--- a/lciafmt/util.py
+++ b/lciafmt/util.py
@@ -58,7 +58,18 @@ def aggregate_factors_for_primary_contexts(df) -> pd.DataFrame:
     :param df: a pandas dataframe for an LCIA method
     :return: a pandas dataframe for an LCIA method
     """
+    #Ignore the following impact categories for generating averages
+    ignored_categories = ['Land transformation', 'Land occupation',
+                          'Water consumption','Mineral resource scarcity',
+                          'Fossil resource scarcity']
     indices = df['Context'].str.find('/')
+    ignored_list = df['Indicator'].isin(ignored_categories)
+    i = 0
+    for k in ignored_list.iteritems():
+        if k[1] == True:
+            indices.update(pd.Series([-1], index=[i]))
+        i = i + 1
+
     primary_context = []
     i = 0
     for c in df['Context']:
diff --git a/lciafmt/xls.py b/lciafmt/xls.py
index 6be0899..8ecf24b 100644
--- a/lciafmt/xls.py
+++ b/lciafmt/xls.py
@@ -26,6 +26,9 @@ def cell_val(sheet: xlrd.book.sheet, row: int, col: int):
     cell = sheet.cell(row, col)
     if cell is None:
         return None
+    #checks for errortype N/A and returns None
+    if cell.ctype == 5:
+        return None
     return cell.value
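
Review note: the pieces above are meant to work together. examples/recipe.py lower-cases the source Flowable names, map_flows(..., case_insensitive=True) lower-cases SourceFlowName in the flow mapping so the two match, and Mapper.run() divides each characterization factor (r[12]) by the mapping's ConversionFactor. Below is a minimal, self-contained sketch of that interaction on toy data; the column names mirror the diff, but the values and the "Characterization Factor" column name are illustrative assumptions, not taken from the ReCiPe 2016 source or the fed.elem.flows mapping, and a plain merge stands in for the Mapper's internal index.

import pandas as pd

# Toy stand-ins for the method factors and the flow mapping used by fmap.Mapper.
factors = pd.DataFrame({
    "Flowable": ["Carbon Dioxide"],           # mixed case, as in the ReCiPe source file
    "Context": ["urban air"],
    "Characterization Factor": [1.0],         # assumed factor column, stands in for r[12]
})
mapping = pd.DataFrame({
    "SourceFlowName": ["Carbon dioxide"],
    "TargetFlowName": ["Carbon dioxide"],
    "TargetFlowContext": ["emission/air/urban"],
    "TargetUnit": ["kg"],
    "ConversionFactor": [1.0],
})

# examples/recipe.py: lower-case the source flowables before mapping
factors["Flowable"] = factors["Flowable"].str.lower()
# Mapper(case_insensitive=True): lower-case SourceFlowName so the lookup matches
mapping["SourceFlowName"] = mapping["SourceFlowName"].str.lower()

merged = factors.merge(mapping, left_on="Flowable", right_on="SourceFlowName")
# Mapper.run(): r[12] = r[12]/float(target.conversionfactor)
merged["Characterization Factor"] /= merged["ConversionFactor"].astype(float)
print(merged[["TargetFlowName", "TargetFlowContext", "Characterization Factor"]])

If ConversionFactor gives the number of target units per source unit, dividing converts a factor expressed per source unit into one per target unit; with 1.0, as here, the value is unchanged.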