From bbfb4eb7c485701f3d8717d44a608c1ba29a6440 Mon Sep 17 00:00:00 2001 From: WesIngwersen Date: Tue, 3 Dec 2024 16:37:47 -0500 Subject: [PATCH] Address #14 - edit versioning scheme - add py script to select alias - add yamls for two lists of names --- README.md | 2 +- versioning/VersioningScheme.md | 59 ++++++++++++++++++++++++ versioning/aliases.csv | 2 + versioning/data/migratory_songbirds.yaml | 25 ++++++++++ versioning/data/pies.yaml | 37 +++++++++++++++ versioning/get_alias.py | 45 ++++++++++++++++++ 6 files changed, 169 insertions(+), 1 deletion(-) create mode 100644 versioning/VersioningScheme.md create mode 100644 versioning/aliases.csv create mode 100644 versioning/data/migratory_songbirds.yaml create mode 100644 versioning/data/pies.yaml create mode 100644 versioning/get_alias.py diff --git a/README.md b/README.md index 86346df..e8e7e79 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ Both flowsa and the LCIAformatter draw on the [fedelemflowlist](https://github.c [useeiopy](https://github.com/USEPA/useeiopy) and the [IO Model Builder](https://github.com/usepa/IO-model-builder) are Python packages for assembling model components and writing the model out for serving on the USEEIO API or for use in openLCA. These packages were used for created USEEIO v1 models and are not supported for current USEEIO models. The former framework used to generate USEEIO versions <= 1.2 is still accessible under [Releases](https://github.com/USEPA/USEEIO/releases). ## Model Versioning -See the [Versioning Scheme](VersioningScheme.md) for an explanation of USEEIO model version numbers and names. +See the [Versioning Scheme](versioning/VersioningScheme.md) for an explanation of USEEIO model version numbers and names. 
## Citation If you use USEEIO models for your research, please cite the original paper diff --git a/versioning/VersioningScheme.md b/versioning/VersioningScheme.md new file mode 100644 index 0000000..9035161 --- /dev/null +++ b/versioning/VersioningScheme.md @@ -0,0 +1,59 @@ +# Model Naming Scheme + +USEEIO model names follow a scheme. +This document defines and describes the current naming scheme. +These model names are intended to identify a model by version and a set of key characteristics. +They are independent of the software or authors producing the model. +They are not full model descriptions. +A named model does not imply that the model has been reviewed or released. +Models named and created prior to the advent of the + +## Naming pattern +Model names are composed of parts in a clear pattern. +The naming pattern is a set sequence of these parts with separator characters that are required when the given part is present. +``` +{loc}{root} v{major}.{minor}.{patch}-{alias}-{YY} +``` +The version number is the major.minor.patch sequence. The version number plus the build identifier follows [Semantic Versioning 2.0.0](https://semver.org/spec/v2.0.0.html) for software with some adaptations appropriate for use for a computational model versioning scheme. + +## Name parts +The following table defines the parts of a model name. + +| Name part | Definition | Format | Example | Assumed Value if Absent | +|-----------|---------------------------------------------------------------------------------------------------------------------|-----------------------------|------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| loc | Two-letter acronym for primary model location/region. 
'US' for national models, and state acronym for state models | string | `US` | not applicable | +| root | Main model type | string | `EEIO` | not applicable | +| major | Major version number indicating the generation of the model | integer | `2` | not applicable | +| minor | Minor version number indicating a minor methodological/data update | integer | `0` | not applicable | +| patch | OPTIONAL. A patch number indicating a minor fix, format, or data update | integer | `1` | `0` | +| alias | See [Alias](#alias) | name | `redstart` | not applicable | +| YY | Last two digits of year of key data. | integer | `17` | IO data year for IO schema and major model version, see [Base IO Schema](#base-io-schema) | + +## Model Version +National and state models have separate versioning. +The combination of the major and minor version numbers reflects the model components that are included or their derivation. + +## Alias +Each unique set of model data components and model attributes, not including the location or year, has an alias. +The alias names are drawn from name lists that capture some aspect of [Americana](https://en.wikipedia.org/wiki/Americana_(culture)) or the natural environment of the U.S. +The aliases are chosen at random by a script from a common set of names used for a generation and geo-resolution of models. +For example, v1 state model aliases are drawn from names of American pies; v2 national model aliases are drawn from names of U.S. songbirds that migrate in the winter. + +## Base IO Schema +The base IO schema is the set of sector codes and names for commodities and industries. +USEEIO models have used the BEA IO schema, which is updated every 5 years along with the release of the benchmark, detailed-level IO tables for the same year. 
+USEEIO models alter names for commodities in the schema and may add or remove sectors, and hence the IO schema is used as the base schema for the model but will not necessarily be identical to the model IO schema. + +| Major / Minor Version | Base IO Schema | +|-----------------------|----------------| +| 1.0 | BEA 2007 | +| 2.0 | BEA 2012 | +| 2.2 - 2.? | BEA 2017 | + +## Examples of model names + +| Model Name | Interpretation | +|-------------------------|--------------------------------------------------------------------------------------------------------------------------| +| USEEIO v2.3-redstart-24 | A national version 2.3 model built using the 'redstart' collection of data and attributes and representing the year 2024 | +| MEEEIO v1.1-crawfish-22 | A Maine version 1.1 model built using the 'crawfish' collection of data and attributes and representing the year 2022 | + diff --git a/versioning/aliases.csv b/versioning/aliases.csv new file mode 100644 index 0000000..e22c3f8 --- /dev/null +++ b/versioning/aliases.csv @@ -0,0 +1,2 @@ +Alias,N or S,C or I,Sector Schema,Model Type,Price Type,IF source,Extentions,Indicators +redstart,N,C,BEA Detail 2017,EEIO,PRO,CEDA 2024,GHG (GHG_national_YYYYY_m2_v2.0),GHG (IPCC_AR6) diff --git a/versioning/data/migratory_songbirds.yaml b/versioning/data/migratory_songbirds.yaml new file mode 100644 index 0000000..d7f7627 --- /dev/null +++ b/versioning/data/migratory_songbirds.yaml @@ -0,0 +1,25 @@ +names: +- phoebe +- kingbird +- flycatcher +- vireo +- shrike +- martin +- swallow +- kinglet +- gnatcatcher +- wren +- catbird +- thrasher +- waxwing +- sparrow +- oriole +- thrush +- veery +- warbler +- yellowthroat +- redstart +- tanager +- grosbeak +- bunting +source: "https://www.fws.gov/refuge/anahuac/migratory-song-birds" diff --git a/versioning/data/pies.yaml b/versioning/data/pies.yaml new file mode 100644 index 0000000..88e2b11 --- /dev/null +++ b/versioning/data/pies.yaml @@ -0,0 +1,37 @@ +names: +- apple +- atlanticbeach 
#!/usr/bin/env python3
# coding=utf-8
# get_alias.py
"""
Pick a random alias from a YAML list of names, skipping reserved aliases.

Aliases already listed in the "Alias" column of aliases.csv (located next to
this script) are treated as reserved and are never returned.

This file requires the source name be passed like:
    -s migratory_songbirds

The source must be the base name (no extension) of a yaml file, present in
the data directory, that holds a list of names under the key "names".
"""

import argparse
import random
from pathlib import Path

import pandas as pd
import yaml

# Directory holding this script and aliases.csv, and its data subdirectory
# holding the yaml name lists.
verPath = Path(__file__).parent
dataPath = verPath / 'data'


def main(**kwargs):
    """Print a randomly chosen alias that is not yet reserved.

    When called without keyword arguments, the options are parsed from the
    command line; otherwise the keyword arguments are used as the options.

    Args:
        **kwargs: Options. ``s`` is the base name (without the ``.yaml``
            extension) of a names file inside ``dataPath``.

    Raises:
        KeyError: if keyword arguments are given but ``s`` is not among them.
        FileNotFoundError: if the yaml file or aliases.csv does not exist.
        IndexError: if the yaml file holds no names, or if every name it
            holds is already reserved in aliases.csv.
    """
    parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS)
    parser.add_argument(
        '-s',
        help='Enter the name of yaml file of names in the data path wout the extension',
        type=str,
        # Without this, omitting -s on the command line surfaced as a bare
        # KeyError traceback instead of a usage message.
        required=True,
    )
    if not kwargs:
        kwargs = vars(parser.parse_args())
    if 's' not in kwargs:
        raise KeyError("missing required option 's' (base name of the yaml names file)")

    # Names already in use; blank cells are dropped and stray whitespace is
    # stripped so that they compare cleanly against the yaml names.
    alias_column = pd.read_csv(verPath / "aliases.csv")["Alias"]
    reserved = set(alias_column.dropna().astype(str).str.strip())

    source = kwargs['s'] + ".yaml"
    with open(dataPath / source, "r", encoding="utf-8") as file:
        content = yaml.safe_load(file)

    # An empty yaml file loads as None and a bare list has no "names" key;
    # treat both the same as a missing or empty list of names.
    names = content.get("names") if isinstance(content, dict) else None
    if not names:
        raise IndexError(f'no names found in {source}')

    # str() guards against yaml scalars that do not load as strings (for
    # example an unquoted `no`); sorting removes the dependence on set
    # iteration order so a seeded random generator gives repeatable picks.
    available = sorted({str(name) for name in names} - reserved)
    if not available:
        raise IndexError(f'all names in {source} are already reserved in aliases.csv')

    print("Your next alias is ... " + random.choice(available))


if __name__ == "__main__":
    main()