From a97ce71e704b75941197c2b2144606aa160dc0a0 Mon Sep 17 00:00:00 2001 From: Daniel Sorid <63077097+dsmedia@users.noreply.github.com> Date: Sun, 2 Feb 2025 12:13:02 +0000 Subject: [PATCH] adjustments to schemas - github.csv: move time range to schema - add categories to schema in seattle-weather.csv - sp500.csv, udistrict.json, uniform-2d, weather.json : move description content into schema - reformat usgs disclaimer in us-state-capitals.json - rerun build_datapackage.py --- _data/datapackage_additions.toml | 152 +++++++++++++++++++++++++++---- datapackage.json | 83 ++++++++++++----- datapackage.md | 93 ++++++++++--------- 3 files changed, 239 insertions(+), 89 deletions(-) diff --git a/_data/datapackage_additions.toml b/_data/datapackage_additions.toml index 65b75a7..6d5d4a9 100644 --- a/_data/datapackage_additions.toml +++ b/_data/datapackage_additions.toml @@ -722,8 +722,17 @@ path = "https://www.gimp.org/docs/userfaq.html#whats-the-gimps-license-and-how-d [[resources]] # Path: github.csv path = "github.csv" description = """Simulated GitHub contribution data showing hourly commit counts across -different times of day, spanning from January 1st to May 30th, 2015. Designed to demonstrate -typical patterns of developer activity in a GitHub-style punchcard visualization format.""" +different times of day. Designed to demonstrate typical patterns of developer activity +in a GitHub-style punchcard visualization format.""" + +[resources.schema] +[[resources.schema.fields]] +name = "time" +description = "Hourly timestamp from January 1st to May 30th, 2015" + +[[resources.schema.fields]] +name = "count" +description = "Simulated hourly commit counts" [[resources.sources]] title = """Generated using `/scripts/github.py`.""" @@ -1220,6 +1229,32 @@ description = """Daily weather in metric units. Transformed using `/scripts/weat The categorical \"weather\" field is synthesized from multiple fields in the original dataset. This data is intended for instructional purposes.""" +[resources.schema] +[[resources.schema.fields]] +name = "date" +description = "Date of the weather observation" + +[[resources.schema.fields]] +name = "precipitation" +description = "Amount of precipitation in millimeters" + +[[resources.schema.fields]] +name = "temp_max" +description = "Maximum daily temperature in degrees Celsius" + +[[resources.schema.fields]] +name = "temp_min" +description = "Minimum daily temperature in degrees Celsius" + +[[resources.schema.fields]] +name = "wind" +description = "Wind speed in kilometers per hour" + +[[resources.schema.fields]] +name = "weather" +description = "Categorical weather type synthesized from original NOAA data fields. Categories include: drizzle, rain, snow, sun, and fog." +categories = ["drizzle", "rain", "snow", "sun", "fog"] + [[resources.sources]] title = "NOAA National Climatic Data Center" path = "https://www.ncdc.noaa.gov/cdo-web/datatools/records" @@ -1241,8 +1276,15 @@ path = "sp500.csv" description = """Monthly closing values of the S&P 500 stock market index from January 2000 to March 2010. Captures several significant market events including the dot-com bubble burst (2000-2002), the mid-2000s bull market, and the 2008 financial crisis. -Each record contains a date in the format 'MMM D YYYY' and the corresponding closing price of -the index.""" +""" +[resources.schema] +[[resources.schema.fields]] +name = "date" +description = "Date of monthly observation in the format 'MMM D YYYY'" + +[[resources.schema.fields]] +name = "price" +description = "Closing price of the S&P 500 index for the given month" [[resources]] # Path: stocks.csv path = "stocks.csv" @@ -1256,6 +1298,44 @@ create a [ridgeline plot example](https://vega.github.io/vega/examples/u-distric of various food and beverage categories. The example graphic using this dataset states that it originally appeared in Alaska Airlines Beyond Magazine (Sep 2017, p. 120)""" +[resources.schema] +[[resources.schema.fields]] +name = "latitude" +description = "Latitude of the restaurant or cafe" + +[[resources.schema.fields]] +name = "longitude" +description = "Longitude of the restaurant or cafe" + +[[resources.schema.fields]] +name = "name" +description = "Name of the restaurant or cafe" + +[[resources.schema.fields]] +name = "category" +description = "Cuisine type of the restaurant or cafe" +categories = [ + "bakeries", + "burgers", + "bubbletea", + "coffee", + "chinese", + "drinks", + "hawaiian", + "indian", + "japanese", + "korean", + "latin", + "mideastern", + "pizza", + "seafood", + "thai", + "veg", + "vietnamese", + "breakfast", + "american", +] + [[resources]] # Path: unemployment-across-industries.json path = "unemployment-across-industries.json" description = """Industry-level unemployment from the Current Population Survey @@ -1367,12 +1447,19 @@ path = "https://www.usa.gov/government-works" path = "uniform-2d.json" description = """ Five hundred paired coordinates (u, v) sampled from a bivariate uniform distribution. Centered near the -origin (u: 0.015, v: -0.013) with ranges spanning approximately [-0.5, 0.5] in both dimensions. The standard -deviation of u is 0.277 and v is 0.276, showing a relatively equal spread. The variables exhibit negligible -correlation (-0.019), suggesting independence, as expected for a uniform distribution. The range of u -is [-0.499, 0.500] and v is [-0.500, 0.498]. A contrast to normally distributed data in `normal-2d.json` . +origin with ranges spanning approximately [-0.5, 0.5] in both dimensions. The variables exhibit negligible +correlation (-0.019), suggesting independence, as expected for a uniform distribution. +A contrast to normally distributed data in `normal-2d.json`. """ +[[resources.schema.fields]] +name = "u" +description = "mean: 0.015, std: 0.277, range: [-0.499, 0.500]" + +[[resources.schema.fields]] +name = "v" +description = "mean: -0.013, std: 0.276, range: [-0.500, 0.498]" + [[resources.sources]] title = "Generated Data" @@ -1433,11 +1520,12 @@ Includes latitude, longitude, state name, and capital city name for all 50 U.S. Cities are represented as point locations of their capitol buildings using coordinates in the WGS84 geographic coordinate system. -According to [USGS]((https://www.usgs.gov/faqs/what-are-terms-uselicensing-map-services-and-data-national-map)), "Map services and data downloaded from The National Map are free and in the public domain. -There are no restrictions; however, we request that the following acknowledgment statement -of the originating agency be included in products and data derived from our map services -when citing, copying, or reprinting: Map services and data available from U.S. -Geological Survey, National Geospatial Program." +According to [USGS]((https://www.usgs.gov/faqs/what-are-terms-uselicensing-map-services-and-data-national-map)) +> "Map services and data downloaded from The National Map are free and in the public domain. +> There are no restrictions; however, we request that the following acknowledgment statement +> of the originating agency be included in products and data derived from our map services +> when citing, copying, or reprinting: Map services and data available from U.S. +> Geological Survey, National Geospatial Program." """ [[resources.sources]] @@ -1465,10 +1553,40 @@ path = "https://stat.ethz.ch/R-manual/R-patched/library/datasets/html/volcano.h [[resources]] # Path: weather.csv path = "weather.csv" -description = """Daily weather observations from Seattle and New York, including location, date, precipitation, -maximum and minimum temperatures, wind speed, and a categorical weather description (drizzle, rain, snow, sun, fog). -Transformed from NOAA data using the script `/scripts/weather.py`. The categorical "weather" field is a synthesized feature, -derived from multiple fields present in the original NOAA dataset. Intended for instructional purposes.""" +description = """Daily weather observations from Seattle and New York. +Transformed from NOAA data using the script `/scripts/weather.py`. +The categorical \"weather\" field is synthesized from multiple fields in the original dataset. +Intended for instructional purposes.""" + +[resources.schema] +[[resources.schema.fields]] +name = "location" +description = "City location of the weather observation (Seattle or New York)" + +[[resources.schema.fields]] +name = "date" +description = "Date of the weather observation" + +[[resources.schema.fields]] +name = "precipitation" +description = "Amount of precipitation in millimeters" + +[[resources.schema.fields]] +name = "temp_max" +description = "Maximum daily temperature in degrees Celsius" + +[[resources.schema.fields]] +name = "temp_min" +description = "Minimum daily temperature in degrees Celsius" + +[[resources.schema.fields]] +name = "wind" +description = "Wind speed in kilometers per hour" + +[[resources.schema.fields]] +name = "weather" +description = "Categorical weather type synthesized from original NOAA data fields. Categories include: drizzle, rain, snow, sun, and fog." +categories = ["drizzle", "rain", "snow", "sun", "fog"] [[resources.sources]] title = "NOAA Climate Data Online" diff --git a/datapackage.json b/datapackage.json index f34686e..132c1d3 100644 --- a/datapackage.json +++ b/datapackage.json @@ -20,7 +20,7 @@ } ], "version": "2.11.0", - "created": "2025-02-01T21:34:01.839728+00:00", + "created": "2025-02-02T12:04:24.205787+00:00", "resources": [ { "name": "7zip.png", @@ -1838,7 +1838,7 @@ { "name": "github.csv", "type": "table", - "description": "Simulated GitHub contribution data showing hourly commit counts across \ndifferent times of day, spanning from January 1st to May 30th, 2015. Designed to demonstrate \ntypical patterns of developer activity in a GitHub-style punchcard visualization format.", + "description": "Simulated GitHub contribution data showing hourly commit counts across \ndifferent times of day. Designed to demonstrate typical patterns of developer activity \nin a GitHub-style punchcard visualization format.", "licenses": [ { "path": "https://github.com/vega/vega-datasets/blob/main/scripts/LICENSE" @@ -1860,11 +1860,13 @@ "fields": [ { "name": "time", - "type": "string" + "type": "string", + "description": "Hourly timestamp from January 1st to May 30th, 2015" }, { "name": "count", - "type": "integer" + "type": "integer", + "description": "Simulated hourly commit counts" } ] } @@ -3055,27 +3057,40 @@ "fields": [ { "name": "date", - "type": "date" + "type": "date", + "description": "Date of the weather observation" }, { "name": "precipitation", - "type": "number" + "type": "number", + "description": "Amount of precipitation in millimeters" }, { "name": "temp_max", - "type": "number" + "type": "number", + "description": "Maximum daily temperature in degrees Celsius" }, { "name": "temp_min", - "type": "number" + "type": "number", + "description": "Minimum daily temperature in degrees Celsius" }, { "name": "wind", - "type": "number" + "type": "number", + "description": "Wind speed in kilometers per hour" }, { "name": "weather", - "type": "string" + "type": "string", + "description": "Categorical weather type synthesized from original NOAA data fields. Categories include: drizzle, rain, snow, sun, and fog.", + "categories": [ + "drizzle", + "rain", + "snow", + "sun", + "fog" + ] } ] } @@ -3133,7 +3148,7 @@ { "name": "sp500.csv", "type": "table", - "description": "Monthly closing values of the S&P 500 stock market index \nfrom January 2000 to March 2010. Captures several significant market events including \nthe dot-com bubble burst (2000-2002), the mid-2000s bull market, and the 2008 financial crisis. \nEach record contains a date in the format 'MMM D YYYY' and the corresponding closing price of \nthe index.", + "description": "Monthly closing values of the S&P 500 stock market index \nfrom January 2000 to March 2010. Captures several significant market events including \nthe dot-com bubble burst (2000-2002), the mid-2000s bull market, and the 2008 financial crisis. \n", "path": "sp500.csv", "scheme": "file", "format": "csv", @@ -3145,11 +3160,13 @@ "fields": [ { "name": "date", - "type": "string" + "type": "string", + "description": "Date of monthly observation in the format 'MMM D YYYY'" }, { "name": "price", - "type": "number" + "type": "number", + "description": "Closing price of the S&P 500 index for the given month" } ] } @@ -3332,7 +3349,7 @@ { "name": "uniform-2d.json", "type": "table", - "description": "Five hundred paired coordinates (u, v) sampled from a bivariate uniform distribution. Centered near the\norigin (u: 0.015, v: -0.013) with ranges spanning approximately [-0.5, 0.5] in both dimensions. The standard \ndeviation of u is 0.277 and v is 0.276, showing a relatively equal spread. The variables exhibit negligible \ncorrelation (-0.019), suggesting independence, as expected for a uniform distribution. The range of u \nis [-0.499, 0.500] and v is [-0.500, 0.498]. A contrast to normally distributed data in `normal-2d.json` .\n", + "description": "Five hundred paired coordinates (u, v) sampled from a bivariate uniform distribution. Centered near the\norigin with ranges spanning approximately [-0.5, 0.5] in both dimensions. The variables exhibit negligible\ncorrelation (-0.019), suggesting independence, as expected for a uniform distribution.\nA contrast to normally distributed data in `normal-2d.json`.\n", "licenses": [ { "path": "https://github.com/vega/vega-datasets/blob/main/scripts/LICENSE" @@ -3359,11 +3376,13 @@ "fields": [ { "name": "u", - "type": "number" + "type": "number", + "description": "mean: 0.015, std: 0.277, range: [-0.499, 0.500]" }, { "name": "v", - "type": "number" + "type": "number", + "description": "mean: -0.013, std: 0.276, range: [-0.500, 0.498]" } ] } @@ -3523,7 +3542,7 @@ { "name": "us-state-capitals.json", "type": "table", - "description": "Geographical coordinates and names of U.S. state capitals, transformed using `scripts/us-state-capitals.py`. \nIncludes latitude, longitude, state name, and capital city name for all 50 U.S. states. \nCities are represented as point locations of their capitol buildings using coordinates in the \nWGS84 geographic coordinate system.\n\nAccording to [USGS]((https://www.usgs.gov/faqs/what-are-terms-uselicensing-map-services-and-data-national-map)), \"Map services and data downloaded from The National Map are free and in the public domain. \nThere are no restrictions; however, we request that the following acknowledgment statement \nof the originating agency be included in products and data derived from our map services \nwhen citing, copying, or reprinting: Map services and data available from U.S. \nGeological Survey, National Geospatial Program.\"\n", + "description": "Geographical coordinates and names of U.S. state capitals, transformed using `scripts/us-state-capitals.py`. \nIncludes latitude, longitude, state name, and capital city name for all 50 U.S. states. \nCities are represented as point locations of their capitol buildings using coordinates in the \nWGS84 geographic coordinate system.\n\nAccording to [USGS]((https://www.usgs.gov/faqs/what-are-terms-uselicensing-map-services-and-data-national-map))\n> \"Map services and data downloaded from The National Map are free and in the public domain. \n> There are no restrictions; however, we request that the following acknowledgment statement \n> of the originating agency be included in products and data derived from our map services \n> when citing, copying, or reprinting: Map services and data available from U.S. \n> Geological Survey, National Geospatial Program.\"\n", "licenses": [ { "title": "U.S. Public Domain", @@ -3594,7 +3613,7 @@ { "name": "weather.csv", "type": "table", - "description": "Daily weather observations from Seattle and New York, including location, date, precipitation, \nmaximum and minimum temperatures, wind speed, and a categorical weather description (drizzle, rain, snow, sun, fog). \nTransformed from NOAA data using the script `/scripts/weather.py`. The categorical \"weather\" field is a synthesized feature, \nderived from multiple fields present in the original NOAA dataset. Intended for instructional purposes.", + "description": "Daily weather observations from Seattle and New York.\nTransformed from NOAA data using the script `/scripts/weather.py`.\nThe categorical \"weather\" field is synthesized from multiple fields in the original dataset.\nIntended for instructional purposes.", "licenses": [ { "title": "U.S. Government Dataset", @@ -3618,31 +3637,45 @@ "fields": [ { "name": "location", - "type": "string" + "type": "string", + "description": "City location of the weather observation (Seattle or New York)" }, { "name": "date", - "type": "date" + "type": "date", + "description": "Date of the weather observation" }, { "name": "precipitation", - "type": "number" + "type": "number", + "description": "Amount of precipitation in millimeters" }, { "name": "temp_max", - "type": "number" + "type": "number", + "description": "Maximum daily temperature in degrees Celsius" }, { "name": "temp_min", - "type": "number" + "type": "number", + "description": "Minimum daily temperature in degrees Celsius" }, { "name": "wind", - "type": "number" + "type": "number", + "description": "Wind speed in kilometers per hour" }, { "name": "weather", - "type": "string" + "type": "string", + "description": "Categorical weather type synthesized from original NOAA data fields. Categories include: drizzle, rain, snow, sun, and fog.", + "categories": [ + "drizzle", + "rain", + "snow", + "sun", + "fog" + ] } ] } diff --git a/datapackage.md b/datapackage.md index 83938d5..b0d3545 100644 --- a/datapackage.md +++ b/datapackage.md @@ -1,5 +1,5 @@ # vega-datasets -`2.11.0` | [GitHub](http://github.com/vega/vega-datasets.git) | 2025-02-01 21:34:01 [UTC] +`2.11.0` | [GitHub](http://github.com/vega/vega-datasets.git) | 2025-02-02 12:04:24 [UTC] Common repository for example datasets used by Vega related projects. BSD-3-Clause license applies only to package code and infrastructure. Users should verify their use of datasets @@ -861,14 +861,14 @@ Application icon from open-source software project. Used in [Image-based Scatter github.csv ### description Simulated GitHub contribution data showing hourly commit counts across -different times of day, spanning from January 1st to May 30th, 2015. Designed to demonstrate -typical patterns of developer activity in a GitHub-style punchcard visualization format. +different times of day. Designed to demonstrate typical patterns of developer activity +in a GitHub-style punchcard visualization format. ### schema -| name | type | -|:-------|:--------| -| time | string | -| count | integer | +| name | type | description | +|:-------|:--------|:----------------------------------------------------| +| time | string | Hourly timestamp from January 1st to May 30th, 2015 | +| count | integer | Simulated hourly commit counts | ### sources | title | |:--------------------------------------| @@ -1464,14 +1464,14 @@ The categorical "weather" field is synthesized from multiple fields in the origi This data is intended for instructional purposes. ### schema -| name | type | -|:--------------|:-------| -| date | date | -| precipitation | number | -| temp_max | number | -| temp_min | number | -| wind | number | -| weather | string | +| name | type | description | categories | +|:--------------|:-------|:----------------------------------------------------------------------------------------------------------------------------|:------------------------------------------| +| date | date | Date of the weather observation | | +| precipitation | number | Amount of precipitation in millimeters | | +| temp_max | number | Maximum daily temperature in degrees Celsius | | +| temp_min | number | Minimum daily temperature in degrees Celsius | | +| wind | number | Wind speed in kilometers per hour | | +| weather | string | Categorical weather type synthesized from original NOAA data fields. Categories include: drizzle, rain, snow, sun, and fog. | ['drizzle', 'rain', 'snow', 'sun', 'fog'] | ### sources | title | path | |:-----------------------------------|:----------------------------------------------------| @@ -1507,14 +1507,13 @@ sp500.csv Monthly closing values of the S&P 500 stock market index from January 2000 to March 2010. Captures several significant market events including the dot-com bubble burst (2000-2002), the mid-2000s bull market, and the 2008 financial crisis. -Each record contains a date in the format 'MMM D YYYY' and the corresponding closing price of -the index. + ### schema -| name | type | -|:-------|:-------| -| date | string | -| price | number | +| name | type | description | +|:-------|:-------|:-------------------------------------------------------| +| date | string | Date of monthly observation in the format 'MMM D YYYY' | +| price | number | Closing price of the S&P 500 index for the given month | ## `stocks.csv` ### path stocks.csv @@ -1630,17 +1629,16 @@ When using BLS public data API and datasets, users should adhere to the [BLS Ter uniform-2d.json ### description Five hundred paired coordinates (u, v) sampled from a bivariate uniform distribution. Centered near the -origin (u: 0.015, v: -0.013) with ranges spanning approximately [-0.5, 0.5] in both dimensions. The standard -deviation of u is 0.277 and v is 0.276, showing a relatively equal spread. The variables exhibit negligible -correlation (-0.019), suggesting independence, as expected for a uniform distribution. The range of u -is [-0.499, 0.500] and v is [-0.500, 0.498]. A contrast to normally distributed data in `normal-2d.json` . +origin with ranges spanning approximately [-0.5, 0.5] in both dimensions. The variables exhibit negligible +correlation (-0.019), suggesting independence, as expected for a uniform distribution. +A contrast to normally distributed data in `normal-2d.json`. ### schema -| name | type | -|:-------|:-------| -| u | number | -| v | number | +| name | type | description | +|:-------|:-------|:-------------------------------------------------| +| u | number | mean: 0.015, std: 0.277, range: [-0.499, 0.500] | +| v | number | mean: -0.013, std: 0.276, range: [-0.500, 0.498] | ### sources | title | |:---------------| @@ -1732,11 +1730,12 @@ Includes latitude, longitude, state name, and capital city name for all 50 U.S. Cities are represented as point locations of their capitol buildings using coordinates in the WGS84 geographic coordinate system. -According to [USGS]((https://www.usgs.gov/faqs/what-are-terms-uselicensing-map-services-and-data-national-map)), "Map services and data downloaded from The National Map are free and in the public domain. -There are no restrictions; however, we request that the following acknowledgment statement -of the originating agency be included in products and data derived from our map services -when citing, copying, or reprinting: Map services and data available from U.S. -Geological Survey, National Geospatial Program." +According to [USGS]((https://www.usgs.gov/faqs/what-are-terms-uselicensing-map-services-and-data-national-map)) +> "Map services and data downloaded from The National Map are free and in the public domain. +> There are no restrictions; however, we request that the following acknowledgment statement +> of the originating agency be included in products and data derived from our map services +> when citing, copying, or reprinting: Map services and data available from U.S. +> Geological Survey, National Geospatial Program." ### schema @@ -1771,21 +1770,21 @@ Should not be regarded as accurate. ### path weather.csv ### description -Daily weather observations from Seattle and New York, including location, date, precipitation, -maximum and minimum temperatures, wind speed, and a categorical weather description (drizzle, rain, snow, sun, fog). -Transformed from NOAA data using the script `/scripts/weather.py`. The categorical "weather" field is a synthesized feature, -derived from multiple fields present in the original NOAA dataset. Intended for instructional purposes. +Daily weather observations from Seattle and New York. +Transformed from NOAA data using the script `/scripts/weather.py`. +The categorical "weather" field is synthesized from multiple fields in the original dataset. +Intended for instructional purposes. ### schema -| name | type | -|:--------------|:-------| -| location | string | -| date | date | -| precipitation | number | -| temp_max | number | -| temp_min | number | -| wind | number | -| weather | string | +| name | type | description | categories | +|:--------------|:-------|:----------------------------------------------------------------------------------------------------------------------------|:------------------------------------------| +| location | string | City location of the weather observation (Seattle or New York) | | +| date | date | Date of the weather observation | | +| precipitation | number | Amount of precipitation in millimeters | | +| temp_max | number | Maximum daily temperature in degrees Celsius | | +| temp_min | number | Minimum daily temperature in degrees Celsius | | +| wind | number | Wind speed in kilometers per hour | | +| weather | string | Categorical weather type synthesized from original NOAA data fields. Categories include: drizzle, rain, snow, sun, and fog. | ['drizzle', 'rain', 'snow', 'sun', 'fog'] | ### sources | title | path | |:-------------------------|:-------------------------------------------------------|