From 9cb05e2a4de17bccd91479ff44d64d4ecfb22bc9 Mon Sep 17 00:00:00 2001 From: Nikolai Kondrashov Date: Wed, 30 Sep 2020 15:05:27 +0300 Subject: [PATCH] Add kcidb.io extracted from kcidb --- .gitignore | 3 + .pylintrc | 17 + .travis.yml | 15 + README.md | 66 ++++ kcidb_io/__init__.py | 73 ++++ kcidb_io/schema/__init__.py | 91 +++++ kcidb_io/schema/misc.py | 156 +++++++++ kcidb_io/schema/test_v3.py | 94 ++++++ kcidb_io/schema/v1.py | 548 ++++++++++++++++++++++++++++++ kcidb_io/schema/v2.py | 569 +++++++++++++++++++++++++++++++ kcidb_io/schema/v3.py | 654 ++++++++++++++++++++++++++++++++++++ setup.py | 50 +++ 12 files changed, 2336 insertions(+) create mode 100644 .gitignore create mode 100644 .pylintrc create mode 100644 .travis.yml create mode 100644 README.md create mode 100644 kcidb_io/__init__.py create mode 100644 kcidb_io/schema/__init__.py create mode 100644 kcidb_io/schema/misc.py create mode 100644 kcidb_io/schema/test_v3.py create mode 100644 kcidb_io/schema/v1.py create mode 100644 kcidb_io/schema/v2.py create mode 100644 kcidb_io/schema/v3.py create mode 100644 setup.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4f62c89 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +__pycache__ +*.egg-info +*.pyc diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..d917828 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,17 @@ +[MASTER] + +[MESSAGES CONTROL] +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=duplicate-code + +[REPORTS] +# Activate the evaluation score. +score=no diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..9b2d341 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,15 @@ +language: python +dist: xenial +python: + - "3.6" +install: + - pip3 install '.[dev]' +before_script: + - git clone --depth=1 https://github.com/kernelci/kcidb.git .kcidb + - git -C .kcidb log -n1 +script: + - "flake8 kcidb_io *.py" + - "pylint kcidb_io *.py" + - pytest + - pip3 install .kcidb + - pytest .kcidb diff --git a/README.md b/README.md new file mode 100644 index 0000000..9bdc1b8 --- /dev/null +++ b/README.md @@ -0,0 +1,66 @@ +kcidb-io +======== + +`kcidb-io` is a Python 3 library for validating and manipulating Linux Kernel +CI reports in JSON format. This library is used by [`kcidb`][kcidb] - a +package for maintaining a service storing and serving that data. + +Installation +------------ + +`kcidb-io` requires Python v3.6 or later. + +To install the package for the current user, run this command: + + pip3 install --user + +Where `` is the location of the package source, e.g. a git repo: + + pip3 install --user git+https://github.com/kernelci/kcidb-io.git + +or a directory path: + + pip3 install --user . + +In any case, make sure your PATH includes the `~/.local/bin` directory, e.g. 
+with: + + export PATH="$PATH":~/.local/bin + +Using +----- + +Here's an example creating an empty report and then validating it: +```python +# Import the kcidb-io package +import kcidb_io +# Create an empty report using the latest schema version +json = kcidb_io.new() +# Validate the report +kcidb_io.schema.validate(json) +``` + +Hacking +------- + +If you want to hack on the source code, install the package in the editable +mode with the `-e/--editable` option, and with "dev" extra included. E.g.: + + pip3 install --user --editable '.[dev]' + +The latter installs the `kcidb-io` package using the modules from the source +directory, and changes to them will be reflected immediately without the need +to reinstall. It also installs extra development tools, such as `flake8` and +`pylint`. + +Releasing +--------- + +Before releasing make sure the README.md is up to date. + +To make a release tag the release commit with `v`, where `` is +the next release number, e.g. `v1`. The very next commit after the tag should +update the version number in `setup.py` to be the next one. I.e. continuing +the above example, it should be `2`. + +[kcidb]: https://github.com/kernelci/kcidb/ diff --git a/kcidb_io/__init__.py b/kcidb_io/__init__.py new file mode 100644 index 0000000..dc5ff53 --- /dev/null +++ b/kcidb_io/__init__.py @@ -0,0 +1,73 @@ +"""Kernel CI reporting I/O data""" + +from copy import deepcopy +from kcidb_io import schema + +# Silence flake8 "imported but unused" warning +__all__ = ["schema", "new", "merge"] + + +def new(): + """ + Create an empty I/O data set. + + Returns: + An empty I/O data set adhering to the latest schema version. + """ + data = dict(version=dict(major=schema.LATEST.major, + minor=schema.LATEST.minor)) + assert schema.is_valid_latest(data) + return data + + +def get_obj_num(data): + """ + Calculate number of objects of any type in an I/O data set adhering to the + latest schema. + + Args: + data: The data set to count the objects in. + Must adhere to the latest schema. + + Returns: + The number of objects in the data set. + """ + assert schema.is_valid_latest(data) + return sum(len(data[k]) for k in schema.LATEST.tree if k and k in data) + + +def merge(target, sources, copy_target=True, copy_sources=True): + """ + Merge multiple I/O data into a destination. + + Args: + target: The data to merge into. + sources: An iterable containing data sets to merge from. + copy_target: True if "target" contents should be copied before + upgrading and modifying. False if not. + Default is True. + copy_sources: True if "source" contents should be copied before + upgrading and referencing. False if not. + Default is True. + + Returns: + The merged data, adhering to the latest schema version. 
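    Example (an illustrative sketch using only this module's API):

        # Merge two empty latest-schema data sets into one
        merged = merge(new(), [new()])
        assert get_obj_num(merged) == 0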
+ """ + assert schema.is_valid(target) + + if copy_target: + target = deepcopy(target) + target = schema.upgrade(target, copy=False) + + for source in sources: + assert schema.is_valid(source) + if copy_sources: + source = deepcopy(source) + source = schema.upgrade(source, copy=False) + for obj_list_name in schema.LATEST.tree: + if obj_list_name in source: + target[obj_list_name] = \ + target.get(obj_list_name, []) + source[obj_list_name] + + assert schema.is_valid_latest(target) + return target diff --git a/kcidb_io/schema/__init__.py b/kcidb_io/schema/__init__.py new file mode 100644 index 0000000..0645159 --- /dev/null +++ b/kcidb_io/schema/__init__.py @@ -0,0 +1,91 @@ +"""Kernel CI reporting I/O schema""" + +from kcidb_io.schema import v1, v2, v3 + +# Version 1 +V1 = v1.VERSION +# Version 2 +V2 = v2.VERSION +# Version 3 +V3 = v3.VERSION +# Latest version of the schema +LATEST = V3 + + +def validate(data): + """ + Validate I/O data against one of the schema versions. + + Args: + data: The data to validate. Will not be changed. + + Returns: + The validated (but unchanged) data. + + Raises: + `jsonschema.exceptions.ValidationError` if the data did not adhere + to any of the schema versions. + """ + return LATEST.validate(data) + + +def is_valid(data): + """ + Check if I/O data is valid according to a schema version. + + Args: + data: The data to check. + + Returns: + True if the data is valid, false otherwise. + """ + return LATEST.is_valid(data) + + +def validate_latest(data): + """ + Validate I/O data against the latest schema version only. + + Args: + data: The data to validate. Will not be changed. + + Returns: + The validated (but unchanged) data. + + Raises: + `jsonschema.exceptions.ValidationError` if the data did not adhere + to the latest schema version. + """ + return LATEST.validate_exactly(data) + + +def is_valid_latest(data): + """ + Check if I/O data is valid according to the latest schema version. + + Args: + data: The data to check. + + Returns: + True if the data is valid, false otherwise. + """ + return LATEST.is_valid_exactly(data) + + +def upgrade(data, copy=True): + """ + Upgrade the data to the latest schema version from any of the previous + versions. Validates the data. Has no effect if the data already adheres to + the latest schema version. + + Args: + data: The data to upgrade and validate. + Must adhere to a version of the schema. + copy: True, if the data should be copied before upgrading. + False, if the data should be upgraded in-place. + Optional, default is True. + + Returns: + The upgraded and validated data. + """ + return LATEST.upgrade(data, copy) diff --git a/kcidb_io/schema/misc.py b/kcidb_io/schema/misc.py new file mode 100644 index 0000000..c6627dc --- /dev/null +++ b/kcidb_io/schema/misc.py @@ -0,0 +1,156 @@ +"""Kernel CI reporting I/O schema - misc definitions""" + +from copy import deepcopy +import jsonschema + + +class Version: + """A version of the schema""" + # pylint: disable=too-many-arguments + def __init__(self, major, minor, json, tree, previous=None, inherit=None): + """ + Initialize the version. + + Args: + major: The major version number. A non-negative integer. + Increases represent backward-incompatible changes. + E.g. deleting or renaming a property, changing a + property type, restricting values, making a property + required, or adding a new required property. + minor: The minor version number. A non-negative integer. + Increases represent backward-compatible changes. E.g. 
+ relaxing value restrictions, making a property + optional, or adding a new optional property. + json: The JSON schema for this version. + tree: A tree of parent-child relationships for objects in + data's top-level lists, expressed as a dictionary of + object list names to a list of the same, with the + empty string mapping to a list of topmost object list + names. + previous: The previous schema version, or None if none. + Must have lower major number, if not None. + inherit: The data inheritance function. Must accept data + adhering to the "previous" version of the schema as + the only argument, and return the data adhering to + this version. Can modify the argument. Can be None, + meaning no transformation needed. Must be None if + "previous" is None. + """ + assert isinstance(major, int) and major >= 0 + assert isinstance(minor, int) and minor >= 0 + assert json is not None + assert isinstance(tree, dict) + assert all(isinstance(k, str) and + isinstance(v, list) and + all(isinstance(e, str) for e in v) + for k, v in tree.items()) + assert previous is None or \ + isinstance(previous, Version) and (major > previous.major) + assert inherit is None or previous is not None and callable(inherit) + + self.major = major + self.minor = minor + self.previous = previous + self.json = json + self.tree = tree + self.inherit = inherit + + def validate_exactly(self, data): + """ + Validate the data against this schema version only. + + Args: + data: The data to validate. Will not be changed. + + Returns: + The validated (but unchanged) data. + + Raises: + `jsonschema.exceptions.ValidationError` if the data did not adhere + to this version of the schema. + """ + jsonschema.validate(instance=data, schema=self.json, + format_checker=jsonschema.draft7_format_checker) + return data + + def is_valid_exactly(self, data): + """ + Check if data is valid according to this schema version only. + + Args: + data: The data to check against the schema. + + Returns: + True if the data is valid, false otherwise. + """ + try: + self.validate_exactly(data) + except jsonschema.exceptions.ValidationError: + return False + return True + + def validate(self, data): + """ + Validate the data against this or previous schema versions. + + Args: + data: The data to validate. Will not be changed. + + Returns: + The validated (but unchanged) data. + + Raises: + `jsonschema.exceptions.ValidationError` if the data did not adhere + to this or a previous version of the schema. + """ + # Check for "previous" outside except block to avoid re-raising + if self.previous: + try: + return self.validate_exactly(data) + except jsonschema.exceptions.ValidationError: + return self.previous.validate(data) + return self.validate_exactly(data) + + def is_valid(self, data): + """ + Check if data is valid according to this or previous schema version. + + Args: + data: The data to check against the schema. + + Returns: + True if the data is valid, false otherwise. + """ + try: + self.validate(data) + except jsonschema.exceptions.ValidationError: + return False + return True + + def upgrade(self, data, copy=True): + """ + Upgrade the data to this version from any of the previous schema + versions. Validates the data. Has no effect if the data already + adheres to this schema version. + + Args: + data: The data to upgrade and validate. Must adhere to this + version or any of the previous versions. + copy: True, if the data should be copied before upgrading. + False, if the data should be upgraded in-place. + Optional, default is True. 
+ + Returns: + The upgraded and validated data. + """ + # Check for "previous" outside except block to avoid re-raising + if self.previous: + try: + data = self.validate_exactly(data) + except jsonschema.exceptions.ValidationError: + if copy: + data = deepcopy(data) + data = self.previous.upgrade(data, copy=False) + if self.inherit: + data = self.inherit(data) + return self.validate_exactly(data) diff --git a/kcidb_io/schema/test_v3.py b/kcidb_io/schema/test_v3.py new file mode 100644 index 0000000..7f4b2aa --- /dev/null +++ b/kcidb_io/schema/test_v3.py @@ -0,0 +1,94 @@ +"""v3 module tests""" + +import unittest +from kcidb_io.schema.v3 import VERSION + +# Disable long line checking for JSON data +# flake8: noqa +# pylint: disable=line-too-long + + +class UpgradeTestCase(unittest.TestCase): + """upgrade() test case""" + + def setUp(self): + """Setup tests""" + # pylint: disable=invalid-name + self.maxDiff = None + + def test_origin(self): + """Check origin extraction and removal works""" + prev_version_data = dict( + version=dict(major=VERSION.previous.major, + minor=VERSION.previous.minor), + revisions=[ + dict(id="origin1:5e29d1443c46b6ca70a4c940a67e8c09f05dcb7e"), + ], + builds=[ + dict(revision_id="origin1:5e29d1443c46b6ca70a4c940a67e8c09f05dcb7e", + id="origin2:1"), + dict(revision_id="origin1:5e29d1443c46b6ca70a4c940a67e8c09f05dcb7e", + id="origin3:2"), + ], + tests=[ + dict(build_id="origin2:1", id="origin4:1-1"), + dict(build_id="origin2:1", id="origin5:1-2"), + dict(build_id="origin3:2", id="origin6:2-1"), + dict(build_id="origin3:2", id="origin7:2-2"), + ], + ) + new_version_data = dict( + version=dict(major=VERSION.major, + minor=VERSION.minor), + revisions=[ + dict(id="5e29d1443c46b6ca70a4c940a67e8c09f05dcb7e", + origin="origin1") + ], + builds=[ + dict(revision_id="5e29d1443c46b6ca70a4c940a67e8c09f05dcb7e", + id="origin2:1", + origin="origin2"), + dict(revision_id="5e29d1443c46b6ca70a4c940a67e8c09f05dcb7e", + id="origin3:2", + origin="origin3"), + ], + tests=[ + dict(build_id="origin2:1", id="origin4:1-1", origin="origin4"), + dict(build_id="origin2:1", id="origin5:1-2", origin="origin5"), + dict(build_id="origin3:2", id="origin6:2-1", origin="origin6"), + dict(build_id="origin3:2", id="origin7:2-2", origin="origin7"), + ], + ) + + self.assertEqual(VERSION.upgrade(prev_version_data), new_version_data) + + def test_repository_commit_rename(self): + """Check git_repository_commit* rename to git_commit* works""" + prev_version_data = dict( + version=dict(major=VERSION.previous.major, + minor=VERSION.previous.minor), + revisions=[ + dict(id="origin1:5e29d1443c46b6ca70a4c940a67e8c09f05dcb7e", + git_repository_commit_hash="5e29d1443c46b6ca70a4c940a67e8c09f05dcb7e", + git_repository_commit_name="foo"), + dict(id="origin2:41f53451e75df9864a78c83e935e98ede7a170c2", + git_repository_commit_hash="41f53451e75df9864a78c83e935e98ede7a170c2", + git_repository_commit_name="bar"), + ], + ) + new_version_data = dict( + version=dict(major=VERSION.major, + minor=VERSION.minor), + revisions=[ + dict(id="5e29d1443c46b6ca70a4c940a67e8c09f05dcb7e", + origin="origin1", + git_commit_hash="5e29d1443c46b6ca70a4c940a67e8c09f05dcb7e", + git_commit_name="foo"), + dict(id="41f53451e75df9864a78c83e935e98ede7a170c2", + origin="origin2", + git_commit_hash="41f53451e75df9864a78c83e935e98ede7a170c2", + git_commit_name="bar"), + ], + ) + + self.assertEqual(VERSION.upgrade(prev_version_data), new_version_data) diff --git a/kcidb_io/schema/v1.py b/kcidb_io/schema/v1.py new file mode 100644 index 0000000..b3917c4 --- 
/dev/null +++ b/kcidb_io/schema/v1.py @@ -0,0 +1,548 @@ +"""Kernel CI reporting I/O schema v1""" + +from kcidb_io.schema.misc import Version + +# Major version number of JSON schema. +JSON_VERSION_MAJOR = 1 + +# Minor version number of JSON schema. +JSON_VERSION_MINOR = 1 + +# JSON schema for a named remote resource +JSON_RESOURCE = { + "title": "resource", + "description": "A named remote resource", + "type": "object", + "properties": { + "name": { + "type": "string", + "description": + "Resource name. Must be usable as a local file name for the " + "downloaded resource.", + }, + "url": { + "type": "string", + "format": "uri", + "description": + "Resource URL. Must point to the resource file directly, " + "so it could be downloaded automatically.", + }, + }, + "additionalProperties": False, + "required": [ + "name", + "url", + ], + "examples": [ + { + "name": "console.log", + "url": + "https://artifacts.cki-project.org/pipelines/223563/logs/" + "aarch64_host_1_console.log" + }, + { + "name": "kernel.tar.gz", + "url": + "https://artifacts.cki-project.org/pipelines/224569/" + "kernel-stable-aarch64-" + "a2fc8ee6676067f27d2f5c6e4d512adff3d9938c.tar.gz" + } + ] +} + +# JSON schema for a code revision +JSON_REVISION = { + "title": "revision", + "description": + "A revision of the tested code.\n" + "\n" + "Represents a way the tested source code could be obtained. E.g. " + "checking out a particular commit from a git repo, and applying a " + "set of patches on top.", + "type": "object", + "properties": { + "origin": { + "type": "string", + "description": + "The name of the CI system which submitted the revision", + "pattern": "^[a-z0-9_]*$" + }, + "origin_id": { + "type": "string", + "description": "Origin-unique revision ID", + }, + "git_repository_url": { + "type": "string", + "format": "uri", + "description": + "The URL of the Git repository which contains the base code " + "of the revision. The shortest possible https:// URL, or, if " + "that's not available, the shortest possible git:// URL.", + }, + "git_repository_commit_hash": { + "type": "string", + "description": + "The full commit hash of the revision's base code " + "in the Git repository", + }, + "git_repository_commit_name": { + "type": "string", + "description": + "A human-readable name of the commit containing the base " + "code of the revision, as would be output by " + "\"git describe\", at the discovery time." + }, + "git_repository_branch": { + "type": "string", + "description": + "The Git repository branch in which the commit with the " + "revision's base code was discovered." + }, + "patch_mboxes": { + "type": "array", + "description": + "List of mboxes containing patches applied " + "to the base code of the revision, in order of application", + "items": JSON_RESOURCE, + }, + "message_id": { + "type": "string", + "format": "email", + "description": + "The value of the Message-ID header of the e-mail message " + "introducing this code revision, if any. E.g. a message with " + "the revision's patchset, or a release announcement sent to " + "a maillist.", + }, + "description": { + "type": "string", + "description": + "Human-readable description of the revision. " + "E.g. a release version, or the subject of a patchset message." + }, + "publishing_time": { + "type": "string", + "format": "date-time", + "description": + "The time the revision was made public. E.g. 
the timestamp " + "on a patch message, a commit, or a tag.", + }, + "discovery_time": { + "type": "string", + "format": "date-time", + "description": + "The time the revision was discovered by the CI system. " + "E.g. the time the CI system found a patch message, or " + "noticed a new commit or a new tag in a git repo.", + }, + "contacts": { + "type": "array", + "description": + "List of e-mail addresses of contacts concerned with " + "this revision, such as authors, reviewers, and mail lists", + "items": { + "type": "string", + "description": + "An e-mail address of a contact concerned with this " + "revision, e.g. an author, a reviewer, or a mail list, " + "as in https://tools.ietf.org/html/rfc5322#section-3.4" + }, + }, + "log_url": { + "type": "string", + "format": "uri", + "description": + "The URL of the log file of the attempt to construct this " + "revision from its parts. E.g. 'git am' output.", + }, + "valid": { + "type": "boolean", + "description": + "True if the revision is valid, i.e. if its parts could be " + "combined. False if not, e.g. if its patches failed to apply." + }, + "misc": { + "type": "object", + "description": + "Miscellaneous extra data about the revision", + }, + }, + "additionalProperties": False, + "required": [ + "origin", + "origin_id", + ], +} + +# JSON schema for a build of a revision +JSON_BUILD = { + "title": "build", + "description": "A build of a revision", + "type": "object", + "properties": { + "revision_origin": { + "type": "string", + "description": + "The name of the CI system which submitted the built revision", + "pattern": "^[a-z0-9_]*$" + }, + "revision_origin_id": { + "type": "string", + "description": + "Origin-unique ID of the built revision. The revision must " + "be valid for the build to be considered valid.", + }, + "origin": { + "type": "string", + "description": + "The name of the CI system which submitted the build", + "pattern": "^[a-z0-9_]*$" + }, + "origin_id": { + "type": "string", + "description": "Origin-unique build ID", + }, + "description": { + "type": "string", + "description": + "Human-readable description of the build" + }, + "start_time": { + "type": "string", + "format": "date-time", + "description": + "The time the build was started", + }, + "duration": { + "type": "number", + "description": + "The number of seconds it took to complete the build", + }, + "architecture": { + "type": "string", + "description": + "Target architecture of the build", + "pattern": "^[a-z0-9_]*$" + }, + "command": { + "type": "string", + "description": + "Full shell command line used to make the build, " + "including environment variables", + }, + "compiler": { + "type": "string", + "description": + "Name and version of the compiler used to make the build", + }, + "input_files": { + "type": "array", + "description": + "A list of build input files. E.g. configuration.", + "items": JSON_RESOURCE, + }, + "output_files": { + "type": "array", + "description": + "A list of build output files: images, packages, etc.", + "items": JSON_RESOURCE, + }, + "config_name": { + "type": "string", + "description": + "A name describing the build configuration options.", + }, + "config_url": { + "type": "string", + "format": "uri", + "description": + "The URL of the build configuration file.", + }, + "log_url": { + "type": "string", + "format": "uri", + "description": + "The URL of the build log file.", + }, + "valid": { + "type": "boolean", + "description": + "True if the build is valid, i.e. if it could be completed. 
" + "False if not.", + }, + "misc": { + "type": "object", + "description": + "Miscellaneous extra data about the build", + }, + }, + "additionalProperties": False, + "required": [ + "revision_origin", + "revision_origin_id", + "origin", + "origin_id", + ], +} + +# JSON schema for a test run on a build +JSON_TEST = { + "title": "test", + "description": + "A test run against a build.\n" + "\n" + "Could represent a result of execution of a test suite program, a " + "result of one of the tests done by the test suite program, as well " + "as a summary of a collection of test suite results.\n" + "\n" + "Each test run should normally have a dot-separated test \"path\" " + "specified in the \"path\" property, which could identify a specific " + "test within a test suite (e.g. \"LTPlite.sem01\"), a whole test " + "suite (e.g. \"LTPlite\"), or the summary of all tests for a build " + "("" - the empty string).", + "type": "object", + "properties": { + "build_origin": { + "type": "string", + "description": + "The name of the CI system which submitted the tested build", + "pattern": "^[a-z0-9_]*$" + }, + "build_origin_id": { + "type": "string", + "description": + "Origin-unique ID of the tested build. The build must be " + "valid for the test run to be considered valid.", + }, + "origin": { + "type": "string", + "description": + "The name of the CI system which submitted the test run", + "pattern": "^[a-z0-9_]*$" + }, + "origin_id": { + "type": "string", + "description": "Origin-unique ID of the test run", + }, + "environment": { + "type": "object", + "description": + "The environment the test ran in. " + "E.g. a host, a set of hosts, or a lab; " + "amount of memory/storage/CPUs, for each host; " + "process environment variables, etc.", + "properties": { + "description": { + "type": "string", + "description": + "Human-readable description of the environment" + }, + "misc": { + "type": "object", + "description": + "Miscellaneous extra data about the environment", + }, + }, + "additionalProperties": False, + }, + "path": { + "type": "string", + "description": + "Dot-separated path to the node in the test classification " + "tree the executed test belongs to. E.g. \"LTPlite.sem01\". " + "The empty string signifies the root of the tree, i.e. all " + "tests for the build, executed by the origin CI system.", + "pattern": "^[.a-zA-Z0-9_-]*$" + }, + "description": { + "type": "string", + "description": + "Human-readable description of the test run" + }, + "status": { + "type": "string", + "description": + "The test status, one of the following. " + "\"ERROR\" - the test is faulty, " + "the status of the tested code is unknown. " + "\"FAIL\" - the test has failed, the tested code is faulty. " + "\"PASS\" - the test has passed, the tested code is correct. " + "\"DONE\" - the test has finished successfully, " + "the status of the tested code is unknown. " + "\"SKIP\" - the test wasn't executed, " + "the status of the tested code is unknown.\n" + "\n" + "The status names above are listed in priority order " + "(highest to lowest), which could be used for producing a " + "summary status for a collection of test runs, e.g. for all " + "testing done on a build, based on results of executed test " + "suites. 
The summary status would be the highest priority " + "status across all test runs in a collection.", + "enum": ["ERROR", "FAIL", "PASS", "DONE", "SKIP"], + }, + "waived": { + "type": "boolean", + "description": + "True if the test status should be ignored.\n" + "\n" + "Could be used for reporting test results without affecting " + "the overall test status and alerting the contacts concerned " + "with the tested code revision. For example, for collecting " + "test reliability statistics when the test is first " + "introduced, or is being fixed.", + }, + "start_time": { + "type": "string", + "format": "date-time", + "description": + "The time the test run was started", + }, + "duration": { + "type": "number", + "description": + "The number of seconds it took to run the test", + }, + "output_files": { + "type": "array", + "description": + "A list of test outputs: logs, dumps, etc.", + "items": JSON_RESOURCE, + }, + "misc": { + "type": "object", + "description": + "Miscellaneous extra data about the test run", + }, + }, + "additionalProperties": False, + "required": [ + "build_origin", + "build_origin_id", + "origin", + "origin_id", + ], +} + +# JSON schema for I/O data +JSON = { + "title": "kcidb", + "description": + "Kernel CI report data. To be submitted to/queried from the common " + "report database.\n" + "\n" + "Objects in the data are identified and linked together using two " + "properties: \"origin\" and \"origin_id\". The former is a string " + "identifying the CI system which submitted the object. The latter " + "is a string generated by the origin CI system, identifying that " + "object uniquely among all objects of the same type, coming from " + "that CI system.\n" + "\n" + "Any of the immediate properties (except \"version\") can be missing " + "or be an empty list with each submission/query, but only complete " + "data stored in the database should be considered valid.\n" + "\n" + "E.g. a test run referring to a non-existent build is allowed " + "into/from the database, but would only appear in reports once both " + "the build and its revision are present.\n" + "\n" + "No special meaning apart from \"data is missing\" is attached to " + "any immediate or deeper properties being omitted, when they're not " + "required, and no default values should be assumed for them.\n" + "\n" + "Extra free-form data can be stored under \"misc\" fields associated " + "with various objects throughout the schema, if necessary. That data " + "could later be used as the basis for defining new properties to " + "house it.", + "type": "object", + "properties": { + "version": { + "oneOf": [ + { + "type": "string", + "description": + "Version of the schema the data complies to.\n" + "\n" + "Must be a string representing two unsigned integer " + "numbers: major and minor, separated by a dot. 
If " + "both the dot and the minor number are omitted, the " + "minor number is assumed to be zero.\n" + "\n" + "Increases in major version number represent changes " + "which are not backward-compatible, such as renaming " + "a property, or changing a type of property, which " + "existing software versions cannot handle.\n" + "\n" + "Increases in minor version number represent changes " + "which are backward-compatible, such as relaxing " + "value restrictions, or making a property optional.", + "pattern": "^1(\\.0)?$" + }, + { + "type": "object", + "properties": { + "major": { + "type": "integer", + "const": JSON_VERSION_MAJOR, + "description": + "Major number of the schema version.\n" + "\n" + "Increases represent backward-incompatible " + "changes. E.g. deleting or renaming a " + "property, changing a property type, " + "restricting values, making a property " + "required, or adding a new required " + "property.", + }, + "minor": { + "type": "integer", + "minimum": 0, + "maximum": JSON_VERSION_MINOR, + "description": + "Minor number of the schema version.\n" + "\n" + "Increases represent backward-compatible " + "changes. E.g. relaxing value restrictions, " + "making a property optional, or adding a new " + "optional property.", + } + }, + "additionalProperties": False, + "required": [ + "major", + ], + }, + ], + }, + "revisions": { + "description": "List of code revisions", + "type": "array", + "items": JSON_REVISION, + }, + "builds": { + "description": "List of builds", + "type": "array", + "items": JSON_BUILD, + }, + "tests": { + "description": "List of test runs", + "type": "array", + "items": JSON_TEST, + }, + }, + "additionalProperties": False, + "required": [ + "version", + ] +} + +# The parent-child relationship tree +TREE = { + "": ["revisions"], + "revisions": ["builds"], + "builds": ["tests"], + "tests": [] +} + +VERSION = Version(JSON_VERSION_MAJOR, JSON_VERSION_MINOR, JSON, TREE) + +__all__ = ["VERSION"] diff --git a/kcidb_io/schema/v2.py b/kcidb_io/schema/v2.py new file mode 100644 index 0000000..7932c2e --- /dev/null +++ b/kcidb_io/schema/v2.py @@ -0,0 +1,569 @@ +"""Kernel CI reporting I/O schema v2""" + +from kcidb_io.schema.misc import Version +from kcidb_io.schema import v1 + +# Major version number of JSON schema. +JSON_VERSION_MAJOR = 2 + +# Minor version number of JSON schema. +JSON_VERSION_MINOR = 0 + +# A regular expression pattern matching permitted ID strings +ID_PATTERN = "^[a-z0-9_]+:" + +# JSON schema for a named remote resource +JSON_RESOURCE = { + "title": "resource", + "description": "A named remote resource", + "type": "object", + "properties": { + "name": { + "type": "string", + "description": + "Resource name. Must be usable as a local file name for the " + "downloaded resource. Cannot be empty. Should not include " + "directories.", + "pattern": "^[^/]+$", + }, + "url": { + "type": "string", + "format": "uri", + "description": + "Resource URL. 
Must point to the resource file directly, " + "so it could be downloaded automatically.", + }, + }, + "additionalProperties": False, + "required": [ + "name", + "url", + ], + "examples": [ + { + "name": "console.log", + "url": + "https://artifacts.cki-project.org/pipelines/223563/logs/" + "aarch64_host_1_console.log" + }, + { + "name": "kernel.tar.gz", + "url": + "https://artifacts.cki-project.org/pipelines/224569/" + "kernel-stable-aarch64-" + "a2fc8ee6676067f27d2f5c6e4d512adff3d9938c.tar.gz" + } + ] +} + +# JSON schema for a code revision +JSON_REVISION = { + "title": "revision", + "description": + "A revision of the tested code.\n" + "\n" + "Represents a way the tested source code could be obtained. E.g. " + "checking out a particular commit from a git repo, and applying a " + "set of patches on top.", + "type": "object", + "properties": { + "id": { + "type": "string", + "description": + "Revision ID.\n" + "\n" + "Must start with a non-empty string identifying the CI " + "system which submitted the revision, followed by a colon " + "':' character. The rest of the string is generated by the " + "origin CI system, and must identify the revision uniquely " + "among all revisions, coming from that CI system.\n", + "pattern": ID_PATTERN, + }, + "git_repository_url": { + "type": "string", + "format": "uri", + "description": + "The URL of the Git repository which contains the base code " + "of the revision. The shortest possible https:// URL, or, if " + "that's not available, the shortest possible git:// URL.", + }, + "git_repository_commit_hash": { + "type": "string", + "description": + "The full commit hash of the revision's base code " + "in the Git repository", + }, + "git_repository_commit_name": { + "type": "string", + "description": + "A human-readable name of the commit containing the base " + "code of the revision, as would be output by " + "\"git describe\", at the discovery time." + }, + "git_repository_branch": { + "type": "string", + "description": + "The Git repository branch in which the commit with the " + "revision's base code was discovered." + }, + "patch_mboxes": { + "type": "array", + "description": + "List of mboxes containing patches applied " + "to the base code of the revision, in order of application", + "items": JSON_RESOURCE, + }, + "message_id": { + "type": "string", + "format": "email", + "description": + "The value of the Message-ID header of the e-mail message " + "introducing this code revision, if any. E.g. a message with " + "the revision's patchset, or a release announcement sent to " + "a maillist.", + }, + "description": { + "type": "string", + "description": + "Human-readable description of the revision. " + "E.g. a release version, or the subject of a patchset message." + }, + "publishing_time": { + "type": "string", + "format": "date-time", + "description": + "The time the revision was made public. E.g. the timestamp " + "on a patch message, a commit, or a tag.", + }, + "discovery_time": { + "type": "string", + "format": "date-time", + "description": + "The time the revision was discovered by the CI system. " + "E.g. the time the CI system found a patch message, or " + "noticed a new commit or a new tag in a git repo.", + }, + "contacts": { + "type": "array", + "description": + "List of e-mail addresses of contacts concerned with " + "this revision, such as authors, reviewers, and mail lists", + "items": { + "type": "string", + "description": + "An e-mail address of a contact concerned with this " + "revision, e.g. 
an author, a reviewer, or a mail list, " + "as in https://tools.ietf.org/html/rfc5322#section-3.4" + }, + }, + "log_url": { + "type": "string", + "format": "uri", + "description": + "The URL of the log file of the attempt to construct this " + "revision from its parts. E.g. 'git am' output.", + }, + "valid": { + "type": "boolean", + "description": + "True if the revision is valid, i.e. if its parts could be " + "combined. False if not, e.g. if its patches failed to apply." + }, + "misc": { + "type": "object", + "description": + "Miscellaneous extra data about the revision", + }, + }, + "additionalProperties": False, + "required": [ + "id", + ], +} + +# JSON schema for a build of a revision +JSON_BUILD = { + "title": "build", + "description": "A build of a revision", + "type": "object", + "properties": { + "revision_id": { + "type": "string", + "description": + "ID of the built revision. The revision must " + "be valid for the build to be considered valid.", + "pattern": ID_PATTERN, + }, + "id": { + "type": "string", + "description": + "Build ID\n" + "\n" + "Must start with a non-empty string identifying the CI " + "system which submitted the build, followed by a colon " + "':' character. The rest of the string is generated by the " + "origin CI system, and must identify the build uniquely " + "among all builds, coming from that CI system.\n", + "pattern": ID_PATTERN, + }, + "description": { + "type": "string", + "description": + "Human-readable description of the build" + }, + "start_time": { + "type": "string", + "format": "date-time", + "description": + "The time the build was started", + }, + "duration": { + "type": "number", + "description": + "The number of seconds it took to complete the build", + }, + "architecture": { + "type": "string", + "description": + "Target architecture of the build", + "pattern": "^[a-z0-9_]*$" + }, + "command": { + "type": "string", + "description": + "Full shell command line used to make the build, " + "including environment variables", + }, + "compiler": { + "type": "string", + "description": + "Name and version of the compiler used to make the build", + }, + "input_files": { + "type": "array", + "description": + "A list of build input files. E.g. configuration.", + "items": JSON_RESOURCE, + }, + "output_files": { + "type": "array", + "description": + "A list of build output files: images, packages, etc.", + "items": JSON_RESOURCE, + }, + "config_name": { + "type": "string", + "description": + "A name describing the build configuration options.", + }, + "config_url": { + "type": "string", + "format": "uri", + "description": + "The URL of the build configuration file.", + }, + "log_url": { + "type": "string", + "format": "uri", + "description": + "The URL of the build log file.", + }, + "valid": { + "type": "boolean", + "description": + "True if the build is valid, i.e. if it could be completed. 
" + "False if not.", + }, + "misc": { + "type": "object", + "description": + "Miscellaneous extra data about the build", + }, + }, + "additionalProperties": False, + "required": [ + "revision_id", + "id", + ], +} + +# JSON schema for a test run on a build +JSON_TEST = { + "title": "test", + "description": + "A test run against a build.\n" + "\n" + "Could represent a result of execution of a test suite program, a " + "result of one of the tests done by the test suite program, as well " + "as a summary of a collection of test suite results.\n" + "\n" + "Each test run should normally have a dot-separated test \"path\" " + "specified in the \"path\" property, which could identify a specific " + "test within a test suite (e.g. \"LTPlite.sem01\"), a whole test " + "suite (e.g. \"LTPlite\"), or the summary of all tests for a build " + "("" - the empty string).", + "type": "object", + "properties": { + "build_id": { + "type": "string", + "description": + "ID of the tested build. The build must be " + "valid for the test run to be considered valid.", + "pattern": ID_PATTERN, + }, + "id": { + "type": "string", + "description": + "ID of the test run\n" + "\n" + "Must start with a non-empty string identifying the CI " + "system which submitted the test run, followed by a colon " + "':' character. The rest of the string is generated by the " + "origin CI system, and must identify the test run uniquely " + "among all test runs, coming from that CI system.\n", + "pattern": ID_PATTERN, + }, + "environment": { + "type": "object", + "description": + "The environment the test ran in. " + "E.g. a host, a set of hosts, or a lab; " + "amount of memory/storage/CPUs, for each host; " + "process environment variables, etc.", + "properties": { + "description": { + "type": "string", + "description": + "Human-readable description of the environment" + }, + "misc": { + "type": "object", + "description": + "Miscellaneous extra data about the environment", + }, + }, + "additionalProperties": False, + }, + "path": { + "type": "string", + "description": + "Dot-separated path to the node in the test classification " + "tree the executed test belongs to. E.g. \"LTPlite.sem01\". " + "The empty string signifies the root of the tree, i.e. all " + "tests for the build, executed by the origin CI system.", + "pattern": "^[.a-zA-Z0-9_-]*$" + }, + "description": { + "type": "string", + "description": + "Human-readable description of the test run" + }, + "status": { + "type": "string", + "description": + "The test status, one of the following. " + "\"ERROR\" - the test is faulty, " + "the status of the tested code is unknown. " + "\"FAIL\" - the test has failed, the tested code is faulty. " + "\"PASS\" - the test has passed, the tested code is correct. " + "\"DONE\" - the test has finished successfully, " + "the status of the tested code is unknown. " + "\"SKIP\" - the test wasn't executed, " + "the status of the tested code is unknown.\n" + "\n" + "The status names above are listed in priority order " + "(highest to lowest), which could be used for producing a " + "summary status for a collection of test runs, e.g. for all " + "testing done on a build, based on results of executed test " + "suites. 
The summary status would be the highest priority " + "status across all test runs in a collection.", + "enum": ["ERROR", "FAIL", "PASS", "DONE", "SKIP"], + }, + "waived": { + "type": "boolean", + "description": + "True if the test status should be ignored.\n" + "\n" + "Could be used for reporting test results without affecting " + "the overall test status and alerting the contacts concerned " + "with the tested code revision. For example, for collecting " + "test reliability statistics when the test is first " + "introduced, or is being fixed.", + }, + "start_time": { + "type": "string", + "format": "date-time", + "description": + "The time the test run was started", + }, + "duration": { + "type": "number", + "description": + "The number of seconds it took to run the test", + }, + "output_files": { + "type": "array", + "description": + "A list of test outputs: logs, dumps, etc.", + "items": JSON_RESOURCE, + }, + "misc": { + "type": "object", + "description": + "Miscellaneous extra data about the test run", + }, + }, + "additionalProperties": False, + "required": [ + "build_id", + "id", + ], +} + +# JSON schema for I/O data +JSON = { + "title": "kcidb", + "description": + "Kernel CI report data. To be submitted to/queried from the common " + "report database.\n" + "\n" + "Objects in the data are identified and linked together using \"id\" " + "and \"*_id\" string properties. Each value of these properties " + "must start with a non-empty string identifying the CI system which " + "submitted the object, followed by a colon ':' character. The rest " + "of the string is generated by the origin CI system, and must " + "identify that object uniquely among all objects of the same type, " + "coming from that CI system.\n" + "\n" + "Any of the immediate properties (except \"version\") can be missing " + "or be an empty list with each submission/query, but only complete " + "data stored in the database should be considered valid.\n" + "\n" + "E.g. a test run referring to a non-existent build is allowed " + "into/from the database, but would only appear in reports once both " + "the build and its revision are present.\n" + "\n" + "No special meaning apart from \"data is missing\" is attached to " + "any immediate or deeper properties being omitted, when they're not " + "required, and no default values should be assumed for them.\n" + "At the same time, no properties can be null.\n" + "\n" + "Extra free-form data can be stored under \"misc\" fields associated " + "with various objects throughout the schema, if necessary. That data " + "could later be used as the basis for defining new properties to " + "house it.", + "type": "object", + "properties": { + "version": { + "type": "object", + "properties": { + "major": { + "type": "integer", + "const": JSON_VERSION_MAJOR, + "description": + "Major number of the schema version.\n" + "\n" + "Increases represent backward-incompatible " + "changes. E.g. deleting or renaming a " + "property, changing a property type, " + "restricting values, making a property " + "required, or adding a new required " + "property.", + }, + "minor": { + "type": "integer", + "minimum": 0, + "maximum": JSON_VERSION_MINOR, + "description": + "Minor number of the schema version.\n" + "\n" + "Increases represent backward-compatible " + "changes. E.g. 
relaxing value restrictions, " + "making a property optional, or adding a new " + "optional property.", + } + }, + "additionalProperties": False, + "required": [ + "major", + ], + }, + "revisions": { + "description": "List of code revisions", + "type": "array", + "items": JSON_REVISION, + }, + "builds": { + "description": "List of builds", + "type": "array", + "items": JSON_BUILD, + }, + "tests": { + "description": "List of test runs", + "type": "array", + "items": JSON_TEST, + }, + }, + "additionalProperties": False, + "required": [ + "version", + ] +} + + +def inherit(data): + """ + Inherit data, i.e. convert data adhering to the previous version of + the schema to satisfy this version of the schema. + + Args: + data: The data to inherit. + Will be modified in place. + + Returns: + The inherited data. + """ + # Merge *origin and *origin_id properties into *id properties + # pylint: disable=redefined-builtin,invalid-name + for collection, id_pair_map in \ + dict(revisions=dict(id=('origin', 'origin_id')), + builds=dict(id=('origin', 'origin_id'), + revision_id=('revision_origin', + 'revision_origin_id')), + tests=dict(id=('origin', 'origin_id'), + build_id=('build_origin', + 'build_origin_id'))).items(): + for id, pair in id_pair_map.items(): + for obj in data.get(collection, []): + obj[id] = obj[pair[0]] + ':' + obj[pair[1]] + del obj[pair[0]] + del obj[pair[1]] + + # Replace slashes with underscores in resource names + for collection, prop_list in \ + dict(revisions=["patch_mboxes"], + builds=["input_files", "output_files"], + tests=["output_files"]).items(): + for obj in data.get(collection, []): + for prop in prop_list: + for resource in obj.get(prop, []): + resource["name"] = resource["name"].replace("/", "_") + + # Update version + data['version'] = dict(major=JSON_VERSION_MAJOR, + minor=JSON_VERSION_MINOR) + return data + + +# The parent-child relationship tree +TREE = { + "": ["revisions"], + "revisions": ["builds"], + "builds": ["tests"], + "tests": [] +} + +VERSION = Version(JSON_VERSION_MAJOR, JSON_VERSION_MINOR, JSON, TREE, + v1.VERSION, inherit) + +__all__ = ["VERSION"] diff --git a/kcidb_io/schema/v3.py b/kcidb_io/schema/v3.py new file mode 100644 index 0000000..62c34d7 --- /dev/null +++ b/kcidb_io/schema/v3.py @@ -0,0 +1,654 @@ +"""Kernel CI reporting I/O schema v3""" + +import re +from kcidb_io.schema.misc import Version +from kcidb_io.schema import v2 + +# Major version number of JSON schema. +JSON_VERSION_MAJOR = 3 + +# Minor version number of JSON schema. +JSON_VERSION_MINOR = 0 + +# A regular expression pattern matching strings containing accepted Git +# repository URLs +GIT_REPOSITORY_URL_PATTERN = "(https|git)://.*" + +# A regular expression pattern matching strings containing sha1 hashes +SHA1_PATTERN = "[0-9a-f]{40}" + +# A regular expression pattern matching strings containing sha256 hashes +SHA256_PATTERN = "[0-9a-f]{64}" + +# A regular expression pattern matching strings containing Git repository +# commit hash (sha1) +GIT_COMMIT_HASH_PATTERN = f"{SHA1_PATTERN}" + +# A regular expression pattern matching strings containing revision IDs +REVISION_ID_PATTERN = \ + f"{GIT_COMMIT_HASH_PATTERN}" \ + f"(\\+{SHA256_PATTERN})?" 
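# An illustrative sketch (editor's example, not used by the schema
# definitions in this module): build a v3 revision ID from a base commit
# hash plus the contents of the applied patches, following the
# "sha256sum *.patch | cut -c-64 | sha256sum | cut -c-64" recipe described
# for the revision "id" property below, so that the result matches
# REVISION_ID_PATTERN. The function name and arguments are assumptions
# made only for this example.
import hashlib


def example_revision_id(git_commit_hash, patch_bytes_list):
    """Build a revision ID from a commit hash and applied patch contents."""
    if not patch_bytes_list:
        # No patches applied: the ID is just the base commit hash
        return git_commit_hash
    # One newline-terminated sha256 hash per patch, in application order
    hash_list = "".join(hashlib.sha256(patch).hexdigest() + "\n"
                        for patch in patch_bytes_list)
    # The commit hash, a '+', and the sha256 hash over the list of hashes
    return git_commit_hash + "+" + \
        hashlib.sha256(hash_list.encode("ascii")).hexdigest()

# E.g. example_revision_id("5e29d1443c46b6ca70a4c940a67e8c09f05dcb7e",
#                          [b"patch one", b"patch two"]) yields the commit
# hash, a '+', and a 64-character sha256 hex digest, which satisfies
# re.fullmatch(REVISION_ID_PATTERN, ...).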
+ +# A regular expression pattern matching strings containing origin name +ORIGIN_PATTERN = "[a-z0-9_]+" + +# A regular expression pattern matching strings containing an object ID +ORIGIN_ID_PATTERN = f"{ORIGIN_PATTERN}:.*" + +# JSON schema for a named remote resource +JSON_RESOURCE = { + "title": "resource", + "description": "A named remote resource", + "type": "object", + "properties": { + "name": { + "type": "string", + "description": + "Resource name. Must be usable as a local file name for the " + "downloaded resource. Cannot be empty. Should not include " + "directories.", + "pattern": "^[^/]+$", + }, + "url": { + "type": "string", + "format": "uri", + "description": + "Resource URL. Must point to the resource file directly, " + "so it could be downloaded automatically.", + }, + }, + "additionalProperties": False, + "required": [ + "name", + "url", + ], + "examples": [ + { + "name": "console.log", + "url": + "https://artifacts.cki-project.org/pipelines/223563/logs/" + "aarch64_host_1_console.log" + }, + { + "name": "kernel.tar.gz", + "url": + "https://artifacts.cki-project.org/pipelines/224569/" + "kernel-stable-aarch64-" + "a2fc8ee6676067f27d2f5c6e4d512adff3d9938c.tar.gz" + } + ] +} + +# JSON schema for a code revision +JSON_REVISION = { + "title": "revision", + "description": + "A revision of the tested code.\n" + "\n" + "Represents a way the tested source code could be obtained. E.g. " + "checking out a particular commit from a git repo, and applying a " + "set of patches on top.", + "type": "object", + "properties": { + "id": { + "type": "string", + "description": + "Revision ID.\n" + "\n" + "Must contain the full commit hash of the revision's base " + "code in the Git repository.\n" + "\n" + "If the revision had patches applied to the base code, " + "the commit hash should be followed by the '+' character " + "and a sha256 hash over newline-terminated sha256 hashes of " + "each applied patch, in order. E.g. generated with this " + "shell command: \"" + "sha256sum *.patch | cut -c-64 | sha256sum | cut -c-64" + "\".\n", + "pattern": f"^{REVISION_ID_PATTERN}$", + "examples": [ + "aa73bcc376865c23e61dcebd467697b527901be8", + "c0d73a868d9b411bd2d0c8e5ff9d98bfa8563cb1" + + "+903638c087335b10293663c682b9aa0076f9f7be478a8e782" + + "8bc22e12d301b42" + ], + }, + "origin": { + "type": "string", + "description": + "The name of the CI system which submitted the revision", + "pattern": f"^{ORIGIN_PATTERN}$", + }, + "tree_name": { + "type": "string", + "description": + "The widely-recognized name of the sub-tree (fork) of the " + "main code tree that the revision belongs to.", + "examples": [ + "net-next", + "rdma", + "mainline", + ], + }, + "git_repository_url": { + "type": "string", + "format": "uri", + "description": + "The URL of the Git repository which contains the base code " + "of the revision. The shortest possible https:// URL, or, if " + "that's not available, the shortest possible git:// URL.", + "pattern": f"^{GIT_REPOSITORY_URL_PATTERN}$", + "examples": [ + "https://git.kernel.org/pub/scm/linux/kernel/git/" + "torvalds/linux.git", + ], + }, + "git_commit_hash": { + "type": "string", + "description": + "The full commit hash of the revision's base code", + "pattern": f"^{GIT_COMMIT_HASH_PATTERN}$", + }, + "git_commit_name": { + "type": "string", + "description": + "A human-readable name of the commit containing the base " + "code of the revision, as would be output by " + "\"git describe\", at the discovery time." 
+ }, + "git_repository_branch": { + "type": "string", + "description": + "The Git repository branch in which the commit with the " + "revision's base code was discovered." + }, + "patch_mboxes": { + "type": "array", + "description": + "List of mboxes containing patches applied " + "to the base code of the revision, in order of application", + "items": JSON_RESOURCE, + }, + "message_id": { + "type": "string", + "format": "email", + "description": + "The value of the Message-ID header of the e-mail message " + "introducing this code revision, if any. E.g. a message with " + "the revision's patchset, or a release announcement sent to " + "a maillist.", + }, + "description": { + "type": "string", + "description": + "Human-readable description of the revision. " + "E.g. a release version, or the subject of a patchset message." + }, + "publishing_time": { + "type": "string", + "format": "date-time", + "description": + "The time the revision was made public. E.g. the timestamp " + "on a patch message, a commit, or a tag.", + "examples": [ + "2020-08-14T23:08:06.967000+00:00", + ], + }, + "discovery_time": { + "type": "string", + "format": "date-time", + "description": + "The time the revision was discovered by the CI system. " + "E.g. the time the CI system found a patch message, or " + "noticed a new commit or a new tag in a git repo.", + "examples": [ + "2020-08-14T23:08:06.967000+00:00", + ], + }, + "contacts": { + "type": "array", + "description": + "List of e-mail addresses of contacts concerned with " + "this revision, such as authors, reviewers, and mail lists", + "items": { + "type": "string", + "description": + "An e-mail address of a contact concerned with this " + "revision, e.g. an author, a reviewer, or a mail list, " + "as in https://tools.ietf.org/html/rfc5322#section-3.4" + }, + }, + "log_url": { + "type": "string", + "format": "uri", + "description": + "The URL of the log file of the attempt to construct this " + "revision from its parts. E.g. 'git am' output.", + }, + "valid": { + "type": "boolean", + "description": + "True if the revision is valid, i.e. if its parts could be " + "combined. False if not, e.g. if its patches failed to apply." + }, + "misc": { + "type": "object", + "description": + "Miscellaneous extra data about the revision", + }, + }, + "additionalProperties": False, + "required": [ + "id", + "origin", + ], +} + +# JSON schema for a build of a revision +JSON_BUILD = { + "title": "build", + "description": "A build of a revision", + "type": "object", + "properties": { + "revision_id": { + "type": "string", + "description": + "ID of the built revision. The revision must " + "be valid for the build to be considered valid.", + "pattern": f"^{REVISION_ID_PATTERN}$", + }, + "id": { + "type": "string", + "description": + "Build ID\n" + "\n" + "Must start with a non-empty string identifying the CI " + "system which submitted the build, followed by a colon " + "':' character. 
The rest of the string is generated by the " + "origin CI system, and must identify the build uniquely " + "among all builds, coming from that CI system.\n", + "pattern": f"^{ORIGIN_ID_PATTERN}$", + }, + "origin": { + "type": "string", + "description": + "The name of the CI system which submitted the build", + "pattern": f"^{ORIGIN_PATTERN}$", + }, + "description": { + "type": "string", + "description": + "Human-readable description of the build" + }, + "start_time": { + "type": "string", + "format": "date-time", + "description": + "The time the build was started", + "examples": [ + "2020-08-14T23:08:06.967000+00:00", + ], + }, + "duration": { + "type": "number", + "description": + "The number of seconds it took to complete the build", + }, + "architecture": { + "type": "string", + "description": + "Target architecture of the build", + "pattern": "^[a-z0-9_]*$" + }, + "command": { + "type": "string", + "description": + "Full shell command line used to make the build, " + "including environment variables", + }, + "compiler": { + "type": "string", + "description": + "Name and version of the compiler used to make the build", + }, + "input_files": { + "type": "array", + "description": + "A list of build input files. E.g. configuration.", + "items": JSON_RESOURCE, + }, + "output_files": { + "type": "array", + "description": + "A list of build output files: images, packages, etc.", + "items": JSON_RESOURCE, + }, + "config_name": { + "type": "string", + "description": + "A name describing the build configuration options.", + }, + "config_url": { + "type": "string", + "format": "uri", + "description": + "The URL of the build configuration file.", + }, + "log_url": { + "type": "string", + "format": "uri", + "description": + "The URL of the build log file.", + }, + "valid": { + "type": "boolean", + "description": + "True if the build is valid, i.e. if it could be completed. " + "False if not.", + }, + "misc": { + "type": "object", + "description": + "Miscellaneous extra data about the build", + }, + }, + "additionalProperties": False, + "required": [ + "revision_id", + "id", + "origin", + ], +} + +# JSON schema for a test run on a build +JSON_TEST = { + "title": "test", + "description": + "A test run against a build.\n" + "\n" + "Could represent a result of execution of a test suite program, a " + "result of one of the tests done by the test suite program, as well " + "as a summary of a collection of test suite results.\n" + "\n" + "Each test run should normally have a dot-separated test \"path\" " + "specified in the \"path\" property, which could identify a specific " + "test within a test suite (e.g. \"LTPlite.sem01\"), a whole test " + "suite (e.g. \"LTPlite\"), or the summary of all tests for a build " + "("" - the empty string).", + "type": "object", + "properties": { + "build_id": { + "type": "string", + "description": + "ID of the tested build. The build must be " + "valid for the test run to be considered valid.", + "pattern": f"^{ORIGIN_ID_PATTERN}$", + }, + "id": { + "type": "string", + "description": + "ID of the test run\n" + "\n" + "Must start with a non-empty string identifying the CI " + "system which submitted the test run, followed by a colon " + "':' character. 
The rest of the string is generated by the " + "origin CI system, and must identify the test run uniquely " + "among all test runs, coming from that CI system.\n", + "pattern": f"^{ORIGIN_ID_PATTERN}$", + }, + "origin": { + "type": "string", + "description": + "The name of the CI system which submitted the test run", + "pattern": f"^{ORIGIN_PATTERN}$", + }, + "environment": { + "type": "object", + "description": + "The environment the test ran in. " + "E.g. a host, a set of hosts, or a lab; " + "amount of memory/storage/CPUs, for each host; " + "process environment variables, etc.", + "properties": { + "description": { + "type": "string", + "description": + "Human-readable description of the environment" + }, + "misc": { + "type": "object", + "description": + "Miscellaneous extra data about the environment", + }, + }, + "additionalProperties": False, + }, + "path": { + "type": "string", + "description": + "Dot-separated path to the node in the test classification " + "tree the executed test belongs to. E.g. \"ltp.sem01\". " + "The empty string signifies the root of the tree, i.e. all " + "tests for the build, executed by the origin CI system.", + "pattern": "^[.a-zA-Z0-9_-]*$", + "examples": [ + "", + "ltp", + "ltp.sem01", + ], + }, + "description": { + "type": "string", + "description": + "Human-readable description of the test run" + }, + "status": { + "type": "string", + "description": + "The test status string, one of the following. " + "\"ERROR\" - the test is faulty, " + "the status of the tested code is unknown. " + "\"FAIL\" - the test has failed, the tested code is faulty. " + "\"PASS\" - the test has passed, the tested code is correct. " + "\"DONE\" - the test has finished successfully, " + "the status of the tested code is unknown. " + "\"SKIP\" - the test wasn't executed, " + "the status of the tested code is unknown.\n" + "\n" + "The status names above are listed in priority order " + "(highest to lowest), which could be used for producing a " + "summary status for a collection of test runs, e.g. for all " + "testing done on a build, based on results of executed test " + "suites. The summary status would be the highest priority " + "status across all test runs in a collection.", + "enum": ["ERROR", "FAIL", "PASS", "DONE", "SKIP"], + }, + "waived": { + "type": "boolean", + "description": + "True if the test status should be ignored.\n" + "\n" + "Could be used for reporting test results without affecting " + "the overall test status and alerting the contacts concerned " + "with the tested code revision. For example, for collecting " + "test reliability statistics when the test is first " + "introduced, or is being fixed.", + }, + "start_time": { + "type": "string", + "format": "date-time", + "description": + "The time the test run was started", + "examples": [ + "2020-08-14T23:08:06.967000+00:00", + ], + }, + "duration": { + "type": "number", + "description": + "The number of seconds it took to run the test", + }, + "output_files": { + "type": "array", + "description": + "A list of test outputs: logs, dumps, etc.", + "items": JSON_RESOURCE, + }, + "misc": { + "type": "object", + "description": + "Miscellaneous extra data about the test run", + }, + }, + "additionalProperties": False, + "required": [ + "build_id", + "id", + "origin", + ], +} + +# JSON schema for I/O data +JSON = { + "title": "kcidb", + "description": + "Kernel CI report data. 
To be submitted to/queried from the common " + "report database.\n" + "\n" + "Objects in the data are identified and linked together using \"id\" " + "and \"*_id\" string properties. Each value of these properties " + "must start with a non-empty string identifying the CI system which " + "submitted the object, followed by a colon ':' character. The rest " + "of the string is generated by the origin CI system, and must " + "identify that object uniquely among all objects of the same type, " + "coming from that CI system.\n" + "\n" + "Any of the immediate properties (except \"version\") can be missing " + "or be an empty list with each submission/query, but only complete " + "data stored in the database should be considered valid.\n" + "\n" + "E.g. a test run referring to a non-existent build is allowed " + "into/from the database, but would only appear in reports once both " + "the build and its revision are present.\n" + "\n" + "No special meaning apart from \"data is missing\" is attached to " + "any immediate or deeper properties being omitted, when they're not " + "required, and no default values should be assumed for them.\n" + "At the same time, no properties can be null.\n" + "\n" + "Extra free-form data can be stored under \"misc\" fields associated " + "with various objects throughout the schema, if necessary. That data " + "could later be used as the basis for defining new properties to " + "house it.", + "type": "object", + "properties": { + "version": { + "type": "object", + "properties": { + "major": { + "type": "integer", + "const": JSON_VERSION_MAJOR, + "description": + "Major number of the schema version.\n" + "\n" + "Increases represent backward-incompatible " + "changes. E.g. deleting or renaming a " + "property, changing a property type, " + "restricting values, making a property " + "required, or adding a new required " + "property.", + }, + "minor": { + "type": "integer", + "minimum": 0, + "maximum": JSON_VERSION_MINOR, + "description": + "Minor number of the schema version.\n" + "\n" + "Increases represent backward-compatible " + "changes. E.g. relaxing value restrictions, " + "making a property optional, or adding a new " + "optional property.", + } + }, + "additionalProperties": False, + "required": [ + "major", + ], + }, + "revisions": { + "description": "List of code revisions", + "type": "array", + "items": JSON_REVISION, + }, + "builds": { + "description": "List of builds", + "type": "array", + "items": JSON_BUILD, + }, + "tests": { + "description": "List of test runs", + "type": "array", + "items": JSON_TEST, + }, + }, + "additionalProperties": False, + "required": [ + "version", + ] +} + + +def inherit(data): + """ + Inherit data, i.e. convert data adhering to the previous version of + the schema to satisfy this version of the schema. + + Args: + data: The data to inherit. + Will be modified in place. + + Returns: + The inherited data. 
+    """
+    # Extract origins into separate fields
+    origin_regex = re.compile(f"^({ORIGIN_PATTERN}):.*")
+    for obj_list_name in VERSION.previous.tree:
+        if obj_list_name:
+            for obj in data.get(obj_list_name, []):
+                obj["origin"] = origin_regex.search(obj["id"]).group(1)
+
+    # Remove origins from revision IDs
+    # Calm down pylint: disable=redefined-builtin,invalid-name
+    def remove_origin(id):
+        return id[id.index(':') + 1:]
+    for revision in data.get("revisions", []):
+        revision["id"] = remove_origin(revision["id"])
+    for build in data.get("builds", []):
+        build["revision_id"] = remove_origin(build["revision_id"])
+
+    # Rename git_repository_commit* to git_commit* in revisions
+    for revision in data.get("revisions", []):
+        for old, new in (("git_repository_commit_hash", "git_commit_hash"),
+                         ("git_repository_commit_name", "git_commit_name")):
+            if old in revision:
+                revision[new] = revision.pop(old)
+
+    # Update version
+    data['version'] = dict(major=JSON_VERSION_MAJOR,
+                           minor=JSON_VERSION_MINOR)
+    return data
+
+
+# The parent-child relationship tree
+TREE = {
+    "": ["revisions"],
+    "revisions": ["builds"],
+    "builds": ["tests"],
+    "tests": []
+}
+
+VERSION = Version(JSON_VERSION_MAJOR, JSON_VERSION_MINOR, JSON, TREE,
+                  v2.VERSION, inherit)
+
+__all__ = ["VERSION"]
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..081a1f9
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python3
+# Copyright (c) 2018 Red Hat, Inc. All rights reserved. This copyrighted
+# material is made available to anyone wishing to use, modify, copy, or
+# redistribute it subject to the terms and conditions of the GNU General
+# Public License v.2 or later.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+"""Install kcidb_io using setuptools."""
+import setuptools
+
+with open("README.md", "r") as fh:
+    LONG_DESCRIPTION = fh.read()
+
+setuptools.setup(
+    name="kcidb_io",
+    version="1",
+    python_requires=">=3.6",
+    author="kernelci.org",
+    author_email="kernelci@groups.io",
+    description="KCIDB = Linux Kernel CI reporting - I/O data library",
+    long_description=LONG_DESCRIPTION,
+    long_description_content_type="text/markdown",
+    url="https://github.com/kernelci/kcidb-io",
+    packages=setuptools.find_packages(),
+    classifiers=[
+        "Development Status :: 3 - Alpha",
+        "Intended Audience :: Developers",
+        "License :: OSI Approved :: GPLv2+",
+        "Operating System :: OS Independent",
+        "Programming Language :: Python :: 3.6",
+        "Topic :: Software Development :: Libraries",
+    ],
+    install_requires=[
+        "jsonschema[format]",
+    ],
+    extras_require=dict(
+        dev=[
+            "flake8",
+            "pylint",
+            "pytest",
+        ],
+    ),
+)
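
To make the v3 schema above easier to picture, here is a minimal sketch of a conforming report and of checking it with the validation entry point shown in the README. Everything in it is invented for illustration: the "example_ci" origin, the object IDs, and the 40-character commit hash standing in for a revision ID (assuming revision IDs are bare commit hashes, optionally with an appended patch hash, as the build's "revision_id" pattern suggests).

```python
import kcidb_io

# Start from an empty report carrying the latest schema version
data = kcidb_io.new()

# A made-up 40-character commit hash standing in for a revision ID
REVISION_ID = "1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b"

data["revisions"] = [dict(id=REVISION_ID, origin="example_ci")]
data["builds"] = [dict(revision_id=REVISION_ID,
                       id="example_ci:build-1",
                       origin="example_ci")]
data["tests"] = [dict(build_id="example_ci:build-1",
                      id="example_ci:test-1",
                      origin="example_ci",
                      path="ltp.sem01",
                      status="PASS",
                      waived=False)]

# Raises a validation error if the report doesn't conform to the schema
kcidb_io.schema.validate(data)
```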
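
The priority order described in the test "status" property (ERROR, FAIL, PASS, DONE, SKIP, highest to lowest) is given in prose only; the sketch below shows one way a summary status could be derived from it. The helper is not part of the library, and skipping "waived" runs is this example's own reading of the "waived" description.

```python
# Status values in priority order, highest first, as listed in the schema
STATUS_PRIORITY = ("ERROR", "FAIL", "PASS", "DONE", "SKIP")


def summarize_status(tests):
    """Produce a summary status for a collection of test run objects."""
    # Ignore waived runs and runs without a status
    statuses = [test["status"] for test in tests
                if "status" in test and not test.get("waived")]
    if not statuses:
        return None
    # The summary is the highest-priority status present
    return min(statuses, key=STATUS_PRIORITY.index)


# A suite with one failure among passes summarizes to "FAIL"
assert summarize_status([dict(status="PASS"),
                         dict(status="FAIL"),
                         dict(status="SKIP", waived=False)]) == "FAIL"
```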
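
The inherit() conversion can also be exercised directly, for instance from a test or a one-off migration script. The fragment below is only a sketch: the v2-style input is made up, and real code would presumably go through whatever higher-level upgrade helper the schema package exposes rather than call the module function by hand.

```python
from kcidb_io.schema import v3

# A made-up fragment of v2-style data: IDs carry the "origin:" prefix and
# the commit hash lives under "git_repository_commit_hash"
data = dict(
    version=dict(major=2, minor=0),
    revisions=[dict(
        id="example_ci:1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b",
        git_repository_commit_hash="1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b",
    )],
    builds=[dict(
        revision_id="example_ci:1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b",
        id="example_ci:build-1",
    )],
)

data = v3.inherit(data)

# Origins are now separate fields, revision IDs have lost the prefix, and
# the commit hash has moved to "git_commit_hash"
assert data["revisions"][0]["origin"] == "example_ci"
assert data["revisions"][0]["id"] == \
    "1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b"
assert data["revisions"][0]["git_commit_hash"] == \
    "1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b"
assert data["version"]["major"] == v3.VERSION.major
```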
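
Finally, the TREE mapping names the parent-to-child object list relationships, while the actual links are the "*_id" properties described earlier. A small sketch of following those links, e.g. to group builds under their revisions and test runs under their builds (the helper names are made up):

```python
from collections import defaultdict


def builds_per_revision(data):
    """Group a report's builds by the ID of the revision they belong to."""
    index = defaultdict(list)
    for build in data.get("builds", []):
        index[build["revision_id"]].append(build)
    return index


def tests_per_build(data):
    """Group a report's test runs by the ID of the build they ran against."""
    index = defaultdict(list)
    for test in data.get("tests", []):
        index[test["build_id"]].append(test)
    return index
```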