Skip to content

Commit

Permalink
Merge branch 'dev' into feat/SOF-6914-1
Browse files Browse the repository at this point in the history
  • Loading branch information
seankwarren authored Jan 18, 2024
2 parents 1d72852 + 835ddbf commit 650f9ce
Show file tree
Hide file tree
Showing 48 changed files with 633 additions and 428 deletions.
59 changes: 54 additions & 5 deletions .github/workflows/cicd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,14 @@ jobs:
runs-on: ubuntu-20.04
strategy:
matrix:
python-version: [3.6, 3.7, 3.8]
python-version:
- 3.8.6
- 3.9.x
# Enable after resolving Cython/PyYAML issue https://github.com/yaml/pyyaml/issues/724
# - 3.10.x
# - 3.11.x
# Enable after: AttributeError: module 'pkgutil' has no attribute 'ImpImporter'. Did you mean: 'zipimporter'?
# - 3.12.x

steps:
- name: Checkout this repository
Expand All @@ -54,13 +61,14 @@ jobs:
uses: ./actions/py/test
with:
python-version: ${{ matrix.python-version }}
test-script: src/py/esse/tests/validate.py

run-js-tests:
runs-on: ubuntu-latest
strategy:
matrix:
node-version: [14.x, 16.x, 18.x]
node-version:
- 14.x
- 20.x

steps:
- name: Checkout this repository
Expand Down Expand Up @@ -106,13 +114,12 @@ jobs:
- name: Publish JS release
uses: ./actions/js/publish
with:
node-version: 12.21.x
npm-token: ${{ secrets.NPM_TOKEN }}
github-token: ${{ secrets.BOT_GITHUB_TOKEN }}


publish-py-package:
needs: publish-js-package
needs: [run-py-linter, run-py-tests, run-js-tests]
runs-on: ubuntu-latest
if: github.ref_name == 'dev'

Expand All @@ -137,3 +144,45 @@ jobs:
pypi-username: ${{ secrets.PYPI_USERNAME }}
pypi-password: ${{ secrets.PYPI_PASSWORD }}
publish-tag: 'false'

# Job: build the schema/example docs with Node and publish the ./docs/js folder
# using the peaceiris/actions-gh-pages action.
deploy-docs:
# Only starts after both publish jobs have completed.
needs: [publish-py-package, publish-js-package]
runs-on: ubuntu-latest
strategy:
matrix:
node-version:
- 20.x
steps:
- uses: actions/checkout@v4
with:
# Also fetch Git LFS objects during checkout.
lfs: true

- name: Setup NodeJS
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node-version }}
registry-url: https://registry.npmjs.org

# Installs dependencies, runs the `build:assets-with-docs` npm script and lists
# docs/js so the generated output is visible in the job log.
- name: Build
shell: bash -l {0}
run: |
npm install
echo "Building Schemas and Examples"
npm run build:assets-with-docs
echo "List build directory ./docs/js"
ls -l docs/js
# Generates index pages for docs/js (the same folder that is published below).
- name: Generate Directory Listings
uses: jayanta525/github-pages-directory-listing@v4.0.0
with:
FOLDER: docs/js #directory to generate index

# NOTE(review): the branch guard below is commented out, so this step has no `if`
# condition of its own — confirm that deploying from every ref that reaches this
# job is intended.
- name: Deploy
uses: peaceiris/actions-gh-pages@v3
# If you're changing the branch from main,
# also change the `main` in `refs/heads/main`
# below accordingly.
# if: github.ref == 'refs/heads/main'
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: ./docs/js
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,11 @@ dist/
.nyc_output
.eslintcache

.husky
.husky/*
!.husky/pre-commit

schemas.js
schema.js

# Static assets, if any
site
14 changes: 14 additions & 0 deletions .husky/pre-commit
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/sh
# Husky pre-commit hook.
# When any staged file name ends in ".json", the JS and Python assets are
# rebuilt before lint-staged runs on the staged files.
. "$(dirname "$0")/_/husky.sh"

# Rebuild the generated JS and Python modules from the JSON assets.
rebuild_assets() {
    echo "JSON assets changed. Running build scripts."
    echo "Re-building JS assets."
    npm run build:assets
    echo "Re-building Python assets: requires virtual environment and dependencies (pip install .'[tests]')."
    python build_schemas.py
}

# Extended regex matched against the names of the staged files.
json_asset_regex="\.json$"

if git diff --cached --name-only | grep --quiet -E "$json_asset_regex"; then
    rebuild_assets
fi

npx lint-staged
14 changes: 14 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,18 @@ repos:
rev: 2023.6.13
hooks:
- id: ruff
exclude: ^src/py/mat3ra/esse/data
- id: black
exclude: ^src/py/mat3ra/esse/data
- repo: local
  hooks:
    - id: generate-python-modules
      name: Regenerate data modules classes from static assets
      # pre-commit splits `entry` into ONE argument vector (shell-style), so the
      # whole check-then-build logic has to live in a single `bash -c` script; a
      # second `bash -c ...` line would only become positional arguments of the
      # first one and never run.
      # `grep -E` is required for the `example|schema` alternation: with a basic
      # regex (`-e` alone) the `|` is a literal character and nothing matches.
      # yamllint disable rule:line-length
      entry: |
        bash -c 'if git diff --cached --name-only | grep -qE "example|schema"; then python build_schemas.py; else echo "Skipping model generation because static files were not changed."; fi'
      # yamllint enable rule:line-length
      language: system
      pass_filenames: false
      verbose: true
133 changes: 80 additions & 53 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,127 +4,154 @@

# ESSE

Essential Source of Schemas and Examples (ESSE) contains data formats and associated examples specifically designed for digital materials science (see refs. [1, 2](#links) below).
Essential Source of Schemas and Examples (ESSE) contains data formats and examples for common entities used in digital materials science (see refs. [1, 2](#links) below).

## Installation
Although the schemas are used to facilitate the operations of [mat3ra.com](https://mat3ra.com), they are designed to be generic and can be used in other applications. The open-source packages developed by Mat3ra.com use the schemas available in this repository.

ESSE can be used as a Node.js or Python package on the server side.
The latest variants of schemas and examples are available at [schemas.mat3ra.com](https://schemas.mat3ra.com/).

### Python
ESSE has a dual nature as both a Python and a Node.js package.

ESSE is compatible with Python 3.6+. It can be installed as a Python package either via PyPI or the repository as below.

#### PyPI
## 1. Installation

### 1.1. Python

ESSE is compatible with Python 3.8+.

#### 1.1.1. PyPI

```bash
pip install esse
pip install mat3ra-esse
```

#### Repository
#### 1.1.2. Repository

```bash
virtualenv .venv
source .venv/bin/activate
pip install -e PATH_TO_ESSE_REPOSITORY
```

### Node
### 1.2. Node

ESSE can be installed as a Node.js package either via NPM or the repository as below.

#### NPM
#### 1.2.1. NPM

```bash
npm install @exabyte-io/esse.js
```

#### Repository

Add `"esse-js": "file:PATH_TO_ESSE_REPOSITORY"` to `package.json`.

## Usage
## 2. Usage

ESSE contains separate but equivalent interfaces for Python and Javascript.
The package provides `ESSE` class that can be initialized and used as below.

### Python
### 2.1. Usage in Python

```python
from esse import ESSE
from mat3ra.esse import ESSE

es = ESSE()
schema = es.get_schema_by_id("material")
helper = ESSE()
schema = helper.get_schema_by_id("material")
```

### Node
### 2.2. Usage in Node/JS/TS

```javascript
import {ESSE} from "esse-js";

const es = new ESSE();
const schema = es.getSchemaById("material");
const helper = new ESSE();
const schema = helper.getSchemaById("material");
```

## Structure

## 3. Directory Structure

ESSE contains 3 main directories, [schema](schema), [example](example) and [src](src) outlined below.

### Schema
### 3.1. Schema

The schema directory contains the schemas specifying the rules to structure data. A set of core schemas, outlined below, are defined to facilitate the schema modularity.

#### Primitive

[Primitive](schema/core/primitive) directory contains a set of custom primitives that extends default standard primitive types allowed by schema, such as String and Number.
- [Primitive](schema/core/primitive) directory contains a set of custom primitives that extends default standard primitive types allowed by schema, such as String and Number.
Primitives are solely defined by the default primitives and can not be re-constructed from each other.
- [Abstract](schema/core/abstract) directory contains unit-less schemas that are constructed from default and custom primitives.
- [Reusable](schema/core/reusable) directory contains the schemas that are widely used in other schemas to avoid duplication, constructed from the abstract and primitive schemas.
- [Reference](schema/core/reference) directory contains the schemas defining the rules to structure the references to data sources.

#### Abstract
### 3.2. Example

[Abstract](schema/core/abstract) directory contains unit-less schemas that are constructed from default and custom primitives.
This directory contains the examples formed according to the schemas and implements the same directory structure as the schema directory.

#### Reusable
### 3.3. src

[Reusable](schema/core/reusable) directory contains the schemas that are widely used in other schemas to avoid duplication, constructed from the abstract and primitive schemas.
This directory contains Python and Javascript interfaces implementing the functionality to access and validate schemas and examples.

#### Reference

[Reference](schema/core/reference) directory contains the schemas defining the rules to structure the references to data sources.
## 4. Conventions

### Example
### 4.1. Generative vs Non-generative keys
Generative keys are the fields which allow for user input prior to calculation of the final property values. A flag is included in the schema comments on the fields in [property schemas](schema/properties_directory): `isGenerative:true` marks which fields to use as subschemas in the generation of a user input schema. On properties allowing user inputs, additional fields may be tagged, as in [the `file_content` property](schema/properties_directory/non-scalar/file_content.json)

This directory contains the examples formed according to the schemas and implements the same directory structure as the schema directory.

### src
## 5. Development

This directory contains Python and Javascript interfaces implementing the functionality to access and validate schemas and examples.
The schemas and examples are stored as JSON assets. The JSON assets are used to generate JS/TS and PY modules that can be used to access the schemas and examples in the corresponding runtimes. The modules are generated using the [build_schemas.py](./build_schemas.py) and [build_schemas.js](./build_schemas.js) scripts. The JS modules are generated during the transpilation step of the npm package. The PY modules are generated during development and distributed within the pip package.

The following outlines the development process workflow:

1. Setup: clone the repository and install the dependencies for both JS and PY (as explained below).
2. Edit code and commit changes.
3. Pre commit is used to regenerate the modules.
4. Push the changes to GitHub.
5. The GitHub workflow generates the fully resolved schemas (without "$ref", "allOf", etc.) and examples, and publishes them to [schemas.mat3ra.com](https://schemas.mat3ra.com/).
6. Publish the new version of the package to PyPI and npm.

The [pre-commit](.husky/pre-commit) is using both JS and PY runtime(s) to regenerate the schemas and examples.

[//]: # (TODO: consider reusing JS runtime and schemas build script for PY modules for consistency)
NOTE: The PY and JS modules are built from the same JSON sources, but using different runtimes (scripts), and thus may still differ. The fully resolved schemas (with merged "allOf") are created only for JS; they are used for the docs website.

### Generative vs Non-generative keys
Generative keys are the fields which allow for user input prior to calculation of the final property values. A flag is included in the schema comments on the fields in [property schemas](schema/properties_directory): `isGenerative:true` marks which fields to use as subschemas in the generation of a user input schema.
- On properties allowing user inputs, additional fields may be tagged, as in [the `file_content` property](schema/properties_directory/non-scalar/file_content.json)
### 5.1. Development in Python

## Tests
When developing in Python, the following should be taken into account:

Execute the following command from the root directory of this repository to run the tests. The script will run both Javascript and Python tests in which examples are validated against the corresponding schemas.
1. The modules containing the schemas and examples are generated using the [build_schemas.py](./build_schemas.py) script. It is set up to run automatically on every commit, but it is recommended to run it manually before committing to make sure that the changes are reflected in the modules. This can be done with `pre-commit run --all-files`. The pre-commit package can be installed with `pip install pre-commit`. To rebuild schemas manually, run (note `-e` in install):
```bash
virtualenv .venv
source .venv/bin/activate
pip install -e ."[tests]"
python build_schemas.py
```
2. Tests can be run using the following commands:
```bash
virtualenv .venv
source .venv/bin/activate
pip install ."[tests]"
python -m unittest discover --verbose --catch --start-directory tests/py/esse/
```

### 5.2. Development in Javascript/Typescript

See [package.json](package.json) for the list of available npm commands. The JS modules are generated using the [build_schemas.js](./build_schemas.js) script. It is set up to run automatically when the package is installed (see the "transpile" directive). To rebuild schemas manually, run:
```bash
bash run-tests.sh
npm install
npm run transpile
```
The script has been tested with node.js v12.16.3 and v8.17.0 as well as Python version 2.7 (up to version 2.3.0) and 3.6+ (for version 2020.10.19 and later).

## Contribution

This repository is an [open-source](LICENSE.md) work-in-progress and we welcome contributions. We suggest forking this repository and introducing the adjustments there, the changes in the fork can further be considered for merging into this repository as it is commonly done on Github (see [3](#links) below).
### 5.3. General Dev Suggestions

## Best Practices
This repository is an [open-source](LICENSE.md) work-in-progress and we welcome contributions. We suggest forking this repository and introducing the adjustments there, the changes in the fork can further be considered for merging into this repository as it is commonly done on GitHub (see [3](#links) below).

- Use unique IDs for schemas. One can run `sh refactor.sh` to automatically set the IDs and reformat examples.
Other suggestions:

- Use unique IDs for schemas
- Do not use circular references in the schemas, instead leave the type as object and add explanation to description.

## Links

1: [Data-centric online ecosystem for digital materials science](https://arxiv.org/pdf/1902.10838.pdf)
## 6. Links

1: [Data-centric online ecosystem for digital materials science](https://arxiv.org/pdf/1902.10838.pdf)
2: [CateCom: A Practical Data-Centric Approach to Categorization of Computational Models](https://pubs.acs.org/doi/abs/10.1021/acs.jcim.2c00112)

3: [GitHub Standard Fork & Pull Request Workflow](https://gist.github.com/Chaser324/ce0505fbed06b947d962)
25 changes: 24 additions & 1 deletion build_schemas.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
* downstream consumption to avoid FS calls in the browser.
*/
const fs = require("fs");
const path = require("path");
const mergeAllOf = require("json-schema-merge-allof");
const { ESSE } = require("./lib/js/esse");

const esse = new ESSE();
const { schemas } = esse;
const { schemas, wrappedExamples } = esse;
const schema = esse.buildGlobalSchema();

fs.writeFileSync(
Expand All @@ -17,3 +19,24 @@ fs.writeFileSync(
);

fs.writeFileSync("./schema.js", "module.exports = " + JSON.stringify(schema), "utf8");

// Everything below is the optional docs export: it only runs when BUILD_ASSETS
// is exactly "true"; otherwise the script stops here with a success exit code.
if (process.env.BUILD_ASSETS !== "true") {
    process.exit(0);
}

/**
 * Turn a schema id / example path into a relative file path by replacing
 * EVERY hyphen with an underscore.
 * A string pattern passed to `String.prototype.replace` only replaces the first
 * occurrence, so a global regex is used instead (`replaceAll` is avoided to
 * keep older Node versions working).
 * @param {string} id - schema `$id` or example path
 * @returns {string} the id with all "-" replaced by "_"
 */
function idToPath(id) {
    return id.replace(/-/g, "_");
}

/**
 * Write `data` as 4-space indented JSON to `filePath`, creating any missing
 * parent directories first.
 * @param {string} filePath - destination file
 * @param {*} data - JSON-serializable value
 */
function writeJsonAsset(filePath, data) {
    fs.mkdirSync(path.dirname(filePath), { recursive: true });
    fs.writeFileSync(filePath, JSON.stringify(data, null, 4), "utf8");
}

// Output root; an unset or empty BUILD_PATH falls back to the docs folder.
const subfolder = process.env.BUILD_PATH || "./docs/js/";
// "allOf" merging is on by default and disabled only by SKIP_MERGE_ALLOF=true.
const shouldMergeAllOf = process.env.SKIP_MERGE_ALLOF !== "true";

schemas.forEach((s) => {
    // Use a new binding instead of reassigning the callback parameter.
    const resolved = shouldMergeAllOf
        ? mergeAllOf(s, { resolvers: { defaultResolver: mergeAllOf.options.resolvers.title } })
        : s;
    writeJsonAsset(`${subfolder}/schema/${idToPath(resolved.$id)}.json`, resolved);
});
wrappedExamples.forEach((e) => {
    writeJsonAsset(`${subfolder}/example/${idToPath(e.path)}.json`, e.data);
});
Loading

0 comments on commit 650f9ce

Please sign in to comment.