From fbff044a80f1f21b1d790a974405308f13ad5939 Mon Sep 17 00:00:00 2001 From: James Dean <24254612+AcylSilane@users.noreply.github.com> Date: Fri, 23 Apr 2021 16:51:24 -0700 Subject: [PATCH 1/5] Automate generation of assets objects from YAML configuration --- package.json | 1 + src/js/python/assets.js | 89 +++++++++-------------------------------- 2 files changed, 20 insertions(+), 70 deletions(-) diff --git a/package.json b/package.json index 573dffc6..bdfa8889 100644 --- a/package.json +++ b/package.json @@ -29,6 +29,7 @@ }, "license": "Apache-2.0", "dependencies": { + "js-yaml": "^4.1.0", "underscore": "^1.8.3", "underscore.string": "^3.3.4" }, diff --git a/src/js/python/assets.js b/src/js/python/assets.js index 7be9515d..7fd9e38f 100644 --- a/src/js/python/assets.js +++ b/src/js/python/assets.js @@ -1,8 +1,26 @@ import {makeObjectsFromContextProviderNames, readAssetFile} from "../utils"; +import {loadAll} from "js-yaml"; +const fs = require('fs') const applicationName = "python"; const executableName = "python"; +function models() { + const manifest_content = readAssetFile("python/ml", "model.yaml"); + const manifest_yaml = loadAll(manifest_content); + return manifest_yaml.map(model => { + const long_name = `pyml:model:${model.name}_${model.category}:${model.provider}`; + const short_name = `pyml_${model.name}_${model.category}_${model.provider}`; + return { + "content": readAssetFile("python/ml", `${long_name}.pyi`), + "name": `${short_name}.py`, + "contextProviders": [], + "applicationName": applicationName, + "executableName": executableName, + } + }); +} + // Here, we're returning a delayed-evaluation lambda, to avoid loading the asset files in scenarios where they're not // available, like on the client. export default () => { @@ -98,76 +116,7 @@ export default () => { "applicationName": applicationName, "executableName": executableName }, - { - "content": readAssetFile("python/ml", "pyml:model:adaboosted_trees_regression:sklearn.pyi"), - "name": "model_adaboosted_trees_regression_sklearn.py", - "contextProviders": [], - "applicationName": applicationName, - "executableName": executableName - }, - { - "content": readAssetFile("python/ml", "pyml:model:bagged_trees_regression:sklearn.pyi"), - "name": "model_bagged_trees_regression_sklearn.py", - "contextProviders": [], - "applicationName": applicationName, - "executableName": executableName - }, - { - "content": readAssetFile("python/ml", "pyml:model:gradboosted_trees_regression:sklearn.pyi"), - "name": "model_gradboosted_trees_regression_sklearn.py", - "contextProviders": [], - "applicationName": applicationName, - "executableName": executableName - }, - { - "content": readAssetFile("python/ml", "pyml:model:k_means_clustering:sklearn.pyi"), - "name": "model_k_means_clustering_sklearn.py", - "contextProviders": [], - "applicationName": applicationName, - "executableName": executableName - }, - { - "content": readAssetFile("python/ml", "pyml:model:kernel_ridge_regression:sklearn.pyi"), - "name": "model_kernel_ridge_regression_sklearn.py", - "contextProviders": [], - "applicationName": applicationName, - "executableName": executableName - }, - { - "content": readAssetFile("python/ml", "pyml:model:lasso_regression:sklearn.pyi"), - "name": "model_lasso_regression_sklearn.py", - "contextProviders": [], - "applicationName": applicationName, - "executableName": executableName - }, - { - "content": readAssetFile("python/ml", "pyml:model:multilayer_perceptron_regression:sklearn.pyi"), - "name": "model_mlp_sklearn.py", - "contextProviders": [], - "applicationName": applicationName, - "executableName": executableName - }, - { - "content": readAssetFile("python/ml", "pyml:model:random_forest_classification:sklearn.pyi"), - "name": "model_random_forest_classification_sklearn.py", - "contextProviders": [], - "applicationName": applicationName, - "executableName": executableName - }, - { - "content": readAssetFile("python/ml", "pyml:model:random_forest_regression:sklearn.pyi"), - "name": "model_random_forest_regression_sklearn.py", - "contextProviders": [], - "applicationName": applicationName, - "executableName": executableName - }, - { - "content": readAssetFile("python/ml", "pyml:model:ridge_regression:sklearn.pyi"), - "name": "model_ridge_regression_sklearn.py", - "contextProviders": [], - "applicationName": applicationName, - "executableName": executableName - }, + ...models(), { "content": readAssetFile("python/ml", "pyml:post_processing:parity_plot:matplotlib.pyi"), "name": "post_processing_parity_plot_matplotlib.py", From 30f853d03287d7d34140b3ddc05f8fb8b999c1a9 Mon Sep 17 00:00:00 2001 From: James Dean <24254612+AcylSilane@users.noreply.github.com> Date: Fri, 23 Apr 2021 18:00:15 -0700 Subject: [PATCH 2/5] Fix typo in short name of model --- src/js/python/assets.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/js/python/assets.js b/src/js/python/assets.js index 7fd9e38f..28b80f5b 100644 --- a/src/js/python/assets.js +++ b/src/js/python/assets.js @@ -10,7 +10,7 @@ function models() { const manifest_yaml = loadAll(manifest_content); return manifest_yaml.map(model => { const long_name = `pyml:model:${model.name}_${model.category}:${model.provider}`; - const short_name = `pyml_${model.name}_${model.category}_${model.provider}`; + const short_name = `model_${model.name}_${model.category}_${model.provider}`; return { "content": readAssetFile("python/ml", `${long_name}.pyi`), "name": `${short_name}.py`, From a8c768fd065e0adfb715b05d84d80971c5ced7b0 Mon Sep 17 00:00:00 2001 From: James Dean <24254612+AcylSilane@users.noreply.github.com> Date: Fri, 23 Apr 2021 18:00:26 -0700 Subject: [PATCH 3/5] Update name of MLP --- src/js/python/tree.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/js/python/tree.js b/src/js/python/tree.js index aaa14d41..25ecfceb 100644 --- a/src/js/python/tree.js +++ b/src/js/python/tree.js @@ -179,7 +179,7 @@ export default { ], "monitors": [monitors.standard_output], }, - "pyml:model:multilayer_perceptron:sklearn": { + "pyml:model:multilayer_perceptron_regression:sklearn": { "input": [ { "name": "model_mlp_sklearn.py", From 6a6dbc35dba26c5eb957aae2e83f41ff3833cf44 Mon Sep 17 00:00:00 2001 From: James Dean <24254612+AcylSilane@users.noreply.github.com> Date: Fri, 23 Apr 2021 18:03:55 -0700 Subject: [PATCH 4/5] Remove redundant FS requirement --- src/js/python/assets.js | 1 - 1 file changed, 1 deletion(-) diff --git a/src/js/python/assets.js b/src/js/python/assets.js index 28b80f5b..b508e809 100644 --- a/src/js/python/assets.js +++ b/src/js/python/assets.js @@ -1,7 +1,6 @@ import {makeObjectsFromContextProviderNames, readAssetFile} from "../utils"; import {loadAll} from "js-yaml"; -const fs = require('fs') const applicationName = "python"; const executableName = "python"; From f0879248d7f47f2300bbab760161933d8d209390 Mon Sep 17 00:00:00 2001 From: James Dean <24254612+AcylSilane@users.noreply.github.com> Date: Fri, 23 Apr 2021 18:41:03 -0700 Subject: [PATCH 5/5] Add automatic generator for models based on models.yaml. Tree evaluation is now lazily done, since we ran into the same eager-eval problem that assets.js ran into when the flavors package was made initially. --- src/js/python/tree.js | 414 +++++++++++++++++------------------------- 1 file changed, 164 insertions(+), 250 deletions(-) diff --git a/src/js/python/tree.js b/src/js/python/tree.js index 25ecfceb..55bd61a0 100644 --- a/src/js/python/tree.js +++ b/src/js/python/tree.js @@ -1,259 +1,173 @@ import monitors from "../allowed_monitors"; -export default { - "python": { - "monitors": [ - monitors.standard_output - ], - "results": [], - "flavors": { - "hello_world": { - "input": [ - { - "name": "script.py", - "templateName": "hello_world.py", - }, - { - "name": "requirements.txt", - } - ], +import {readAssetFile} from "../utils"; +import {loadAll} from "js-yaml"; + +function models() { + const manifest_content = readAssetFile("python/ml", "model.yaml"); + const manifest_yaml = loadAll(manifest_content); + return Object.assign({}, + ...manifest_yaml.map(model => { + const long_name = `pyml:model:${model.name}_${model.category}:${model.provider}`; + const short_name = `model_${model.name}_${model.category}_${model.provider}`; + return { + [long_name]: { + "input": [ + { + "name": `${short_name}.py`, + "templateName": `${short_name}.py`, + } + ], + "results": [ + "workflow:pyml_predict" + ], + "monitors": [monitors.standard_output], + } + } + }) + ); +} + +export default () => { + const tree = [ + { + "python": { "monitors": [ monitors.standard_output ], - }, - "espresso_xml_get_qpt_irr": { - "input": [ - { - "name": "espresso_xml_get_qpt_irr.py", + "results": [], + "flavors": { + "hello_world": { + "input": [ + { + "name": "script.py", + "templateName": "hello_world.py", + }, + { + "name": "requirements.txt", + } + ], + "monitors": [ + monitors.standard_output + ], }, - ], - "monitors": [ - monitors.standard_output - ], - }, - "pyml:setup_variables_packages": { - "input": [ - { - "name": "settings.py", - "templateName": "pyml_settings.py" + "espresso_xml_get_qpt_irr": { + "input": [ + { + "name": "espresso_xml_get_qpt_irr.py", + }, + ], + "monitors": [ + monitors.standard_output + ], }, - { - "name": "requirements.txt", - "templateName": "pyml_requirements.txt" - } - ], - "monitors": [monitors.standard_output], - }, - "pyml:custom": { - "input": [ - { - "name": "pyml_custom.py", - "templateName": "pyml_custom.py" - } - ], - "monitors": [monitors.standard_output], - }, - "pyml:data_input:read_csv:pandas": { - "input": [ - { - "name": "data_input_read_csv_pandas.py", - "templateName": "data_input_read_csv_pandas.py" - } - ], - "monitors": [monitors.standard_output], - }, - "pyml:data_input:train_test_split:sklearn": { - "input": [ - { - "name": "data_input_train_test_split_sklearn.py", - "templateName": "data_input_train_test_split_sklearn.py" - } - ], - "monitors": [monitors.standard_output], - }, - "pyml:pre_processing:min_max_scaler:sklearn": { - "input": [ - { - "name": "pre_processing_min_max_sklearn.py", - "templateName": "pre_processing_min_max_sklearn.py" - } - ], - "monitors": [monitors.standard_output], - }, - "pyml:pre_processing:remove_duplicates:pandas": { - "input": [ - { - "name": "pre_processing_remove_duplicates_pandas.py", - "templateName": "pre_processing_remove_duplicates_pandas.py" - } - ], - "monitors": [monitors.standard_output], - }, - "pyml:pre_processing:remove_missing:pandas": { - "input": [ - { - "name": "pre_processing_remove_missing_pandas.py", - "templateName": "pre_processing_remove_missing_pandas.py" - } - ], - "monitors": [monitors.standard_output], - }, - "pyml:pre_processing:standardization:sklearn": { - "input": [ - { - "name": "pre_processing_standardization_sklearn.py", - "templateName": "pre_processing_standardization_sklearn.py" - } - ], - "monitors": [monitors.standard_output], - }, - "pyml:model:adaboosted_trees_regression:sklearn": { - "input": [ - { - "name": "model_adaboosted_trees_regression_sklearn.py", - "templateName": "model_adaboosted_trees_regression_sklearn.py" - } - ], - "monitors": [monitors.standard_output], - "results": [ - "workflow:pyml_predict" - ], - }, - "pyml:model:bagged_trees_regression:sklearn": { - "input": [ - { - "name": "model_bagged_trees_regression_sklearn.py", - "templateName": "model_bagged_trees_regression_sklearn.py" - } - ], - "results": [ - "workflow:pyml_predict" - ], - "monitors": [monitors.standard_output], - }, - "pyml:model:gradboosted_trees_regression:sklearn": { - "input": [ - { - "name": "model_gradboosted_trees_regression_sklearn.py", - "templateName": "model_gradboosted_trees_regression_sklearn.py" - } - ], - "results": [ - "workflow:pyml_predict" - ], - "monitors": [monitors.standard_output], - }, - "pyml:model:k_means_clustering:sklearn": { - "input": [ - { - "name": "model_k_means_clustering_sklearn.py", - "templateName": "model_k_means_clustering_sklearn.py" - } - ], - "results": [ - "workflow:pyml_predict" - ], - "monitors": [monitors.standard_output], - }, - "pyml:model:kernel_ridge_regression:sklearn": { - "input": [ - { - "name": "model_kernel_ridge_regression_sklearn.py", - "templateName": "model_kernel_ridge_regression_sklearn.py" - } - ], - "results": [ - "workflow:pyml_predict" - ], - "monitors": [monitors.standard_output], - }, - "pyml:model:lasso_regression:sklearn": { - "input": [ - { - "name": "model_lasso_regression_sklearn.py", - "templateName": "model_lasso_regression_sklearn.py" - } - ], - "results": [ - "workflow:pyml_predict" - ], - "monitors": [monitors.standard_output], - }, - "pyml:model:multilayer_perceptron_regression:sklearn": { - "input": [ - { - "name": "model_mlp_sklearn.py", - "templateName": "model_mlp_sklearn.py" - } - ], - "results": [ - "workflow:pyml_predict" - ], - "monitors": [monitors.standard_output], - }, - "pyml:model:random_forest_classification:sklearn": { - "input": [ - { - "name": "model_random_forest_classification_sklearn.py", - "templateName": "model_random_forest_classification_sklearn.py" - } - ], - "results": [ - "workflow:pyml_predict" - ], - "monitors": [monitors.standard_output], - }, - "pyml:model:random_forest_regression:sklearn": { - "input": [ - { - "name": "model_random_forest_regression_sklearn.py", - "templateName": "model_random_forest_regression_sklearn.py" - } - ], - "results": [ - "workflow:pyml_predict" - ], - "monitors": [monitors.standard_output], - }, - "pyml:model:ridge_regression:sklearn": { - "input": [ - { - "name": "model_ridge_regression_sklearn.py", - "templateName": "model_ridge_regression_sklearn.py" - } - ], - "results": [ - "workflow:pyml_predict" - ], - "monitors": [monitors.standard_output], - }, - "pyml:post_processing:parity_plot:matplotlib": { - "input": [ - { - "name": "post_processing_parity_plot_matplotlib.py", - "templateName": "post_processing_parity_plot_matplotlib.py" - } - ], - "monitors": [monitors.standard_output], - }, - "pyml:post_processing:pca_2d_clusters:matplotlib": { - "input": [ - { - "name": "post_processing_pca_2d_clusters_matplotlib.py", - "templateName": "post_processing_pca_2d_clusters_matplotlib.py" - } - ], - "monitors": [monitors.standard_output], - }, - "pyml:post_processing:roc_curve:sklearn": { - "input": [ - { - "name": "post_processing_roc_curve_sklearn.py", - "templateName": "post_processing_roc_curve_sklearn.py" - } - ], - "monitors": [monitors.standard_output], - }, + "pyml:setup_variables_packages": { + "input": [ + { + "name": "settings.py", + "templateName": "pyml_settings.py" + }, + { + "name": "requirements.txt", + "templateName": "pyml_requirements.txt" + } + ], + "monitors": [monitors.standard_output], + }, + "pyml:custom": { + "input": [ + { + "name": "pyml_custom.py", + "templateName": "pyml_custom.py" + } + ], + "monitors": [monitors.standard_output], + }, + "pyml:data_input:read_csv:pandas": { + "input": [ + { + "name": "data_input_read_csv_pandas.py", + "templateName": "data_input_read_csv_pandas.py" + } + ], + "monitors": [monitors.standard_output], + }, + "pyml:data_input:train_test_split:sklearn": { + "input": [ + { + "name": "data_input_train_test_split_sklearn.py", + "templateName": "data_input_train_test_split_sklearn.py" + } + ], + "monitors": [monitors.standard_output], + }, + "pyml:pre_processing:min_max_scaler:sklearn": { + "input": [ + { + "name": "pre_processing_min_max_sklearn.py", + "templateName": "pre_processing_min_max_sklearn.py" + } + ], + "monitors": [monitors.standard_output], + }, + "pyml:pre_processing:remove_duplicates:pandas": { + "input": [ + { + "name": "pre_processing_remove_duplicates_pandas.py", + "templateName": "pre_processing_remove_duplicates_pandas.py" + } + ], + "monitors": [monitors.standard_output], + }, + "pyml:pre_processing:remove_missing:pandas": { + "input": [ + { + "name": "pre_processing_remove_missing_pandas.py", + "templateName": "pre_processing_remove_missing_pandas.py" + } + ], + "monitors": [monitors.standard_output], + }, + "pyml:pre_processing:standardization:sklearn": { + "input": [ + { + "name": "pre_processing_standardization_sklearn.py", + "templateName": "pre_processing_standardization_sklearn.py" + } + ], + "monitors": [monitors.standard_output], + }, + "pyml:post_processing:parity_plot:matplotlib": { + "input": [ + { + "name": "post_processing_parity_plot_matplotlib.py", + "templateName": "post_processing_parity_plot_matplotlib.py" + } + ], + "monitors": [monitors.standard_output], + }, + "pyml:post_processing:pca_2d_clusters:matplotlib": { + "input": [ + { + "name": "post_processing_pca_2d_clusters_matplotlib.py", + "templateName": "post_processing_pca_2d_clusters_matplotlib.py" + } + ], + "monitors": [monitors.standard_output], + }, + "pyml:post_processing:roc_curve:sklearn": { + "input": [ + { + "name": "post_processing_roc_curve_sklearn.py", + "templateName": "post_processing_roc_curve_sklearn.py" + } + ], + "monitors": [monitors.standard_output], + }, + ...models() + } + } } - } + ] + return tree.map(a=>a); }