From ea614b380d1f35cb57935d4dc9a19e600b70f110 Mon Sep 17 00:00:00 2001 From: Brian Pondi Date: Thu, 21 Nov 2024 22:43:09 +0100 Subject: [PATCH 1/4] mlp, tempcnn, svm, lightae wrappers --- .../processes/ml_class_random_forest.json | 67 ------ .../processes/ml_fit_class_random_forest.json | 113 ---------- inst/sits/processes3.R | 202 ++++++++++++++++++ 3 files changed, 202 insertions(+), 180 deletions(-) delete mode 100644 inst/sits/processes/ml_class_random_forest.json delete mode 100644 inst/sits/processes/ml_fit_class_random_forest.json diff --git a/inst/sits/processes/ml_class_random_forest.json b/inst/sits/processes/ml_class_random_forest.json deleted file mode 100644 index b8cc873..0000000 --- a/inst/sits/processes/ml_class_random_forest.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "id": "ml_class_random_forest", - "summary": "Initialize a Random Forest classification model", - "description": "Defines a Random Forest classification model with parameters for tree count, split variables, and randomization seed.", - "categories": [ - "machine learning" - ], - "experimental": true, - "parameters": [ - { - "name": "num_trees", - "description": "The number of trees to build within the Random Forest classification.", - "optional": true, - "default": 100, - "schema": { - "type": "integer", - "minimum": 1 - } - }, - { - "name": "max_variables", - "description": "Specifies the number of variables considered for each split at a node.\n\nOptions include:\n\n- *integer*: Specifies an exact number of variables per split.\n- `all`: All variables are considered for each split.\n- `log2`: Uses the base-2 logarithm of the variable count per split.\n- `onethird`: Uses one-third of the total variables per split.\n- `sqrt`: Uses the square root of the number of variables per split, often a default for classification.", - "schema": [ - { - "type": "integer", - "minimum": 1 - }, - { - "type": "string", - "enum": [ - "all", - "log2", - "onethird", - "sqrt" - ] - } - ] - }, - { - "name": 
"seed", - "description": "Optional random seed for sampling. If not provided or `null`, results may vary with each execution.", - "optional": true, - "default": null, - "schema": { - "type": [ - "integer", - "null" - ] - } - } - ], - "returns": { - "description": "A model definition that can be used in training or saved for later use.", - "schema": { - "type": "object", - "subtype": "ml-model" - } - }, - "links": [ - { - "href": "https://doi.org/10.1023/A:1010933404324", - "title": "Breiman (2001): Random Forests", - "type": "text/html", - "rel": "about" - } - ] -} diff --git a/inst/sits/processes/ml_fit_class_random_forest.json b/inst/sits/processes/ml_fit_class_random_forest.json deleted file mode 100644 index a57bb66..0000000 --- a/inst/sits/processes/ml_fit_class_random_forest.json +++ /dev/null @@ -1,113 +0,0 @@ -{ - "id": "ml_fit_class_random_forest", - "summary": "Train a random forest classification model", - "description": "Executes the fit of a random forest classification based on training data. The process does not include a separate split of the data in test, validation and training data. The Random Forest classification model is based on the approach by Breiman (2001).", - "categories": [ - "machine learning" - ], - "experimental": true, - "parameters": [ - { - "name": "training_set", - "description": "The training set for the Random Forest classification model, provided as a vector data cube. 
This set contains both the independent variables and dependent variable that the Random Forest algorithm analyses to learn patterns and relationships within the data.", - "schema": [ - { - "type": "object", - "subtype": "datacube", - "dimensions": [ - { - "type": "geometry" - }, - { - "type": "bands" - } - ] - }, - { - "type": "object", - "subtype": "datacube", - "dimensions": [ - { - "type": "geometry" - }, - { - "type": "other" - } - ] - } - ] - }, - { - "name": "target", - "description": "The dimension in the training set that represents the dependent variable for Random Forest classification.", - "schema": { - "type": "string" - } - }, - { - "name": "max_variables", - "description": "Specifies how many split variables will be used at a node.\n\nThe following options are available:\n\n- *integer*: The given number of variables are considered for each split.\n- `all`: All variables are considered for each split.\n- `log2`: The logarithm with base 2 of the number of variables are considered for each split.\n- `onethird`: A third of the number of variables are considered for each split.\n- `sqrt`: The square root of the number of variables are considered for each split. This is often the default for classification.", - "schema": [ - { - "type": "integer", - "minimum": 1 - }, - { - "type": "string", - "enum": [ - "all", - "log2", - "onethird", - "sqrt" - ] - } - ] - }, - { - "name": "num_trees", - "description": "The number of trees build within the Random Forest classification.", - "optional": true, - "default": 100, - "schema": { - "type": "integer", - "minimum": 1 - } - }, - { - "name": "train_test_split", - "description": "Splits the training_set into random train and test subsets.", - "optional": true, - "default": 0.8, - "schema": { - "type": "number" - } - }, - { - "name": "random_state", - "description": "A randomization seed to use for the random sampling in training. 
If not given or `null`, no seed is used and results may differ on subsequent use.", - "optional": true, - "default": null, - "schema": { - "type": [ - "integer", - "null" - ] - } - } - ], - "returns": { - "description": "A model object that can be saved with ``save_ml_model()`` and restored with ``load_ml_model()``.", - "schema": { - "type": "object", - "subtype": "ml-model" - } - }, - "links": [ - { - "href": "https://doi.org/10.1023/A:1010933404324", - "title": "Breiman (2001): Random Forests", - "type": "text/html", - "rel": "about" - } - ] -} \ No newline at end of file diff --git a/inst/sits/processes3.R b/inst/sits/processes3.R index 2ef546c..151a328 100644 --- a/inst/sits/processes3.R +++ b/inst/sits/processes3.R @@ -45,6 +45,208 @@ ml_random_forest <- function(num_trees = 100, model } +#* @openeo-process +ml_svm <- function(kernel = "radial", + degree = 3, + coef0 = 0, + cost = 10, + tolerance = 0.001, + epsilon = 0.1, + cachesize = 1000, + random_state = NULL, + classification = TRUE) { + base::print("ml_svm()") + formula = sits::sits_formula_linear() + + if (!classification) { + stop("Regression is not supported", call. = FALSE) + } + + if (!is.null(random_state)) { + set.seed(random_state) + } + + model <- sits::sits_svm( + formula = formula, + cachesize = cachesize, + kernel = kernel, + degree = degree, + coef0 = coef0, + cost = cost, + tolerance = tolerance, + epsilon = epsilon + ) + + base::attr(model, "random_state") <- random_state + + model +} + + +#* @openeo-process +ml_mlp <- function(layers = base::list(512, 512, 512), + dropout_rates = base::list(0.2, 0.3, 0.4), + optimizer = "adam", + learning_rate = 0.001, + epsilon = 0.00000001, + weight_decay = 0.000001, + epochs = 100, + batch_size = 64, + random_state = NULL, + classification = TRUE) { + base::print("ml_mlp()") + + if (!classification) { + stop("Regression is not supported", call. 
= FALSE) + } + + if (!is.null(random_state)) { + set.seed(random_state) + } + + optimizer_fn <- switch( + optimizer, + "adam" = torch::optim_adamw, + "adabound" = torch::optim_adabound, + "adabelief" = torch::optim_adabelief, + "madagrad" = torch::optim_madagrad, + "nadam" = torch::optim_nadam, + "qhadam" = torch::optim_qhadam, + "radam" = torch::optim_radam, + "swats" = torch::optim_swats, + "yogi" = torch::optim_yogi, + stop("Unsupported optimizer. currently only 'adam, adabound, adabelief, madagrad, nadam, qhadam, radam, swats, yogi' are supported. ", call. = FALSE) + ) + + opt_hparams <- base::list(lr = learning_rate, eps = epsilon, weight_decay = weight_decay) + + layers <- base::unlist(layers) + dropout_rates <- base::unlist(dropout_rates) + + + model <- sits::sits_mlp( + layers = layers, + dropout_rates = dropout_rates, + optimizer = optimizer_fn, + opt_hparams = opt_hparams, + epochs = epochs, + batch_size = batch_size + ) + + base::attr(model, "random_state") <- random_state + + model +} + +#* @openeo-process +ml_tempcnn <- function(cnn_layers = base::list(64, 64, 64), + cnn_kernels = base::list(5, 5, 5), + cnn_dropout_rates = base::list(0.2, 0.2, 0.2), + dense_layer_nodes = 256, + dense_layer_dropout_rate = 0.5, + optimizer = "adam", + learning_rate = 0.0005, + epsilon = 0.00000001, + weight_decay = 0.000001, + lr_decay_epochs = 1, + lr_decay_rate = 0.95, + epochs = 150, + batch_size = 64, + random_state = NULL) { + base::print("ml_tempcnn()") + + + if (!is.null(random_state)) { + set.seed(random_state) + } + + optimizer_fn <- switch( + optimizer, + "adam" = torch::optim_adamw, + "adabound" = torch::optim_adabound, + "adabelief" = torch::optim_adabelief, + "madagrad" = torch::optim_madagrad, + "nadam" = torch::optim_nadam, + "qhadam" = torch::optim_qhadam, + "radam" = torch::optim_radam, + "swats" = torch::optim_swats, + "yogi" = torch::optim_yogi, + stop("Unsupported optimizer. 
Currently only 'adam, adabound, adabelief, madagrad, nadam, qhadam, radam, swats, yogi' are supported.", call. = FALSE) + ) + + opt_hparams <- base::list(lr = learning_rate, eps = epsilon, weight_decay = weight_decay) + + cnn_layers <- base::unlist(cnn_layers) + cnn_kernels <- base::unlist(cnn_kernels) + cnn_dropout_rates <- base::unlist(cnn_dropout_rates) + + model <- sits::sits_tempcnn( + cnn_layers = cnn_layers, + cnn_kernels = cnn_kernels, + cnn_dropout_rates = cnn_dropout_rates, + dense_layer_nodes = dense_layer_nodes, + dense_layer_dropout_rate = dense_layer_dropout_rate, + optimizer = optimizer_fn, + opt_hparams = opt_hparams, + lr_decay_epochs = lr_decay_epochs, + lr_decay_rate = lr_decay_rate, + epochs = epochs, + batch_size = batch_size + ) + + base::attr(model, "random_state") <- random_state + + model +} + +#* @openeo-process +ml_lighttae <- function(epochs = 150, + batch_size = 128, + optimizer = "adam", + learning_rate = 0.0005, + epsilon = 0.00000001, + weight_decay = 0.0007, + lr_decay_epochs = 50, + lr_decay_rate = 1, + random_state = NULL) { + base::print("ml_lighttae()") + + if (!is.null(random_state)) { + set.seed(random_state) + } + + optimizer_fn <- switch( + optimizer, + "adam" = torch::optim_adamw, + "adabound" = torch::optim_adabound, + "adabelief" = torch::optim_adabelief, + "madagrad" = torch::optim_madagrad, + "nadam" = torch::optim_nadam, + "qhadam" = torch::optim_qhadam, + "radam" = torch::optim_radam, + "swats" = torch::optim_swats, + "yogi" = torch::optim_yogi, + stop("Unsupported optimizer. Currently only 'adam, adabound, adabelief, madagrad, nadam, qhadam, radam, swats, yogi' are supported.", call. 
= FALSE) + ) + + opt_hparams <- list(lr = learning_rate, eps = epsilon, weight_decay = weight_decay) + + model <- sits::sits_lighttae( + epochs = epochs, + batch_size = batch_size, + optimizer = optimizer_fn, + opt_hparams = opt_hparams, + lr_decay_epochs = lr_decay_epochs, + lr_decay_rate = lr_decay_rate + ) + + base::attr(model, "random_state") <- random_state + + model +} + + + #* @openeo-process ml_fit <- function(model, training_set, target="label") { base::print("ml_fit()") From 3af11bd9a64897fda1219b7d40730d505528262b Mon Sep 17 00:00:00 2001 From: Brian Pondi Date: Fri, 22 Nov 2024 13:47:11 +0100 Subject: [PATCH 2/4] schemas poc for ml and dl --- inst/sits/processes/ml_lighttae.json | 128 +++++++++++++++ inst/sits/processes/ml_mlp.json | 138 ++++++++++++++++ inst/sits/processes/ml_random_forest.json | 29 ++-- inst/sits/processes/ml_svm.json | 108 +++++++++++++ inst/sits/processes/ml_tempcnn.json | 186 ++++++++++++++++++++++ inst/sits/processes3.R | 6 +- 6 files changed, 582 insertions(+), 13 deletions(-) create mode 100644 inst/sits/processes/ml_lighttae.json create mode 100644 inst/sits/processes/ml_mlp.json create mode 100644 inst/sits/processes/ml_svm.json create mode 100644 inst/sits/processes/ml_tempcnn.json diff --git a/inst/sits/processes/ml_lighttae.json b/inst/sits/processes/ml_lighttae.json new file mode 100644 index 0000000..43162ec --- /dev/null +++ b/inst/sits/processes/ml_lighttae.json @@ -0,0 +1,128 @@ +{ + "id": "ml_lighttae", + "summary": "Initialize a Lightweight Temporal Self-Attention Encoder (LTAE) model", + "description": "Creates and configures a Lightweight Temporal Self-Attention Encoder (LTAE) model. LTAE is designed for efficient modeling of temporal dependencies in sequential data using self-attention mechanisms. 
Parameters such as optimizer, learning rate, and learning rate decay schedule can be specified.", + "categories": [ + "machine learning" + ], + "experimental": true, + "parameters": [ + { + "name": "epochs", + "description": "The number of training epochs. Defaults to 150.", + "optional": true, + "default": 150, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "batch_size", + "description": "The size of batches for training. Defaults to 128.", + "optional": true, + "default": 128, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "optimizer", + "description": "The optimizer to use for training. Defaults to 'adam'. Supported values include 'adam', 'adabound', 'adabelief', 'madagrad', 'nadam', 'qhadam', 'radam', 'swats', and 'yogi'.", + "optional": true, + "default": "adam", + "schema": { + "type": "string", + "enum": [ + "adam", + "adabound", + "adabelief", + "madagrad", + "nadam", + "qhadam", + "radam", + "swats", + "yogi" + ] + } + }, + { + "name": "learning_rate", + "description": "The initial learning rate for training. Defaults to 0.0005.", + "optional": true, + "default": 0.0005, + "schema": { + "type": "number", + "minimum": 0 + } + }, + { + "name": "epsilon", + "description": "The epsilon value for numerical stability in optimizers. Defaults to 1e-8.", + "optional": true, + "default": 0.00000001, + "schema": { + "type": "number", + "minimum": 0 + } + }, + { + "name": "weight_decay", + "description": "The weight decay (L2 penalty) value for regularization. Defaults to 0.0007.", + "optional": true, + "default": 0.0007, + "schema": { + "type": "number", + "minimum": 0 + } + }, + { + "name": "lr_decay_epochs", + "description": "The number of epochs after which the learning rate is decayed. 
Defaults to 50.", + "optional": true, + "default": 50, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "lr_decay_rate", + "description": "The rate at which the learning rate is decayed after the specified number of epochs. Defaults to 1.", + "optional": true, + "default": 1, + "schema": { + "type": "number", + "minimum": 0, + "maximum": 1 + } + }, + { + "name": "random_state", + "description": "A randomization seed to use for the random sampling in training. If not given or `null`, no seed is used and results may differ on subsequent use.", + "optional": true, + "default": null, + "schema": { + "type": [ + "integer", + "null" + ] + } + } + ], + "returns": { + "description": "An untrained Lightweight Temporal Self-Attention Encoder (LTAE) model instance.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + }, + "links": [ + { + "description": "Research paper describing the Lightweight Temporal Self-Attention Encoder (LTAE).", + "citation": "V. S. F. Garnot and L. Landrieu, “Lightweight Temporal Self-attention for Classifying Satellite Images Time Series,” in Advanced Analytics and Learning on Temporal Data, 2020, pp. 171–181, doi: 10.1007/978-3-030-65742-0_12.", + "url": "https://doi.org/10.1007/978-3-030-65742-0_12" + } + ] +} diff --git a/inst/sits/processes/ml_mlp.json b/inst/sits/processes/ml_mlp.json new file mode 100644 index 0000000..76bd5f6 --- /dev/null +++ b/inst/sits/processes/ml_mlp.json @@ -0,0 +1,138 @@ +{ + "id": "ml_mlp", + "summary": "Initialize a Multi-Layer Perceptron (MLP) model", + "description": "Creates and configures a Multi-Layer Perceptron (MLP) model. Parameters such as the architecture, optimizer, learning rate, and other training options can be specified.", + "categories": [ + "machine learning" + ], + "experimental": true, + "parameters": [ + { + "name": "layers", + "description": "A list specifying the number of neurons in each layer of the MLP. 
Defaults to [512, 512, 512].", + "optional": true, + "default": [512, 512, 512], + "schema": { + "type": "array", + "items": { + "type": "integer", + "minimum": 1 + }, + "minItems": 1 + } + }, + { + "name": "dropout_rates", + "description": "A list of dropout rates for each layer, corresponding to the layers. Must match the number of layers. Defaults to [0.2, 0.3, 0.4].", + "optional": true, + "default": [0.2, 0.3, 0.4], + "schema": { + "type": "array", + "items": { + "type": "number", + "minimum": 0, + "maximum": 1 + }, + "minItems": 1 + } + }, + { + "name": "optimizer", + "description": "The optimizer to use for training. Defaults to 'adam'. Supported values include 'adam', 'adabound', 'adabelief', 'madagrad', 'nadam', 'qhadam', 'radam', 'swats', and 'yogi'.", + "optional": true, + "default": "adam", + "schema": { + "type": "string", + "enum": [ + "adam", + "adabound", + "adabelief", + "madagrad", + "nadam", + "qhadam", + "radam", + "swats", + "yogi" + ] + } + }, + { + "name": "learning_rate", + "description": "The initial learning rate for training. Defaults to 0.001.", + "optional": true, + "default": 0.001, + "schema": { + "type": "number", + "minimum": 0 + } + }, + { + "name": "epsilon", + "description": "The epsilon value for numerical stability in optimizers. Defaults to 1e-8.", + "optional": true, + "default": 0.00000001, + "schema": { + "type": "number", + "minimum": 0 + } + }, + { + "name": "weight_decay", + "description": "The weight decay (L2 penalty) value for regularization. Defaults to 1e-6.", + "optional": true, + "default": 0.000001, + "schema": { + "type": "number", + "minimum": 0 + } + }, + { + "name": "epochs", + "description": "The number of training epochs. Defaults to 100.", + "optional": true, + "default": 100, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "batch_size", + "description": "The size of batches for training. 
Defaults to 64.", + "optional": true, + "default": 64, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "random_state", + "description": "A randomization seed to use for the random sampling in training. If not given or `null`, no seed is used and results may differ on subsequent use.", + "optional": true, + "default": null, + "schema": { + "type": [ + "integer", + "null" + ] + } + }, + { + "name": "classification", + "description": "Specifies whether the MLP model is for classification. Defaults to `true`.", + "optional": true, + "default": true, + "schema": { + "type": "boolean" + } + } + ], + "returns": { + "description": "An untrained MLP model instance.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + } +} diff --git a/inst/sits/processes/ml_random_forest.json b/inst/sits/processes/ml_random_forest.json index bcd5471..017db3b 100644 --- a/inst/sits/processes/ml_random_forest.json +++ b/inst/sits/processes/ml_random_forest.json @@ -17,20 +17,29 @@ } }, { - "name": "max_depth", - "description": "The maximum depth of each tree in the Random Forest. If `null`, nodes expand until all leaves are pure or contain fewer than min_samples_split samples.", + "name": "max_variables", + "description": "Specifies how many split variables will be used at a node.\n\nThe following options are available:\n\n- *integer*: The given number of variables are considered for each split.\n- `all`: All variables are considered for each split.\n- `log2`: The logarithm with base 2 of the number of variables are considered for each split.\n- `onethird`: A third of the number of variables are considered for each split.\n- `sqrt`: The square root of the number of variables are considered for each split. 
This is often the default for classification.", "optional": true, - "default": null, - "schema": { - "type": [ - "integer", - "null" - ] - } + "default": "sqrt", + "schema": [ + { + "type": "integer", + "minimum": 1 + }, + { + "type": "string", + "enum": [ + "all", + "log2", + "onethird", + "sqrt" + ] + } + ] }, { "name": "random_state", - "description": "Seed for the random number generator. If `null`, no seed is used, and results may vary.", + "description": "A randomization seed to use for the random sampling in training. If not given or `null`, no seed is used and results may differ on subsequent use.", "optional": true, "default": null, "schema": { diff --git a/inst/sits/processes/ml_svm.json b/inst/sits/processes/ml_svm.json new file mode 100644 index 0000000..860db50 --- /dev/null +++ b/inst/sits/processes/ml_svm.json @@ -0,0 +1,108 @@ +{ + "id": "ml_svm", + "summary": "Initialize an SVM model", + "description": "Creates and configures a Support Vector Machine (SVM) model. Parameters such as kernel type, cost, and tolerance can be specified.", + "categories": [ + "machine learning" + ], + "experimental": true, + "parameters": [ + { + "name": "kernel", + "description": "The kernel type to be used in the SVM. Supported values are 'linear', 'poly', 'radial', and 'sigmoid'. Defaults to 'radial'.", + "optional": true, + "default": "radial", + "schema": { + "type": "string", + "enum": ["linear", "poly", "radial", "sigmoid"] + } + }, + { + "name": "degree", + "description": "The degree of the polynomial kernel function. Ignored by kernels other than 'poly'. Defaults to 3.", + "optional": true, + "default": 3, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "coef0", + "description": "The independent term in kernel functions. Used by 'poly' and 'sigmoid' kernels. Defaults to 0.", + "optional": true, + "default": 0, + "schema": { + "type": "number" + } + }, + { + "name": "cost", + "description": "The penalty parameter C of the error term. 
A higher value encourages fewer margin violations. Defaults to 10.", + "optional": true, + "default": 10, + "schema": { + "type": "number", + "minimum": 0 + } + }, + { + "name": "tolerance", + "description": "Tolerance for stopping criterion. Defaults to 0.001.", + "optional": true, + "default": 0.001, + "schema": { + "type": "number", + "minimum": 0 + } + }, + { + "name": "epsilon", + "description": "Epsilon in the epsilon-SVR model. It specifies the epsilon-tube within which no penalty is associated in the training loss function. Defaults to 0.1.", + "optional": true, + "default": 0.1, + "schema": { + "type": "number", + "minimum": 0 + } + }, + { + "name": "cachesize", + "description": "The size of the kernel cache in megabytes. Defaults to 1000 MB.", + "optional": true, + "default": 1000, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "random_state", + "description": "A randomization seed to use for the random sampling in training. If not given or `null`, no seed is used and results may differ on subsequent use.", + "optional": true, + "default": null, + "schema": { + "type": [ + "integer", + "null" + ] + } + }, + { + "name": "classification", + "description": "Specifies whether the SVM model is for classification. Defaults to `true`.", + "optional": true, + "default": true, + "schema": { + "type": "boolean" + } + } + ], + "returns": { + "description": "An untrained SVM model instance.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + } +} diff --git a/inst/sits/processes/ml_tempcnn.json b/inst/sits/processes/ml_tempcnn.json new file mode 100644 index 0000000..c16526f --- /dev/null +++ b/inst/sits/processes/ml_tempcnn.json @@ -0,0 +1,186 @@ +{ + "id": "ml_tempcnn", + "summary": "Initialize a Temporal Convolutional Neural Network (TempCNN) model", + "description": "Creates and configures a Temporal Convolutional Neural Network (TempCNN) model for time-series data. 
Parameters such as the convolutional architecture, dense layers, optimizer, and learning rate schedule can be specified.", + "categories": [ + "machine learning", + "time-series" + ], + "experimental": true, + "parameters": [ + { + "name": "cnn_layers", + "description": "A list specifying the number of filters in each convolutional layer. Defaults to [64, 64, 64].", + "optional": true, + "default": [64, 64, 64], + "schema": { + "type": "array", + "items": { + "type": "integer", + "minimum": 1 + }, + "minItems": 1 + } + }, + { + "name": "cnn_kernels", + "description": "A list specifying the kernel size for each convolutional layer. Must match the number of CNN layers. Defaults to [5, 5, 5].", + "optional": true, + "default": [5, 5, 5], + "schema": { + "type": "array", + "items": { + "type": "integer", + "minimum": 1 + }, + "minItems": 1 + } + }, + { + "name": "cnn_dropout_rates", + "description": "A list of dropout rates for each convolutional layer. Must match the number of CNN layers. Defaults to [0.2, 0.2, 0.2].", + "optional": true, + "default": [0.2, 0.2, 0.2], + "schema": { + "type": "array", + "items": { + "type": "number", + "minimum": 0, + "maximum": 1 + }, + "minItems": 1 + } + }, + { + "name": "dense_layer_nodes", + "description": "The number of nodes in the dense layer following the convolutional layers. Defaults to 256.", + "optional": true, + "default": 256, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "dense_layer_dropout_rate", + "description": "The dropout rate for the dense layer. Defaults to 0.5.", + "optional": true, + "default": 0.5, + "schema": { + "type": "number", + "minimum": 0, + "maximum": 1 + } + }, + { + "name": "optimizer", + "description": "The optimizer to use for training. Defaults to 'adam'. 
Supported values include 'adam', 'adabound', 'adabelief', 'madagrad', 'nadam', 'qhadam', 'radam', 'swats', and 'yogi'.", + "optional": true, + "default": "adam", + "schema": { + "type": "string", + "enum": [ + "adam", + "adabound", + "adabelief", + "madagrad", + "nadam", + "qhadam", + "radam", + "swats", + "yogi" + ] + } + }, + { + "name": "learning_rate", + "description": "The initial learning rate for training. Defaults to 0.0005.", + "optional": true, + "default": 0.0005, + "schema": { + "type": "number", + "minimum": 0 + } + }, + { + "name": "epsilon", + "description": "The epsilon value for numerical stability in optimizers. Defaults to 1e-8.", + "optional": true, + "default": 0.00000001, + "schema": { + "type": "number", + "minimum": 0 + } + }, + { + "name": "weight_decay", + "description": "The weight decay (L2 penalty) value for regularization. Defaults to 1e-6.", + "optional": true, + "default": 0.000001, + "schema": { + "type": "number", + "minimum": 0 + } + }, + { + "name": "lr_decay_epochs", + "description": "The number of epochs after which the learning rate is decayed. Defaults to 1.", + "optional": true, + "default": 1, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "lr_decay_rate", + "description": "The rate at which the learning rate is decayed after the specified number of epochs. Defaults to 0.95.", + "optional": true, + "default": 0.95, + "schema": { + "type": "number", + "minimum": 0, + "maximum": 1 + } + }, + { + "name": "epochs", + "description": "The number of training epochs. Defaults to 150.", + "optional": true, + "default": 150, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "batch_size", + "description": "The size of batches for training. Defaults to 64.", + "optional": true, + "default": 64, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "random_state", + "description": "Seed for the random number generator. 
If `null`, no seed is used, and results may vary.", + "optional": true, + "default": null, + "schema": { + "type": [ + "integer", + "null" + ] + } + } + ], + "returns": { + "description": "An untrained TempCNN model instance.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + } +} diff --git a/inst/sits/processes3.R b/inst/sits/processes3.R index 151a328..6544d81 100644 --- a/inst/sits/processes3.R +++ b/inst/sits/processes3.R @@ -30,7 +30,7 @@ load_collection <- function(id, #* @openeo-process ml_random_forest <- function(num_trees = 100, - max_depth = NULL, + max_variables ="sqrt", random_state = NULL, classification = TRUE) { base::print("ml_random_forest()") @@ -38,8 +38,8 @@ ml_random_forest <- function(num_trees = 100, stop("Regression is not supported", call. = FALSE) } model <- sits::sits_rfor( - num_trees = num_trees, - mtry = max_depth + num_trees = num_trees + # mtry = max_variables , TO DO, handle max_variables param, use default for now ) base::attr(model, "random_state") <- random_state model From 4b9ccae5436b83c067853828e28c00186d66d5dd Mon Sep 17 00:00:00 2001 From: Brian Pondi Date: Mon, 25 Nov 2024 11:56:29 +0100 Subject: [PATCH 3/4] tae model plus schema --- inst/sits/processes/ml_tae.json | 118 ++++++++++++++++++++++++++++++++ inst/sits/processes3.R | 47 +++++++++++++ 2 files changed, 165 insertions(+) create mode 100644 inst/sits/processes/ml_tae.json diff --git a/inst/sits/processes/ml_tae.json b/inst/sits/processes/ml_tae.json new file mode 100644 index 0000000..6263ae7 --- /dev/null +++ b/inst/sits/processes/ml_tae.json @@ -0,0 +1,118 @@ +{ + "id": "ml_tae", + "summary": "Initialize a Temporal Attention Encoder (TAE) model", + "description": "Creates and configures a Temporal Attention Encoder (TAE) model. TAE leverages temporal attention mechanisms to process and analyze sequential data effectively. 
Parameters such as optimizer, learning rate, and decay schedules can be customized.", + "categories": [ + "machine learning", + "time-series", + "attention" + ], + "experimental": true, + "parameters": [ + { + "name": "epochs", + "description": "The number of training epochs. Defaults to 150.", + "optional": true, + "default": 150, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "batch_size", + "description": "The size of batches for training. Defaults to 64.", + "optional": true, + "default": 64, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "optimizer", + "description": "The optimizer to use for training. Defaults to 'adam'. Supported values include 'adam', 'adabound', 'adabelief', 'madagrad', 'nadam', 'qhadam', 'radam', 'swats', and 'yogi'.", + "optional": true, + "default": "adam", + "schema": { + "type": "string", + "enum": [ + "adam", + "adabound", + "adabelief", + "madagrad", + "nadam", + "qhadam", + "radam", + "swats", + "yogi" + ] + } + }, + { + "name": "learning_rate", + "description": "The initial learning rate for training. Defaults to 0.001.", + "optional": true, + "default": 0.001, + "schema": { + "type": "number", + "minimum": 0 + } + }, + { + "name": "epsilon", + "description": "The epsilon value for numerical stability in optimizers. Defaults to 1e-8.", + "optional": true, + "default": 0.00000001, + "schema": { + "type": "number", + "minimum": 0 + } + }, + { + "name": "weight_decay", + "description": "The weight decay (L2 penalty) value for regularization. Defaults to 0.000001.", + "optional": true, + "default": 0.000001, + "schema": { + "type": "number", + "minimum": 0 + } + }, + { + "name": "lr_decay_epochs", + "description": "The number of epochs after which the learning rate is decayed. 
Defaults to 1.", + "optional": true, + "default": 1, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "lr_decay_rate", + "description": "The rate at which the learning rate is decayed after the specified number of epochs. Defaults to 0.95.", + "optional": true, + "default": 0.95, + "schema": { + "type": "number", + "minimum": 0, + "maximum": 1 + } + } + ], + "returns": { + "description": "An untrained Temporal Attention Encoder (TAE) model instance.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + }, + "links": [ + { + "description": "Research paper describing the Temporal Attention Encoder (TAE) and its application in satellite image time series classification.", + "citation": "V. Garnot, L. Landrieu, S. Giordano, and N. Chehata, “Satellite Image Time Series Classification With Pixel-Set Encoders and Temporal Self-Attention,” in 2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2020, pp. 12322–12331, doi: 10.1109/CVPR42600.2020.01234.", + "url": "https://doi.org/10.1109/CVPR42600.2020.01234" + } + ] +} diff --git a/inst/sits/processes3.R b/inst/sits/processes3.R index 6544d81..f9f5269 100644 --- a/inst/sits/processes3.R +++ b/inst/sits/processes3.R @@ -199,6 +199,53 @@ ml_tempcnn <- function(cnn_layers = base::list(64, 64, 64), model } 
# Build a Temporal Attention Encoder (TAE) model definition.
#
# Thin wrapper around sits::sits_tae(): the optimizer name is resolved to an
# optimizer constructor, the scalar optimizer settings are packed into
# `opt_hparams`, and everything else is forwarded unchanged.
#
# Arguments:
#   epochs, batch_size  Forwarded as-is to sits::sits_tae().
#   optimizer           Single string naming the optimizer. "adam" resolves to
#                       torch::optim_adamw; "adabound", "adabelief",
#                       "madagrad", "nadam", "qhadam", "radam", "swats" and
#                       "yogi" are looked up in the optional 'torchopt'
#                       package.
#   learning_rate       Passed to the optimizer as `lr`.
#   epsilon             Passed to the optimizer as `eps`.
#   weight_decay        Passed to the optimizer as `weight_decay`.
#   lr_decay_epochs, lr_decay_rate
#                       Forwarded as-is to sits::sits_tae().
#   random_state        Optional seed. When not NULL it is handed to
#                       set.seed() and also recorded on the result.
#
# Returns:
#   The value of sits::sits_tae(), with `random_state` attached as the
#   "random_state" attribute.
#
# Errors:
#   - `optimizer` is not one of the nine supported names (or is not a single
#     string).
#   - A non-"adam" optimizer is requested but 'torchopt' is not installed.
#* @openeo-process
ml_tae <- function(epochs = 150,
                   batch_size = 64,
                   optimizer = "adam",
                   learning_rate = 0.001,
                   epsilon = 0.00000001,
                   weight_decay = 0.000001,
                   lr_decay_epochs = 1,
                   lr_decay_rate = 0.95,
                   random_state = NULL) {
  base::print("ml_tae()")

  if (!is.null(random_state)) {
    set.seed(random_state)
  }

  # Process-level optimizer name -> name of the exported constructor.
  # NOTE(review): torchopt spells the MADGRAD constructor `optim_madgrad`
  # (no second "a"); confirm against the installed torchopt version.
  optimizer_exports <- c(
    adam = "optim_adamw",
    adabound = "optim_adabound",
    adabelief = "optim_adabelief",
    madagrad = "optim_madgrad",
    nadam = "optim_nadam",
    qhadam = "optim_qhadam",
    radam = "optim_radam",
    swats = "optim_swats",
    yogi = "optim_yogi"
  )

  # Validate before dispatching: switch() on a number selects an arm by
  # position, so a non-character value must be rejected explicitly.
  is_supported <- is.character(optimizer) &&
    length(optimizer) == 1L &&
    optimizer %in% names(optimizer_exports)
  if (!is_supported) {
    stop("Unsupported optimizer. Currently only 'adam, adabound, adabelief, madagrad, nadam, qhadam, radam, swats, yogi' are supported.", call. = FALSE)
  }

  if (optimizer == "adam") {
    optimizer_fn <- torch::optim_adamw
  } else {
    # Only "adam" is served by torch itself; the remaining optimizers are not
    # exported by torch, so they are resolved from torchopt when available.
    if (!requireNamespace("torchopt", quietly = TRUE)) {
      stop(
        "Optimizer '", optimizer, "' requires the 'torchopt' package, ",
        "which is not installed.",
        call. = FALSE
      )
    }
    optimizer_fn <- getExportedValue("torchopt", optimizer_exports[[optimizer]])
  }

  opt_hparams <- list(
    lr = learning_rate,
    eps = epsilon,
    weight_decay = weight_decay
  )

  model <- sits::sits_tae(
    epochs = epochs,
    batch_size = batch_size,
    optimizer = optimizer_fn,
    opt_hparams = opt_hparams,
    lr_decay_epochs = lr_decay_epochs,
    lr_decay_rate = lr_decay_rate
  )

  # Keep the seed on the model object so later steps can read it back.
  base::attr(model, "random_state") <- random_state

  model
}
+ + #* @openeo-process ml_lighttae <- function(epochs = 150, batch_size = 128, From 423781a2cf0b51ea9a22e659345e17e33bdbeef4 Mon Sep 17 00:00:00 2001 From: Brian Pondi Date: Mon, 25 Nov 2024 12:10:06 +0100 Subject: [PATCH 4/4] ml smooth and schema --- inst/sits/processes/ml_smooth_class.json | 57 ++++++++++++++++++++++++ inst/sits/processes/ml_tae.json | 4 +- inst/sits/processes3.R | 10 ++--- 3 files changed, 63 insertions(+), 8 deletions(-) create mode 100644 inst/sits/processes/ml_smooth_class.json diff --git a/inst/sits/processes/ml_smooth_class.json b/inst/sits/processes/ml_smooth_class.json new file mode 100644 index 0000000..7205947 --- /dev/null +++ b/inst/sits/processes/ml_smooth_class.json @@ -0,0 +1,57 @@ +{ + "id": "ml_smooth_class", + "summary": "Apply a smoothing operation to a classified datacube.", + "description": "This process applies a smoothing operation to a classified datacube using a sliding window approach. The window size, neighborhood fraction, and smoothness factor determine the extent and behavior of the smoothing.", + "categories": [ + "machine learning" + ], + "experimental": true, + "parameters": [ + { + "name": "data", + "description": "The classified datacube to smooth.", + "schema": { + "type": "object", + "subtype": "datacube" + } + }, + { + "name": "window_size", + "description": "The size of the sliding window, defined in pixels. This determines the extent of the neighborhood considered during smoothing. 
Defaults to 7.", + "optional": true, + "default": 7, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "neighborhood_fraction", + "description": "The minimum fraction of similar neighboring pixels required to apply smoothing within the window. Values range from 0 (no similarity required) to 1 (all neighbors must be similar). Defaults to 0.5.", + "optional": true, + "default": 0.5, + "schema": { + "type": "number", + "minimum": 0, + "maximum": 1 + } + }, + { + "name": "smoothness", + "description": "A factor that controls the intensity of the smoothing effect. Higher values result in stronger smoothing. Defaults to 10.", + "optional": true, + "default": 10, + "schema": { + "type": "number", + "minimum": 0 + } + } + ], + "returns": { + "description": "A smoothed classified datacube.", + "schema": { + "type": "object", + "subtype": "datacube" + } + } +} diff --git a/inst/sits/processes/ml_tae.json b/inst/sits/processes/ml_tae.json index 6263ae7..29cce56 100644 --- a/inst/sits/processes/ml_tae.json +++ b/inst/sits/processes/ml_tae.json @@ -3,9 +3,7 @@ "summary": "Initialize a Temporal Attention Encoder (TAE) model", "description": "Creates and configures a Temporal Attention Encoder (TAE) model. TAE leverages temporal attention mechanisms to process and analyze sequential data effectively. 
Parameters such as optimizer, learning rate, and decay schedules can be customized.", "categories": [ - "machine learning", - "time-series", - "attention" + "machine learning" ], "experimental": true, "parameters": [ diff --git a/inst/sits/processes3.R b/inst/sits/processes3.R index f9f5269..8efe01f 100644 --- a/inst/sits/processes3.R +++ b/inst/sits/processes3.R @@ -381,11 +381,11 @@ ml_predict_probability <- function(data, model) { } #* @openeo-process -ml_class_smooth <- function(data, - window_size, - neighborhood_fraction, - smoothness) { - base::print("ml_class_smooth()") +ml_smooth_class <- function(data, + window_size = 7L, + neighborhood_fraction = 0.5, + smoothness = 10L) { + base::print("ml_smooth_class()") # Get current context of evaluation environment env <- openeocraft::current_env() # Preparing parameters