From ea614b380d1f35cb57935d4dc9a19e600b70f110 Mon Sep 17 00:00:00 2001
From: Brian Pondi <brian.pondi@uni-muenster.de>
Date: Thu, 21 Nov 2024 22:43:09 +0100
Subject: [PATCH 1/4] mlp, tempcnn, svm, lighttae wrappers

---
 .../processes/ml_class_random_forest.json     |  67 ------
 .../processes/ml_fit_class_random_forest.json | 113 ----------
 inst/sits/processes3.R                        | 202 ++++++++++++++++++
 3 files changed, 202 insertions(+), 180 deletions(-)
 delete mode 100644 inst/sits/processes/ml_class_random_forest.json
 delete mode 100644 inst/sits/processes/ml_fit_class_random_forest.json

diff --git a/inst/sits/processes/ml_class_random_forest.json b/inst/sits/processes/ml_class_random_forest.json
deleted file mode 100644
index b8cc873..0000000
--- a/inst/sits/processes/ml_class_random_forest.json
+++ /dev/null
@@ -1,67 +0,0 @@
-{
-    "id": "ml_class_random_forest",
-    "summary": "Initialize a Random Forest classification model",
-    "description": "Defines a Random Forest classification model with parameters for tree count, split variables, and randomization seed.",
-    "categories": [
-        "machine learning"
-    ],
-    "experimental": true,
-    "parameters": [
-        {
-            "name": "num_trees",
-            "description": "The number of trees to build within the Random Forest classification.",
-            "optional": true,
-            "default": 100,
-            "schema": {
-                "type": "integer",
-                "minimum": 1
-            }
-        },
-        {
-            "name": "max_variables",
-            "description": "Specifies the number of variables considered for each split at a node.\n\nOptions include:\n\n- *integer*: Specifies an exact number of variables per split.\n- `all`: All variables are considered for each split.\n- `log2`: Uses the base-2 logarithm of the variable count per split.\n- `onethird`: Uses one-third of the total variables per split.\n- `sqrt`: Uses the square root of the number of variables per split, often a default for classification.",
-            "schema": [
-                {
-                    "type": "integer",
-                    "minimum": 1
-                },
-                {
-                    "type": "string",
-                    "enum": [
-                        "all",
-                        "log2",
-                        "onethird",
-                        "sqrt"
-                    ]
-                }
-            ]
-        },
-        {
-            "name": "seed",
-            "description": "Optional random seed for sampling. If not provided or `null`, results may vary with each execution.",
-            "optional": true,
-            "default": null,
-            "schema": {
-                "type": [
-                    "integer",
-                    "null"
-                ]
-            }
-        }
-    ],
-    "returns": {
-        "description": "A model definition that can be used in training or saved for later use.",
-        "schema": {
-            "type": "object",
-            "subtype": "ml-model"
-        }
-    },
-    "links": [
-        {
-            "href": "https://doi.org/10.1023/A:1010933404324",
-            "title": "Breiman (2001): Random Forests",
-            "type": "text/html",
-            "rel": "about"
-        }
-    ]
-}
diff --git a/inst/sits/processes/ml_fit_class_random_forest.json b/inst/sits/processes/ml_fit_class_random_forest.json
deleted file mode 100644
index a57bb66..0000000
--- a/inst/sits/processes/ml_fit_class_random_forest.json
+++ /dev/null
@@ -1,113 +0,0 @@
-{
-    "id": "ml_fit_class_random_forest",
-    "summary": "Train a random forest classification model",
-    "description": "Executes the fit of a random forest classification based on training data. The process does not include a separate split of the data in test, validation and training data. The Random Forest classification model is based on the approach by Breiman (2001).",
-    "categories": [
-        "machine learning"
-    ],
-    "experimental": true,
-    "parameters": [
-        {
-            "name": "training_set",
-            "description": "The training set for the Random Forest classification model, provided as a vector data cube. This set contains both the independent variables and dependent variable that the Random Forest algorithm analyses to learn patterns and relationships within the data.",
-            "schema": [
-                {
-                    "type": "object",
-                    "subtype": "datacube",
-                    "dimensions": [
-                        {
-                            "type": "geometry"
-                        },
-                        {
-                            "type": "bands"
-                        }
-                    ]
-                },
-                {
-                    "type": "object",
-                    "subtype": "datacube",
-                    "dimensions": [
-                        {
-                            "type": "geometry"
-                        },
-                        {
-                            "type": "other"
-                        }
-                    ]
-                }
-            ]
-        },
-        {
-            "name": "target",
-            "description": "The dimension in the training set that represents the dependent variable for Random Forest classification.",
-            "schema": {
-                "type": "string"
-            }
-        },
-        {
-            "name": "max_variables",
-            "description": "Specifies how many split variables will be used at a node.\n\nThe following options are available:\n\n- *integer*: The given number of variables are considered for each split.\n- `all`: All variables are considered for each split.\n- `log2`: The logarithm with base 2 of the number of variables are considered for each split.\n- `onethird`: A third of the number of variables are considered for each split.\n- `sqrt`: The square root of the number of variables are considered for each split. This is often the default for classification.",
-            "schema": [
-                {
-                    "type": "integer",
-                    "minimum": 1
-                },
-                {
-                    "type": "string",
-                    "enum": [
-                        "all",
-                        "log2",
-                        "onethird",
-                        "sqrt"
-                    ]
-                }
-            ]
-        },
-        {
-            "name": "num_trees",
-            "description": "The number of trees build within the Random Forest classification.",
-            "optional": true,
-            "default": 100,
-            "schema": {
-                "type": "integer",
-                "minimum": 1
-            }
-        },
-        {
-            "name": "train_test_split",
-            "description": "Splits the training_set into random train and test subsets.",
-            "optional": true,
-            "default": 0.8,
-            "schema": {
-                "type": "number"
-            }
-        },
-        {
-            "name": "random_state",
-            "description": "A randomization seed to use for the random sampling in training. If not given or `null`, no seed is used and results may differ on subsequent use.",
-            "optional": true,
-            "default": null,
-            "schema": {
-                "type": [
-                    "integer",
-                    "null"
-                ]
-            }
-        }
-    ],
-    "returns": {
-        "description": "A model object that can be saved with ``save_ml_model()`` and restored with ``load_ml_model()``.",
-        "schema": {
-            "type": "object",
-            "subtype": "ml-model"
-        }
-    },
-    "links": [
-        {
-            "href": "https://doi.org/10.1023/A:1010933404324",
-            "title": "Breiman (2001): Random Forests",
-            "type": "text/html",
-            "rel": "about"
-        }
-    ]
-}
\ No newline at end of file
diff --git a/inst/sits/processes3.R b/inst/sits/processes3.R
index 2ef546c..151a328 100644
--- a/inst/sits/processes3.R
+++ b/inst/sits/processes3.R
@@ -45,6 +45,208 @@ ml_random_forest <- function(num_trees = 100,
   model
 }
 
+#* @openeo-process
+ml_svm <- function(kernel = "radial",
+                   degree = 3,
+                   coef0 = 0,
+                   cost = 10,
+                   tolerance = 0.001,
+                   epsilon = 0.1,
+                   cachesize = 1000,
+                   random_state = NULL,
+                   classification = TRUE) {
+  # Define an SVM classifier via sits::sits_svm() with sits' linear formula.
+  # `classification` is checked before any sits call so regression requests
+  # fail fast; `random_state` seeds the RNG and is stored on the result.
+  base::print("ml_svm()")
+
+  if (!classification) {
+    stop("Regression is not supported", call. = FALSE)
+  }
+  if (!is.null(random_state)) {
+    set.seed(random_state)
+  }
+
+  linear_formula <- sits::sits_formula_linear()
+  model <- sits::sits_svm(
+    formula = linear_formula,
+    cachesize = cachesize,
+    kernel = kernel,
+    degree = degree,
+    coef0 = coef0,
+    cost = cost,
+    tolerance = tolerance,
+    epsilon = epsilon
+  )
+  base::attr(model, "random_state") <- random_state
+  model
+}
+
+
+#* @openeo-process
+ml_mlp <- function(layers = base::list(512, 512, 512),
+                   dropout_rates = base::list(0.2, 0.3, 0.4),
+                   optimizer = "adam",
+                   learning_rate = 0.001,
+                   epsilon = 0.00000001,
+                   weight_decay = 0.000001,
+                   epochs = 100,
+                   batch_size = 64,
+                   random_state = NULL,
+                   classification = TRUE) {
+  # Define a multi-layer perceptron via sits::sits_mlp(). Errors when
+  # `classification` is FALSE or `optimizer` is not a supported name;
+  # `random_state` seeds the RNG and is stored on the result.
+  base::print("ml_mlp()")
+
+  if (!classification) {
+    stop("Regression is not supported", call. = FALSE)
+  }
+  if (!is.null(random_state)) {
+    set.seed(random_state)
+  }
+
+  # NOTE(review): switch() evaluates only the selected branch. Confirm the
+  # installed torch exports each optimizer below; several look like
+  # torchopt names and "madagrad" may be meant as "madgrad".
+  optimizer_fn <- switch(
+    optimizer,
+    "adam" = torch::optim_adamw,
+    "adabound" = torch::optim_adabound,
+    "adabelief" = torch::optim_adabelief,
+    "madagrad" = torch::optim_madagrad,
+    "nadam" = torch::optim_nadam,
+    "qhadam" = torch::optim_qhadam,
+    "radam" = torch::optim_radam,
+    "swats" = torch::optim_swats,
+    "yogi" = torch::optim_yogi,
+    stop("Unsupported optimizer. Currently only 'adam, adabound, adabelief, madagrad, nadam, qhadam, radam, swats, yogi' are supported.", call. = FALSE)
+  )
+  opt_hparams <- base::list(lr = learning_rate, eps = epsilon, weight_decay = weight_decay)
+  layer_sizes <- base::unlist(layers)  # list input -> plain vector
+  layer_dropouts <- base::unlist(dropout_rates)  # list input -> plain vector
+
+  model <- sits::sits_mlp(
+    layers = layer_sizes,
+    dropout_rates = layer_dropouts,
+    optimizer = optimizer_fn,
+    opt_hparams = opt_hparams,
+    epochs = epochs,
+    batch_size = batch_size
+  )
+  base::attr(model, "random_state") <- random_state
+  model
+}
+
+#* @openeo-process
+ml_tempcnn <- function(cnn_layers = base::list(64, 64, 64),
+                       cnn_kernels = base::list(5, 5, 5),
+                       cnn_dropout_rates = base::list(0.2, 0.2, 0.2),
+                       dense_layer_nodes = 256,
+                       dense_layer_dropout_rate = 0.5,
+                       optimizer = "adam",
+                       learning_rate = 0.0005,
+                       epsilon = 0.00000001,
+                       weight_decay = 0.000001,
+                       lr_decay_epochs = 1,
+                       lr_decay_rate = 0.95,
+                       epochs = 150,
+                       batch_size = 64,
+                       random_state = NULL) {
+  # Define a TempCNN model via sits::sits_tempcnn(). `random_state` seeds
+  # the RNG and is stored on the result as an attribute; an `optimizer`
+  # name outside the switch() below raises an error.
+  base::print("ml_tempcnn()")
+
+  if (!is.null(random_state)) {
+    set.seed(random_state)
+  }
+
+  # NOTE(review): confirm the installed torch exports each optimizer below;
+  # several look like torchopt names and "madagrad" may mean "madgrad".
+  optim_ctor <- switch(
+    optimizer,
+    "adam" = torch::optim_adamw,
+    "adabound" = torch::optim_adabound,
+    "adabelief" = torch::optim_adabelief,
+    "madagrad" = torch::optim_madagrad,
+    "nadam" = torch::optim_nadam,
+    "qhadam" = torch::optim_qhadam,
+    "radam" = torch::optim_radam,
+    "swats" = torch::optim_swats,
+    "yogi" = torch::optim_yogi,
+    stop("Unsupported optimizer. Currently only 'adam, adabound, adabelief, madagrad, nadam, qhadam, radam, swats, yogi' are supported.", call. = FALSE)
+  )
+  hparams <- base::list(lr = learning_rate, eps = epsilon, weight_decay = weight_decay)
+  conv_filters <- base::unlist(cnn_layers)  # list input -> plain vector
+  conv_kernels <- base::unlist(cnn_kernels)  # list input -> plain vector
+  conv_dropouts <- base::unlist(cnn_dropout_rates)  # list input -> plain vector
+
+  model <- sits::sits_tempcnn(
+    cnn_layers = conv_filters,
+    cnn_kernels = conv_kernels,
+    cnn_dropout_rates = conv_dropouts,
+    dense_layer_nodes = dense_layer_nodes,
+    dense_layer_dropout_rate = dense_layer_dropout_rate,
+    optimizer = optim_ctor,
+    opt_hparams = hparams,
+    lr_decay_epochs = lr_decay_epochs,
+    lr_decay_rate = lr_decay_rate,
+    epochs = epochs,
+    batch_size = batch_size
+  )
+  base::attr(model, "random_state") <- random_state
+  model
+}
+
+#* @openeo-process
+ml_lighttae <- function(epochs = 150,
+                        batch_size = 128,
+                        optimizer = "adam",
+                        learning_rate = 0.0005,
+                        epsilon = 0.00000001,
+                        weight_decay = 0.0007,
+                        lr_decay_epochs = 50,
+                        lr_decay_rate = 1,
+                        random_state = NULL) {
+  # Define a LightTAE model via sits::sits_lighttae(). `random_state` seeds
+  # the RNG and is stored on the result; unknown `optimizer` names error.
+  base::print("ml_lighttae()")
+  if (!is.null(random_state)) {
+    set.seed(random_state)
+  }
+
+  # NOTE(review): confirm the installed torch exports each optimizer below;
+  # several look like torchopt names and "madagrad" may mean "madgrad".
+  optim_ctor <- switch(
+    optimizer,
+    "adam" = torch::optim_adamw,
+    "adabound" = torch::optim_adabound,
+    "adabelief" = torch::optim_adabelief,
+    "madagrad" = torch::optim_madagrad,
+    "nadam" = torch::optim_nadam,
+    "qhadam" = torch::optim_qhadam,
+    "radam" = torch::optim_radam,
+    "swats" = torch::optim_swats,
+    "yogi" = torch::optim_yogi,
+    stop("Unsupported optimizer. Currently only 'adam, adabound, adabelief, madagrad, nadam, qhadam, radam, swats, yogi' are supported.", call. = FALSE)
+  )
+  hparams <- base::list(lr = learning_rate, eps = epsilon, weight_decay = weight_decay)
+
+  model <- sits::sits_lighttae(
+    epochs = epochs,
+    batch_size = batch_size,
+    optimizer = optim_ctor,
+    opt_hparams = hparams,
+    lr_decay_epochs = lr_decay_epochs,
+    lr_decay_rate = lr_decay_rate
+  )
+  base::attr(model, "random_state") <- random_state
+  model
+}
+
+
+
 #* @openeo-process
 ml_fit <- function(model, training_set, target="label") {
   base::print("ml_fit()")

From 3af11bd9a64897fda1219b7d40730d505528262b Mon Sep 17 00:00:00 2001
From: Brian Pondi <brian.pondi@uni-muenster.de>
Date: Fri, 22 Nov 2024 13:47:11 +0100
Subject: [PATCH 2/4] schemas poc for ml and dl

---
 inst/sits/processes/ml_lighttae.json      | 128 +++++++++++++++
 inst/sits/processes/ml_mlp.json           | 138 ++++++++++++++++
 inst/sits/processes/ml_random_forest.json |  29 ++--
 inst/sits/processes/ml_svm.json           | 108 +++++++++++++
 inst/sits/processes/ml_tempcnn.json       | 186 ++++++++++++++++++++++
 inst/sits/processes3.R                    |   6 +-
 6 files changed, 582 insertions(+), 13 deletions(-)
 create mode 100644 inst/sits/processes/ml_lighttae.json
 create mode 100644 inst/sits/processes/ml_mlp.json
 create mode 100644 inst/sits/processes/ml_svm.json
 create mode 100644 inst/sits/processes/ml_tempcnn.json

diff --git a/inst/sits/processes/ml_lighttae.json b/inst/sits/processes/ml_lighttae.json
new file mode 100644
index 0000000..43162ec
--- /dev/null
+++ b/inst/sits/processes/ml_lighttae.json
@@ -0,0 +1,128 @@
+{
+    "id": "ml_lighttae",
+    "summary": "Initialize a Lightweight Temporal Self-Attention Encoder (LTAE) model",
+    "description": "Creates and configures a Lightweight Temporal Self-Attention Encoder (LTAE) model. LTAE is designed for efficient modeling of temporal dependencies in sequential data using self-attention mechanisms. Parameters such as optimizer, learning rate, and learning rate decay schedule can be specified.",
+    "categories": [
+        "machine learning"
+    ],
+    "experimental": true,
+    "parameters": [
+        {
+            "name": "epochs",
+            "description": "The number of training epochs. Defaults to 150.",
+            "optional": true,
+            "default": 150,
+            "schema": {
+                "type": "integer",
+                "minimum": 1
+            }
+        },
+        {
+            "name": "batch_size",
+            "description": "The size of batches for training. Defaults to 128.",
+            "optional": true,
+            "default": 128,
+            "schema": {
+                "type": "integer",
+                "minimum": 1
+            }
+        },
+        {
+            "name": "optimizer",
+            "description": "The optimizer to use for training. Defaults to 'adam'. Supported values include 'adam', 'adabound', 'adabelief', 'madagrad', 'nadam', 'qhadam', 'radam', 'swats', and 'yogi'.",
+            "optional": true,
+            "default": "adam",
+            "schema": {
+                "type": "string",
+                "enum": [
+                    "adam",
+                    "adabound",
+                    "adabelief",
+                    "madagrad",
+                    "nadam",
+                    "qhadam",
+                    "radam",
+                    "swats",
+                    "yogi"
+                ]
+            }
+        },
+        {
+            "name": "learning_rate",
+            "description": "The initial learning rate for training. Defaults to 0.0005.",
+            "optional": true,
+            "default": 0.0005,
+            "schema": {
+                "type": "number",
+                "minimum": 0
+            }
+        },
+        {
+            "name": "epsilon",
+            "description": "The epsilon value for numerical stability in optimizers. Defaults to 1e-8.",
+            "optional": true,
+            "default": 0.00000001,
+            "schema": {
+                "type": "number",
+                "minimum": 0
+            }
+        },
+        {
+            "name": "weight_decay",
+            "description": "The weight decay (L2 penalty) value for regularization. Defaults to 0.0007.",
+            "optional": true,
+            "default": 0.0007,
+            "schema": {
+                "type": "number",
+                "minimum": 0
+            }
+        },
+        {
+            "name": "lr_decay_epochs",
+            "description": "The number of epochs after which the learning rate is decayed. Defaults to 50.",
+            "optional": true,
+            "default": 50,
+            "schema": {
+                "type": "integer",
+                "minimum": 1
+            }
+        },
+        {
+            "name": "lr_decay_rate",
+            "description": "The rate at which the learning rate is decayed after the specified number of epochs. Defaults to 1.",
+            "optional": true,
+            "default": 1,
+            "schema": {
+                "type": "number",
+                "minimum": 0,
+                "maximum": 1
+            }
+        },
+        {
+            "name": "random_state",
+            "description": "A randomization seed to use for the random sampling in training. If not given or `null`, no seed is used and results may differ on subsequent use.",
+            "optional": true,
+            "default": null,
+            "schema": {
+                "type": [
+                    "integer",
+                    "null"
+                ]
+            }
+        }
+    ],
+    "returns": {
+        "description": "An untrained Lightweight Temporal Self-Attention Encoder (LTAE) model instance.",
+        "schema": {
+            "type": "object",
+            "subtype": "ml-model"
+        }
+    },
+    "links": [
+        {
+            "href": "https://doi.org/10.1007/978-3-030-65742-0_12",
+            "title": "Garnot and Landrieu (2020): Lightweight Temporal Self-attention for Classifying Satellite Images Time Series, in Advanced Analytics and Learning on Temporal Data, pp. 171–181",
+            "rel": "about"
+        }
+    ]
+}
diff --git a/inst/sits/processes/ml_mlp.json b/inst/sits/processes/ml_mlp.json
new file mode 100644
index 0000000..76bd5f6
--- /dev/null
+++ b/inst/sits/processes/ml_mlp.json
@@ -0,0 +1,138 @@
+{
+    "id": "ml_mlp",
+    "summary": "Initialize a Multi-Layer Perceptron (MLP) model",
+    "description": "Creates and configures a Multi-Layer Perceptron (MLP) model. Parameters such as the architecture, optimizer, learning rate, and other training options can be specified.",
+    "categories": [
+        "machine learning"
+    ],
+    "experimental": true,
+    "parameters": [
+        {
+            "name": "layers",
+            "description": "A list specifying the number of neurons in each layer of the MLP. Defaults to [512, 512, 512].",
+            "optional": true,
+            "default": [512, 512, 512],
+            "schema": {
+                "type": "array",
+                "items": {
+                    "type": "integer",
+                    "minimum": 1
+                },
+                "minItems": 1
+            }
+        },
+        {
+            "name": "dropout_rates",
+            "description": "A list of dropout rates for each layer, corresponding to the layers. Must match the number of layers. Defaults to [0.2, 0.3, 0.4].",
+            "optional": true,
+            "default": [0.2, 0.3, 0.4],
+            "schema": {
+                "type": "array",
+                "items": {
+                    "type": "number",
+                    "minimum": 0,
+                    "maximum": 1
+                },
+                "minItems": 1
+            }
+        },
+        {
+            "name": "optimizer",
+            "description": "The optimizer to use for training. Defaults to 'adam'. Supported values include 'adam', 'adabound', 'adabelief', 'madagrad', 'nadam', 'qhadam', 'radam', 'swats', and 'yogi'.",
+            "optional": true,
+            "default": "adam",
+            "schema": {
+                "type": "string",
+                "enum": [
+                    "adam",
+                    "adabound",
+                    "adabelief",
+                    "madagrad",
+                    "nadam",
+                    "qhadam",
+                    "radam",
+                    "swats",
+                    "yogi"
+                ]
+            }
+        },
+        {
+            "name": "learning_rate",
+            "description": "The initial learning rate for training. Defaults to 0.001.",
+            "optional": true,
+            "default": 0.001,
+            "schema": {
+                "type": "number",
+                "minimum": 0
+            }
+        },
+        {
+            "name": "epsilon",
+            "description": "The epsilon value for numerical stability in optimizers. Defaults to 1e-8.",
+            "optional": true,
+            "default": 0.00000001,
+            "schema": {
+                "type": "number",
+                "minimum": 0
+            }
+        },
+        {
+            "name": "weight_decay",
+            "description": "The weight decay (L2 penalty) value for regularization. Defaults to 1e-6.",
+            "optional": true,
+            "default": 0.000001,
+            "schema": {
+                "type": "number",
+                "minimum": 0
+            }
+        },
+        {
+            "name": "epochs",
+            "description": "The number of training epochs. Defaults to 100.",
+            "optional": true,
+            "default": 100,
+            "schema": {
+                "type": "integer",
+                "minimum": 1
+            }
+        },
+        {
+            "name": "batch_size",
+            "description": "The size of batches for training. Defaults to 64.",
+            "optional": true,
+            "default": 64,
+            "schema": {
+                "type": "integer",
+                "minimum": 1
+            }
+        },
+        {
+            "name": "random_state",
+            "description": "A randomization seed to use for the random sampling in training. If not given or `null`, no seed is used and results may differ on subsequent use.",
+            "optional": true,
+            "default": null,
+            "schema": {
+                "type": [
+                    "integer",
+                    "null"
+                ]
+            }
+        },
+        {
+            "name": "classification",
+            "description": "Specifies whether the MLP model is for classification. Only classification is currently supported; setting this to `false` raises an error. Defaults to `true`.",
+            "optional": true,
+            "default": true,
+            "schema": {
+                "type": "boolean"
+            }
+        }
+    ],
+    "returns": {
+        "description": "An untrained MLP model instance.",
+        "schema": {
+            "type": "object",
+            "subtype": "ml-model"
+        }
+    }
+}
diff --git a/inst/sits/processes/ml_random_forest.json b/inst/sits/processes/ml_random_forest.json
index bcd5471..017db3b 100644
--- a/inst/sits/processes/ml_random_forest.json
+++ b/inst/sits/processes/ml_random_forest.json
@@ -17,20 +17,29 @@
             }
         },
         {
-            "name": "max_depth",
-            "description": "The maximum depth of each tree in the Random Forest. If `null`, nodes expand until all leaves are pure or contain fewer than min_samples_split samples.",
+            "name": "max_variables",
+            "description": "Specifies how many split variables will be used at a node.\n\nThe following options are available:\n\n- *integer*: The given number of variables are considered for each split.\n- `all`: All variables are considered for each split.\n- `log2`: The logarithm with base 2 of the number of variables are considered for each split.\n- `onethird`: A third of the number of variables are considered for each split.\n- `sqrt`: The square root of the number of variables are considered for each split. This is often the default for classification.",
             "optional": true,
-            "default": null,
-            "schema": {
-                "type": [
-                    "integer",
-                    "null"
-                ]
-            }
+            "default": "sqrt",
+            "schema": [
+                {
+                    "type": "integer",
+                    "minimum": 1
+                },
+                {
+                    "type": "string",
+                    "enum": [
+                        "all",
+                        "log2",
+                        "onethird",
+                        "sqrt"
+                    ]
+                }
+            ]
         },
         {
             "name": "random_state",
-            "description": "Seed for the random number generator. If `null`, no seed is used, and results may vary.",
+            "description": "A randomization seed to use for the random sampling in training. If not given or `null`, no seed is used and results may differ on subsequent use.",
             "optional": true,
             "default": null,
             "schema": {
diff --git a/inst/sits/processes/ml_svm.json b/inst/sits/processes/ml_svm.json
new file mode 100644
index 0000000..860db50
--- /dev/null
+++ b/inst/sits/processes/ml_svm.json
@@ -0,0 +1,108 @@
+{
+    "id": "ml_svm",
+    "summary": "Initialize an SVM model",
+    "description": "Creates and configures a Support Vector Machine (SVM) model. Parameters such as kernel type, cost, and tolerance can be specified.",
+    "categories": [
+        "machine learning"
+    ],
+    "experimental": true,
+    "parameters": [
+        {
+            "name": "kernel",
+            "description": "The kernel type to be used in the SVM. Supported values are 'linear', 'poly', 'radial', and 'sigmoid'. Defaults to 'radial'.",
+            "optional": true,
+            "default": "radial",
+            "schema": {
+                "type": "string",
+                "enum": ["linear", "poly", "radial", "sigmoid"]
+            }
+        },
+        {
+            "name": "degree",
+            "description": "The degree of the polynomial kernel function. Ignored by kernels other than 'poly'. Defaults to 3.",
+            "optional": true,
+            "default": 3,
+            "schema": {
+                "type": "integer",
+                "minimum": 1
+            }
+        },
+        {
+            "name": "coef0",
+            "description": "The independent term in kernel functions. Used by 'poly' and 'sigmoid' kernels. Defaults to 0.",
+            "optional": true,
+            "default": 0,
+            "schema": {
+                "type": "number"
+            }
+        },
+        {
+            "name": "cost",
+            "description": "The penalty parameter C of the error term. A higher value encourages fewer margin violations. Defaults to 10.",
+            "optional": true,
+            "default": 10,
+            "schema": {
+                "type": "number",
+                "minimum": 0
+            }
+        },
+        {
+            "name": "tolerance",
+            "description": "Tolerance for stopping criterion. Defaults to 0.001.",
+            "optional": true,
+            "default": 0.001,
+            "schema": {
+                "type": "number",
+                "minimum": 0
+            }
+        },
+        {
+            "name": "epsilon",
+            "description": "Epsilon in the epsilon-SVR model. It specifies the epsilon-tube within which no penalty is associated in the training loss function. Defaults to 0.1.",
+            "optional": true,
+            "default": 0.1,
+            "schema": {
+                "type": "number",
+                "minimum": 0
+            }
+        },
+        {
+            "name": "cachesize",
+            "description": "The size of the kernel cache in megabytes. Defaults to 1000 MB.",
+            "optional": true,
+            "default": 1000,
+            "schema": {
+                "type": "integer",
+                "minimum": 1
+            }
+        },
+        {
+            "name": "random_state",
+            "description": "A randomization seed to use for the random sampling in training. If not given or `null`, no seed is used and results may differ on subsequent use.",
+            "optional": true,
+            "default": null,
+            "schema": {
+                "type": [
+                    "integer",
+                    "null"
+                ]
+            }
+        },
+        {
+            "name": "classification",
+            "description": "Specifies whether the SVM model is for classification. Only classification is currently supported; setting this to `false` raises an error. Defaults to `true`.",
+            "optional": true,
+            "default": true,
+            "schema": {
+                "type": "boolean"
+            }
+        }
+    ],
+    "returns": {
+        "description": "An untrained SVM model instance.",
+        "schema": {
+            "type": "object",
+            "subtype": "ml-model"
+        }
+    }
+}
diff --git a/inst/sits/processes/ml_tempcnn.json b/inst/sits/processes/ml_tempcnn.json
new file mode 100644
index 0000000..c16526f
--- /dev/null
+++ b/inst/sits/processes/ml_tempcnn.json
@@ -0,0 +1,186 @@
+{
+    "id": "ml_tempcnn",
+    "summary": "Initialize a Temporal Convolutional Neural Network (TempCNN) model",
+    "description": "Creates and configures a Temporal Convolutional Neural Network (TempCNN) model for time-series data. Parameters such as the convolutional architecture, dense layers, optimizer, and learning rate schedule can be specified.",
+    "categories": [
+        "machine learning",
+        "time-series"
+    ],
+    "experimental": true,
+    "parameters": [
+        {
+            "name": "cnn_layers",
+            "description": "A list specifying the number of filters in each convolutional layer. Defaults to [64, 64, 64].",
+            "optional": true,
+            "default": [64, 64, 64],
+            "schema": {
+                "type": "array",
+                "items": {
+                    "type": "integer",
+                    "minimum": 1
+                },
+                "minItems": 1
+            }
+        },
+        {
+            "name": "cnn_kernels",
+            "description": "A list specifying the kernel size for each convolutional layer. Must match the number of CNN layers. Defaults to [5, 5, 5].",
+            "optional": true,
+            "default": [5, 5, 5],
+            "schema": {
+                "type": "array",
+                "items": {
+                    "type": "integer",
+                    "minimum": 1
+                },
+                "minItems": 1
+            }
+        },
+        {
+            "name": "cnn_dropout_rates",
+            "description": "A list of dropout rates for each convolutional layer. Must match the number of CNN layers. Defaults to [0.2, 0.2, 0.2].",
+            "optional": true,
+            "default": [0.2, 0.2, 0.2],
+            "schema": {
+                "type": "array",
+                "items": {
+                    "type": "number",
+                    "minimum": 0,
+                    "maximum": 1
+                },
+                "minItems": 1
+            }
+        },
+        {
+            "name": "dense_layer_nodes",
+            "description": "The number of nodes in the dense layer following the convolutional layers. Defaults to 256.",
+            "optional": true,
+            "default": 256,
+            "schema": {
+                "type": "integer",
+                "minimum": 1
+            }
+        },
+        {
+            "name": "dense_layer_dropout_rate",
+            "description": "The dropout rate for the dense layer. Defaults to 0.5.",
+            "optional": true,
+            "default": 0.5,
+            "schema": {
+                "type": "number",
+                "minimum": 0,
+                "maximum": 1
+            }
+        },
+        {
+            "name": "optimizer",
+            "description": "The optimizer to use for training. Defaults to 'adam'. Supported values include 'adam', 'adabound', 'adabelief', 'madagrad', 'nadam', 'qhadam', 'radam', 'swats', and 'yogi'.",
+            "optional": true,
+            "default": "adam",
+            "schema": {
+                "type": "string",
+                "enum": [
+                    "adam",
+                    "adabound",
+                    "adabelief",
+                    "madagrad",
+                    "nadam",
+                    "qhadam",
+                    "radam",
+                    "swats",
+                    "yogi"
+                ]
+            }
+        },
+        {
+            "name": "learning_rate",
+            "description": "The initial learning rate for training. Defaults to 0.0005.",
+            "optional": true,
+            "default": 0.0005,
+            "schema": {
+                "type": "number",
+                "minimum": 0
+            }
+        },
+        {
+            "name": "epsilon",
+            "description": "The epsilon value for numerical stability in optimizers. Defaults to 1e-8.",
+            "optional": true,
+            "default": 0.00000001,
+            "schema": {
+                "type": "number",
+                "minimum": 0
+            }
+        },
+        {
+            "name": "weight_decay",
+            "description": "The weight decay (L2 penalty) value for regularization. Defaults to 1e-6.",
+            "optional": true,
+            "default": 0.000001,
+            "schema": {
+                "type": "number",
+                "minimum": 0
+            }
+        },
+        {
+            "name": "lr_decay_epochs",
+            "description": "The number of epochs after which the learning rate is decayed. Defaults to 1.",
+            "optional": true,
+            "default": 1,
+            "schema": {
+                "type": "integer",
+                "minimum": 1
+            }
+        },
+        {
+            "name": "lr_decay_rate",
+            "description": "The rate at which the learning rate is decayed after the specified number of epochs. Defaults to 0.95.",
+            "optional": true,
+            "default": 0.95,
+            "schema": {
+                "type": "number",
+                "minimum": 0,
+                "maximum": 1
+            }
+        },
+        {
+            "name": "epochs",
+            "description": "The number of training epochs. Defaults to 150.",
+            "optional": true,
+            "default": 150,
+            "schema": {
+                "type": "integer",
+                "minimum": 1
+            }
+        },
+        {
+            "name": "batch_size",
+            "description": "The size of batches for training. Defaults to 64.",
+            "optional": true,
+            "default": 64,
+            "schema": {
+                "type": "integer",
+                "minimum": 1
+            }
+        },
+        {
+            "name": "random_state",
+            "description": "Seed for the random number generator. If `null`, no seed is used, and results may vary.",
+            "optional": true,
+            "default": null,
+            "schema": {
+                "type": [
+                    "integer",
+                    "null"
+                ]
+            }
+        }
+    ],
+    "returns": {
+        "description": "An untrained TempCNN model instance.",
+        "schema": {
+            "type": "object",
+            "subtype": "ml-model"
+        }
+    }
+}
diff --git a/inst/sits/processes3.R b/inst/sits/processes3.R
index 151a328..6544d81 100644
--- a/inst/sits/processes3.R
+++ b/inst/sits/processes3.R
@@ -30,7 +30,7 @@ load_collection <- function(id,
 
 #* @openeo-process
 ml_random_forest <- function(num_trees = 100,
-                             max_depth = NULL,
+                             max_variables ="sqrt",
                              random_state = NULL,
                              classification = TRUE) {
   base::print("ml_random_forest()")
@@ -38,8 +38,8 @@ ml_random_forest <- function(num_trees = 100,
     stop("Regression is not supported", call. = FALSE)
   }
   model <- sits::sits_rfor(
-    num_trees = num_trees,
-    mtry = max_depth
+    num_trees = num_trees
+    # TODO: map max_variables onto mtry; until then sits_rfor uses its default mtry
   )
   base::attr(model, "random_state") <- random_state
   model

From 4b9ccae5436b83c067853828e28c00186d66d5dd Mon Sep 17 00:00:00 2001
From: Brian Pondi <brian.pondi@uni-muenster.de>
Date: Mon, 25 Nov 2024 11:56:29 +0100
Subject: [PATCH 3/4] tae model plus schema

---
 inst/sits/processes/ml_tae.json | 118 ++++++++++++++++++++++++++++++++
 inst/sits/processes3.R          |  47 +++++++++++++
 2 files changed, 165 insertions(+)
 create mode 100644 inst/sits/processes/ml_tae.json

diff --git a/inst/sits/processes/ml_tae.json b/inst/sits/processes/ml_tae.json
new file mode 100644
index 0000000..6263ae7
--- /dev/null
+++ b/inst/sits/processes/ml_tae.json
@@ -0,0 +1,118 @@
+{
+    "id": "ml_tae",
+    "summary": "Initialize a Temporal Attention Encoder (TAE) model",
+    "description": "Creates and configures a Temporal Attention Encoder (TAE) model. TAE leverages temporal attention mechanisms to process and analyze sequential data effectively. Parameters such as optimizer, learning rate, and decay schedules can be customized.",
+    "categories": [
+        "machine learning",
+        "time-series",
+        "attention"
+    ],
+    "experimental": true,
+    "parameters": [
+        {
+            "name": "epochs",
+            "description": "The number of training epochs. Defaults to 150.",
+            "optional": true,
+            "default": 150,
+            "schema": {
+                "type": "integer",
+                "minimum": 1
+            }
+        },
+        {
+            "name": "batch_size",
+            "description": "The size of batches for training. Defaults to 64.",
+            "optional": true,
+            "default": 64,
+            "schema": {
+                "type": "integer",
+                "minimum": 1
+            }
+        },
+        {
+            "name": "optimizer",
+            "description": "The optimizer to use for training. Defaults to 'adam', which is implemented with torch's AdamW optimizer. Supported values are 'adam', 'adabound', 'adabelief', 'madagrad', 'nadam', 'qhadam', 'radam', 'swats', and 'yogi'.",
+            "optional": true,
+            "default": "adam",
+            "schema": {
+                "type": "string",
+                "enum": [
+                    "adam",
+                    "adabound",
+                    "adabelief",
+                    "madagrad",
+                    "nadam",
+                    "qhadam",
+                    "radam",
+                    "swats",
+                    "yogi"
+                ]
+            }
+        },
+        {
+            "name": "learning_rate",
+            "description": "The initial learning rate for training. Defaults to 0.001.",
+            "optional": true,
+            "default": 0.001,
+            "schema": {
+                "type": "number",
+                "minimum": 0
+            }
+        },
+        {
+            "name": "epsilon",
+            "description": "The epsilon value for numerical stability in optimizers. Defaults to 1e-8.",
+            "optional": true,
+            "default": 0.00000001,
+            "schema": {
+                "type": "number",
+                "minimum": 0
+            }
+        },
+        {
+            "name": "weight_decay",
+            "description": "The weight decay (L2 penalty) value for regularization. Defaults to 1e-6.",
+            "optional": true,
+            "default": 0.000001,
+            "schema": {
+                "type": "number",
+                "minimum": 0
+            }
+        },
+        {
+            "name": "lr_decay_epochs",
+            "description": "The number of epochs after which the learning rate is decayed. Defaults to 1.",
+            "optional": true,
+            "default": 1,
+            "schema": {
+                "type": "integer",
+                "minimum": 1
+            }
+        },
+        {
+            "name": "lr_decay_rate",
+            "description": "The rate at which the learning rate is decayed after the specified number of epochs. Defaults to 0.95.",
+            "optional": true,
+            "default": 0.95,
+            "schema": {
+                "type": "number",
+                "minimum": 0,
+                "maximum": 1
+            }
+        }
+    ],
+    "returns": {
+        "description": "An untrained Temporal Attention Encoder (TAE) model instance.",
+        "schema": {
+            "type": "object",
+            "subtype": "ml-model"
+        }
+    },
+    "links": [
+        {
+            "description": "Research paper describing the Temporal Attention Encoder (TAE) and its application in satellite image time series classification.",
+            "citation": "V. Garnot, L. Landrieu, S. Giordano, and N. Chehata, “Satellite Image Time Series Classification With Pixel-Set Encoders and Temporal Self-Attention,” in 2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2020, pp. 12322–12331, doi: 10.1109/CVPR42600.2020.01234.",
+            "url": "https://doi.org/10.1109/CVPR42600.2020.01234"
+        }
+    ]
+}
diff --git a/inst/sits/processes3.R b/inst/sits/processes3.R
index 6544d81..f9f5269 100644
--- a/inst/sits/processes3.R
+++ b/inst/sits/processes3.R
@@ -199,6 +199,53 @@ ml_tempcnn <- function(cnn_layers = base::list(64, 64, 64),
   model
 }
 
+#* @openeo-process
+ml_tae <- function(epochs = 150,
+                   batch_size = 64,
+                   optimizer = "adam",
+                   learning_rate = 0.001,
+                   epsilon = 0.00000001,
+                   weight_decay = 0.000001,
+                   lr_decay_epochs = 1,
+                   lr_decay_rate = 0.95,
+                   random_state = NULL) {
+  # Configures an untrained sits TAE model. A non-NULL random_state seeds R's
+  # RNG here and is also stored as an attribute on the returned model.
+  base::print("ml_tae()")
+  if (!is.null(random_state)) {
+    set.seed(random_state)
+  }
+
+  # "adam" resolves to torch's AdamW. The other optimizers are not exported by
+  # torch, so `torch::optim_*` failed for them at call time; they are provided
+  # by torchopt, which names the "madagrad" choice `optim_madgrad`.
+  optimizer_fn <- switch(
+    optimizer,
+    "adam" = torch::optim_adamw,
+    "adabound" = torchopt::optim_adabound,
+    "adabelief" = torchopt::optim_adabelief,
+    "madagrad" = torchopt::optim_madgrad,
+    "nadam" = torchopt::optim_nadam,
+    "qhadam" = torchopt::optim_qhadam,
+    "radam" = torchopt::optim_radam,
+    "swats" = torchopt::optim_swats,
+    "yogi" = torchopt::optim_yogi,
+    stop("Unsupported optimizer. Currently only 'adam, adabound, adabelief, madagrad, nadam, qhadam, radam, swats, yogi' are supported.", call. = FALSE)
+  )
+  opt_hparams <- list(lr = learning_rate, eps = epsilon, weight_decay = weight_decay)
+  model <- sits::sits_tae(
+    epochs = epochs,
+    batch_size = batch_size,
+    optimizer = optimizer_fn,
+    opt_hparams = opt_hparams,
+    lr_decay_epochs = lr_decay_epochs,
+    lr_decay_rate = lr_decay_rate
+  )
+  base::attr(model, "random_state") <- random_state
+  model
+}
+
+
 #* @openeo-process
 ml_lighttae <- function(epochs = 150,
                         batch_size = 128,

From 423781a2cf0b51ea9a22e659345e17e33bdbeef4 Mon Sep 17 00:00:00 2001
From: Brian Pondi <brian.pondi@uni-muenster.de>
Date: Mon, 25 Nov 2024 12:10:06 +0100
Subject: [PATCH 4/4] ml smooth and schema

---
 inst/sits/processes/ml_smooth_class.json | 57 ++++++++++++++++++++++++
 inst/sits/processes/ml_tae.json          |  4 +-
 inst/sits/processes3.R                   | 10 ++---
 3 files changed, 63 insertions(+), 8 deletions(-)
 create mode 100644 inst/sits/processes/ml_smooth_class.json

diff --git a/inst/sits/processes/ml_smooth_class.json b/inst/sits/processes/ml_smooth_class.json
new file mode 100644
index 0000000..7205947
--- /dev/null
+++ b/inst/sits/processes/ml_smooth_class.json
@@ -0,0 +1,57 @@
+{
+    "id": "ml_smooth_class",
+    "summary": "Apply a smoothing operation to a classified datacube",
+    "description": "This process applies a smoothing operation to a classified datacube using a sliding window approach. The window size, neighborhood fraction, and smoothness factor determine the extent and behavior of the smoothing.",
+    "categories": [
+        "machine learning"
+    ],
+    "experimental": true,
+    "parameters": [
+        {
+            "name": "data",
+            "description": "The classified datacube to smooth.",
+            "schema": {
+                "type": "object",
+                "subtype": "datacube"
+            }
+        },
+        {
+            "name": "window_size",
+            "description": "The size of the sliding window, defined in pixels. This determines the extent of the neighborhood considered during smoothing. Defaults to 7.",
+            "optional": true,
+            "default": 7,
+            "schema": {
+                "type": "integer",
+                "minimum": 1
+            }
+        },
+        {
+            "name": "neighborhood_fraction",
+            "description": "The minimum fraction of similar neighboring pixels required to apply smoothing within the window. Values range from 0 (no similarity required) to 1 (all neighbors must be similar). Defaults to 0.5.",
+            "optional": true,
+            "default": 0.5,
+            "schema": {
+                "type": "number",
+                "minimum": 0,
+                "maximum": 1
+            }
+        },
+        {
+            "name": "smoothness",
+            "description": "A factor that controls the intensity of the smoothing effect. Higher values result in stronger smoothing. Defaults to 10.",
+            "optional": true,
+            "default": 10,
+            "schema": {
+                "type": "number",
+                "minimum": 0
+            }
+        }
+    ],
+    "returns": {
+        "description": "A smoothed classified datacube.",
+        "schema": {
+            "type": "object",
+            "subtype": "datacube"
+        }
+    }
+}
diff --git a/inst/sits/processes/ml_tae.json b/inst/sits/processes/ml_tae.json
index 6263ae7..29cce56 100644
--- a/inst/sits/processes/ml_tae.json
+++ b/inst/sits/processes/ml_tae.json
@@ -3,9 +3,7 @@
     "summary": "Initialize a Temporal Attention Encoder (TAE) model",
     "description": "Creates and configures a Temporal Attention Encoder (TAE) model. TAE leverages temporal attention mechanisms to process and analyze sequential data effectively. Parameters such as optimizer, learning rate, and decay schedules can be customized.",
     "categories": [
-        "machine learning",
-        "time-series",
-        "attention"
+        "machine learning"
     ],
     "experimental": true,
     "parameters": [
diff --git a/inst/sits/processes3.R b/inst/sits/processes3.R
index f9f5269..8efe01f 100644
--- a/inst/sits/processes3.R
+++ b/inst/sits/processes3.R
@@ -381,11 +381,11 @@ ml_predict_probability <- function(data, model) {
 }
 
 #* @openeo-process
-ml_class_smooth <- function(data,
-                            window_size,
-                            neighborhood_fraction,
-                            smoothness) {
-  base::print("ml_class_smooth()")
+ml_smooth_class <- function(data,
+                            window_size = 7L,
+                            neighborhood_fraction = 0.5,
+                            smoothness = 10L) {
+  base::print("ml_smooth_class()")
   # Get current context of evaluation environment
   env <- openeocraft::current_env()
   # Preparing parameters