Skip to content

Commit

Permalink
Support distmat as lower triangular for included distances
Browse files Browse the repository at this point in the history
  • Loading branch information
asardaes committed Jun 30, 2024
1 parent 2e264fc commit ad3bf5b
Show file tree
Hide file tree
Showing 21 changed files with 302 additions and 36 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## Version 5.6.0
* Update Makevars for ARM version of Windows.
* Sanitize internal usage of `do.call` to avoid huge backtraces.
* Support lower triangular `distmat` objects for symmetric distances.

## Version 5.5.12
* Remove explicit C++ requirements.
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ S3method(as.data.frame,crossdist)
S3method(as.data.frame,pairdist)
S3method(as.matrix,crossdist)
S3method(as.matrix,pairdist)
S3method(base::as.matrix,distdiag)
S3method(base::dim,Distmat)
S3method(base::dim,DistmatLowerTriangular)
S3method(base::dim,SparseDistmat)
Expand Down Expand Up @@ -128,6 +129,7 @@ importFrom(rlang,"!!!")
importFrom(rlang,.data)
importFrom(rlang,as_environment)
importFrom(rlang,as_string)
importFrom(rlang,caller_env)
importFrom(rlang,enexpr)
importFrom(rlang,enexprs)
importFrom(rlang,env_bind)
Expand Down
2 changes: 1 addition & 1 deletion R/CLUSTERING-tsclust.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ pam_distmat <- function(series, control, distance, cent_char, family, args, trac
if (trace) cat("\n\tPrecomputing distance matrix...\n\n")

if (control$symmetric) {
distfun <- ddist2(distance, control, lower_triangular_only = TRUE)
distfun <- ddist2(distance, control, lower_triangular_only = cent_char != "fcmdd")
distmat <- methods::as(quoted_call(distfun, x = series, centroids = NULL, dots = args$dist),
"Distmat")
}
Expand Down
11 changes: 10 additions & 1 deletion R/DISTANCES-dtw-basic.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ dtw_basic <- function(x, y, window.size = NULL, norm = "L1",
dtw_basic_proxy <- function(x, y = NULL, window.size = NULL, norm = "L1",
step.pattern = dtw::symmetric2,
normalize = FALSE, sqrt.dist = TRUE, ...,
error.check = TRUE, pairwise = FALSE)
error.check = TRUE, pairwise = FALSE, lower_triangular_only = FALSE)
{
x <- tslist(x)
if (error.check) check_consistency(x, "vltslist")
Expand All @@ -130,6 +130,7 @@ dtw_basic_proxy <- function(x, y = NULL, window.size = NULL, norm = "L1",
}

fill_type <- mat_type <- dim_names <- NULL # avoid warning about undefined globals
diagonal <- FALSE
eval(prepare_expr) # UTILS-expressions.R

# adjust parameters for this distance
Expand Down Expand Up @@ -176,6 +177,14 @@ dtw_basic_proxy <- function(x, y = NULL, window.size = NULL, norm = "L1",
dim(D) <- NULL
class(D) <- "pairdist"
}
else if (lower_triangular_only) {
dim(D) <- NULL
class(D) <- "dist"
attr(D, "Size") <- length(x)
attr(D, "Diag") <- FALSE
attr(D, "Upper") <- FALSE
attr(D, "Labels") <- names(x)
}
else {
dimnames(D) <- dim_names
class(D) <- "crossdist"
Expand Down
26 changes: 24 additions & 2 deletions R/DISTANCES-gak.R
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,8 @@ gak <- GAK
# ==================================================================================================

gak_proxy <- function(x, y = NULL, ..., sigma = NULL, window.size = NULL, normalize = TRUE,
error.check = TRUE, pairwise = FALSE, .internal_ = FALSE)
error.check = TRUE, pairwise = FALSE, .internal_ = FALSE,
lower_triangular_only = FALSE)
{
# normalization will be done manually to avoid multiple calculations of gak_x and gak_y
if (!.internal_ && !normalize) { # nocov start
Expand All @@ -185,6 +186,7 @@ gak_proxy <- function(x, y = NULL, ..., sigma = NULL, window.size = NULL, normal
stop("Parameter 'sigma' must be positive.")

fill_type <- mat_type <- dim_names <- NULL # avoid warning about undefined globals
diagonal <- FALSE
eval(prepare_expr) # UTILS-expressions.R

# adjust parameters for this distance
Expand Down Expand Up @@ -229,12 +231,32 @@ gak_proxy <- function(x, y = NULL, ..., sigma = NULL, window.size = NULL, normal
if (normalize) D <- 1 - exp(D - 0.5 * (gak_x + gak_y))
class(D) <- "pairdist"
}
else if (lower_triangular_only) {
dim(D) <- NULL
if (normalize) {
k <- 1L
for (j in 1L:(length(x) - 1L)) {
for (i in (j+1L):length(x)) {
D[k] <- 1 - exp(D[k] - (gak_x[i] + gak_x[j]) / 2)
k <- k + 1L
}
}
}

class(D) <- "dist"
attr(D, "Size") <- length(x)
attr(D, "Diag") <- FALSE
attr(D, "Upper") <- FALSE
attr(D, "Labels") <- names(x)
}
else {
if (normalize) D <- 1 - exp(D - outer(gak_x, gak_y, function(x, y) { (x + y) / 2 }))
dimnames(D) <- dim_names
class(D) <- "crossdist"
}
if (!pairwise && symmetric && normalize) diag(D) <- 0

if (!pairwise && symmetric && !lower_triangular_only) diag(D) <- 0

attr(D, "method") <- "GAK"
attr(D, "sigma") <- sigma
# return
Expand Down
3 changes: 2 additions & 1 deletion R/DISTANCES-lb-improved.R
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,8 @@ lb_improved_proxy <- function(x, y = NULL, window.size = NULL, norm = "L1", ...,
if (length(x) == 0L || length(y) == 0L) stop("Empty list received in x or y.") # nocov start
if (error.check) check_consistency(c(x,y), "tslist")
if (is_multivariate(c(x,y))) stop("lb_improved does not support multivariate series.") # nocov end
symmetric <- FALSE

lower_triangular_only <- symmetric <- FALSE
fill_type <- mat_type <- dim_names <- NULL # avoid warning about undefined globals
eval(prepare_expr) # UTILS-expressions.R

Expand Down
3 changes: 2 additions & 1 deletion R/DISTANCES-lb-keogh.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ lb_keogh_proxy <- function(x, y = NULL, window.size = NULL, norm = "L1", ...,
if (length(x) == 0L || length(y) == 0L) stop("Empty list received in x or y.") # nocov start
if (error.check) check_consistency(c(x,y), "tslist")
if (is_multivariate(c(x,y))) stop("lb_keogh does not support multivariate series.") # nocov end
symmetric <- FALSE

lower_triangular_only <- symmetric <- FALSE
fill_type <- mat_type <- dim_names <- NULL # avoid warning about undefined globals
eval(prepare_expr) # UTILS-expressions.R

Expand Down
13 changes: 12 additions & 1 deletion R/DISTANCES-sbd.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,9 @@ sbd <- SBD
#' @importFrom stats fft
#' @importFrom stats nextn
#'
sbd_proxy <- function(x, y = NULL, znorm = FALSE, ..., error.check = TRUE, pairwise = FALSE) {
sbd_proxy <- function(x, y = NULL, znorm = FALSE, ...,
error.check = TRUE, pairwise = FALSE, lower_triangular_only = FALSE)
{
x <- tslist(x)

if (error.check) check_consistency(x, "vltslist")
Expand Down Expand Up @@ -163,6 +165,7 @@ sbd_proxy <- function(x, y = NULL, znorm = FALSE, ..., error.check = TRUE, pairw

if (is_multivariate(c(x,y))) stop("SBD does not support multivariate series.") # nocov
fill_type <- mat_type <- dim_names <- NULL # avoid warning about undefined globals
diagonal <- FALSE
eval(prepare_expr) # UTILS-expressions.R

# calculate distance matrix
Expand All @@ -182,6 +185,14 @@ sbd_proxy <- function(x, y = NULL, znorm = FALSE, ..., error.check = TRUE, pairw
dim(D) <- NULL
class(D) <- "pairdist"
}
else if (lower_triangular_only) {
dim(D) <- NULL
class(D) <- "dist"
attr(D, "Size") <- length(x)
attr(D, "Diag") <- FALSE
attr(D, "Upper") <- FALSE
attr(D, "Labels") <- names(x)
}
else {
dimnames(D) <- dim_names
class(D) <- "crossdist"
Expand Down
15 changes: 13 additions & 2 deletions R/DISTANCES-sdtw.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ sdtw <- function(x, y, gamma = 0.01, ..., error.check = TRUE)
# Wrapper for proxy::dist
# ==================================================================================================

sdtw_proxy <- function(x, y = NULL, gamma = 0.01, ..., error.check = TRUE, pairwise = FALSE) {
sdtw_proxy <- function(x, y = NULL, gamma = 0.01, ...,
error.check = TRUE, pairwise = FALSE, lower_triangular_only = FALSE)
{
x <- tslist(x)
if (error.check) check_consistency(x, "vltslist")
if (is.null(y)) {
Expand All @@ -56,10 +58,11 @@ sdtw_proxy <- function(x, y = NULL, gamma = 0.01, ..., error.check = TRUE, pairw
}

fill_type <- mat_type <- dim_names <- NULL # avoid warning about undefined globals
diagonal <- TRUE
eval(prepare_expr) # UTILS-expressions.R

# adjust parameters for this distance
if (!pairwise && symmetric)
if (!pairwise && symmetric && !lower_triangular_only)
diagonal <- sdtw_proxy(x, gamma = gamma, error.check = FALSE, pairwise = TRUE)
if (gamma <= 0) stop("The 'gamma' parameter must be positive")
mv <- is_multivariate(c(x, y))
Expand All @@ -78,6 +81,14 @@ sdtw_proxy <- function(x, y = NULL, gamma = 0.01, ..., error.check = TRUE, pairw
dim(D) <- NULL
class(D) <- "pairdist"
}
else if (lower_triangular_only) {
dim(D) <- NULL
class(D) <- c("distdiag", "dist")
attr(D, "Size") <- length(x)
attr(D, "Diag") <- TRUE
attr(D, "Upper") <- FALSE
attr(D, "Labels") <- names(x)
}
else {
dimnames(D) <- dim_names
if (symmetric) diag(D) <- diagonal
Expand Down
14 changes: 14 additions & 0 deletions R/S4-DistmatLowerTriangular.R
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,17 @@ methods::setOldClass("crossdist")
setAs("matrix", "Distmat", function(from) { Distmat$new(distmat = from) })
setAs("crossdist", "Distmat", function(from) { Distmat$new(distmat = from) })
setAs("dist", "Distmat", function(from) { DistmatLowerTriangular$new(distmat = from) })

#' @exportS3Method base::as.matrix
as.matrix.distdiag <- function(x) {
n <- attr(x, "Size")
m <- matrix(0, n, n)
m[lower.tri(m, diag = TRUE)] <- x

lbls <- attr(x, "Labels")
if (!is.null(lbls)) {
names(m) <- list(lbls, lbls)
}

m
}
14 changes: 12 additions & 2 deletions R/UTILS-expressions.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,17 @@ prepare_expr <- quote({
if (pairwise && length(x) != length(y))
stop("Pairwise distances require the same amount of series in 'x' and 'y'.")
# Get appropriate matrix (big.matrix was needed before)
D <- allocate_distmat(length(x), length(y), pairwise, symmetric) # UTILS-utils.R
fill_type <- if (pairwise) "PAIRWISE" else if (symmetric) "SYMMETRIC" else "PRIMARY"
D <- allocate_distmat(length(x), length(y), pairwise, symmetric, lower_triangular_only, diagonal) # UTILS-utils.R

fill_type <- if (pairwise) {
"PAIRWISE"
} else if (lower_triangular_only) {
if (diagonal) "LOWER_TRIANGULAR_DIAGONAL" else "LOWER_TRIANGULAR"
} else if (symmetric) {
"SYMMETRIC"
} else {
"PRIMARY"
}

mat_type <- "R_MATRIX"
})
20 changes: 11 additions & 9 deletions R/UTILS-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -344,15 +344,17 @@ get_nthreads <- function() {
# ==================================================================================================

# allocate distance matrix for custom proxy loops
allocate_distmat <- function(x_len, y_len, pairwise, symmetric) {
if (pairwise)
D <- matrix(0, x_len, 1L)
else if (symmetric)
D <- matrix(0, x_len, x_len)
else
D <- matrix(0, x_len, y_len)
# return
D
allocate_distmat <- function(x_len, y_len, pairwise, symmetric, lower, diagonal) {
if (pairwise) {
matrix(0, x_len, 1L)
} else if (lower) {
diagonal_factor <- if (diagonal) 1L else -1L
matrix(0, x_len * (x_len + diagonal_factor) / 2L, 1L)
} else if (symmetric) {
matrix(0, x_len, x_len)
} else {
matrix(0, x_len, y_len)
}
}

# Euclidean norm
Expand Down
5 changes: 5 additions & 0 deletions R/pkg.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,12 @@
"_PACKAGE"

# PREFUN for some of my proxy distances so that they support 'pairwise' directly
#' @importFrom rlang caller_env
#' @importFrom rlang env_bind
proxy_prefun <- function(x, y, pairwise, params, reg_entry) {
if (!is.null(reg_entry) && "sdtw" %in% reg_entry$names) {
rlang::env_bind(rlang::caller_env(), diag = TRUE)
}
params$pairwise <- pairwise
list(x = x, y = y, pairwise = pairwise, p = params, reg_entry = reg_entry)
}
Expand Down
1 change: 1 addition & 0 deletions inst/NEWS.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,6 @@
\itemize{
\item Update Makevars for ARM version of Windows.
\item Sanitize internal usage of \code{do.call} to avoid huge backtraces.
\item Support lower triangular \code{distmat} objects for symmetric distances.
}
}
12 changes: 7 additions & 5 deletions src/distmat/distmat-loop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,18 @@

namespace dtwclust {

static auto distmat_factory = DistmatFactory();
static auto distance_calculator_factory = DistanceCalculatorFactory();
static auto distmat_filler_factory = DistmatFillerFactory();

extern "C" SEXP distmat_loop(SEXP D, SEXP X, SEXP Y,
SEXP DIST, SEXP DIST_ARGS,
SEXP FILL_TYPE, SEXP MAT_TYPE, SEXP NUM_THREADS)
{
BEGIN_RCPP
auto distmat = DistmatFactory().create(MAT_TYPE, D);
auto dist_calculator = DistanceCalculatorFactory()
.create(DIST, DIST_ARGS, X, Y);
auto distmat_filler = DistmatFillerFactory()
.create(FILL_TYPE, NUM_THREADS, distmat, dist_calculator);
auto distmat = distmat_factory.create(MAT_TYPE, D);
auto dist_calculator = distance_calculator_factory.create(DIST, DIST_ARGS, X, Y);
auto distmat_filler = distmat_filler_factory.create(FILL_TYPE, NUM_THREADS, distmat, dist_calculator);
distmat_filler->fill();
return R_NilValue;
END_RCPP
Expand Down
Loading

0 comments on commit ad3bf5b

Please sign in to comment.