From fb8114499c13edfb77cb46b88351c528f280c2cc Mon Sep 17 00:00:00 2001
From: evanbiederstedt <evan.biederstedt@gmail.com>
Date: Sun, 25 Sep 2022 20:18:13 -0400
Subject: [PATCH] fix warnings

---
 R/RcppExports.R          |  4 ++++
 R/integrations.R         | 31 ++++++++++++++++++++++++-------
 man/armaCor.Rd           | 20 ++++++++++++++++++++
 man/velocityInfoConos.Rd |  6 ++++++
 src/RcppExports.cpp      | 13 +++++++++++++
 src/spcov.cpp            | 17 +++++++++++++++++
 6 files changed, 84 insertions(+), 7 deletions(-)
 create mode 100644 man/armaCor.Rd

diff --git a/R/RcppExports.R b/R/RcppExports.R
index 8e182827..3ca6333c 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -85,3 +85,7 @@ spcov <- function(m, cm) {
     .Call('_conos_spcov', PACKAGE = 'conos', m, cm)
 }
 
+arma_mat_cor <- function(m) {
+    .Call('_conos_arma_mat_cor', PACKAGE = 'conos', m)
+}
+
diff --git a/R/integrations.R b/R/integrations.R
index 9e3c5ab6..b84b5bb9 100644
--- a/R/integrations.R
+++ b/R/integrations.R
@@ -523,6 +523,7 @@ p2app4conos <- function(conos, cdl=NULL, metadata=NULL, filename='conos_app.bin'
 }
 
 
+
 #' Filter genes by requiring minimum average expression within at least one of the provided cell clusters
 #'
 #' @param emat spliced (exonic) count matrix
@@ -539,6 +540,19 @@ filter.genes.by.cluster.expression <- function(emat, clusters, min.max.cluster.a
 }
 
 
+
+#' A slightly faster way of calculating column correlation matrix
+#' @param mat matrix whose columns will be correlated
+#' @param nthreads number of threads to use
+#' @return correlation matrix
+#' @keywords internal
+armaCor <- function(mat, nthreads = 1) {  # nthreads is accepted but not read in the body
+  col.cor <- arma_mat_cor(mat)
+  dimnames(col.cor) <- list(colnames(mat), colnames(mat))
+  col.cor
+}
+
+
 #' RNA velocity analysis on samples integrated with conos
 #' Create a list of objects to pass into gene.relative.velocity.estimates function from the velocyto.R package
 #'
@@ -548,6 +562,9 @@ filter.genes.by.cluster.expression <- function(emat, clusters, min.max.cluster.a
 #' @param groups set of clusters to use (default=NULL). Ignored if 'clustering' is not NULL. 
 #' @param n.odgenes numeric Number of overdispersed genes to use for PCA (default=2000).
 #' @param verbose boolean Whether to use verbose mode (default=TRUE)
+#' @param min.max.cluster.average.emat Required minimum average expression count for emat, the spliced (exonic) count matrix (default=0.2). Note: no normalization is performed. See the parameter 'min.max.cluster.average' in the function 'filter.genes.by.cluster.expression'.
+#' @param min.max.cluster.average.nmat Required minimum average expression count for nmat, the unspliced (nascent) count matrix (default=0.05). Note: no normalization is performed. See the parameter 'min.max.cluster.average' in the function 'filter.genes.by.cluster.expression'.
+#' @param min.max.cluster.average.smat Required minimum average expression count for smat, the spanning read matrix (used in offset calculations) (default=0.01). Note: no normalization is performed. See the parameter 'min.max.cluster.average' in the function 'filter.genes.by.cluster.expression'.
 #' @return List with cell distances, combined spliced expression matrix, combined unspliced expression matrix, combined matrix of spanning reads, cell colors for clusters and embedding (taken from conos)
 #' @export
 velocityInfoConos <- function(cms.list, con, clustering=NULL, groups=NULL, n.odgenes=2e3, verbose=TRUE, min.max.cluster.average.emat=0.2, min.max.cluster.average.nmat=0.05, min.max.cluster.average.smat=0.01) {
@@ -573,9 +590,9 @@ velocityInfoConos <- function(cms.list, con, clustering=NULL, groups=NULL, n.odg
   cms.list <- lapply(cms.list, function(x) {lapply(x, function(y) {y[row.names(y) %in% common.genes,]} )} )
 
   # Merge velocity files from different samples
-  emat <- do.call(cbind, lapply(cms.list, function(x) {x[[1]]}))
-  nmat <- do.call(cbind, lapply(cms.list, function(x) {x[[2]]}))
-  smat <- do.call(cbind, lapply(cms.list, function(x) {x[[3]]}))
+  emat <- do.call(cbind, lapply(cms.list, function(x) {x[[1]]}))  ## emat - spliced (exonic) count matrix
+  nmat <- do.call(cbind, lapply(cms.list, function(x) {x[[2]]}))  ## nmat - unspliced (nascent) count matrix
+  smat <- do.call(cbind, lapply(cms.list, function(x) {x[[3]]}))  ## smat - optional spanning read matrix (used in offset calculations)
 
   # Keep the order of cells consistent between velocity matrices and the embedding (not really sure whether it's necessary...)
   emat <- emat[,order(match(colnames(emat), rownames(emb)))]
@@ -588,12 +605,12 @@ velocityInfoConos <- function(cms.list, con, clustering=NULL, groups=NULL, n.odg
   # Again, keep the order of cells consistent
   pcs <- pcs[order(match(rownames(pcs), rownames(emb))),]
   # Calculate the cell distances based on correlation
-  cell.dist <- as.dist(1 - velocyto.R::armaCor(t(pcs)))
+  cell.dist <- as.dist(1 - armaCor(t(pcs)))
 
   if (verbose) message("Filtering velocity...\n")
-  emat %<>% velocyto.R::filter.genes.by.cluster.expression(groups, min.max.cluster.average=min.max.cluster.average.emat)
-  nmat %<>% velocyto.R::filter.genes.by.cluster.expression(groups, min.max.cluster.average=min.max.cluster.average.nmat)
-  smat %<>% velocyto.R::filter.genes.by.cluster.expression(groups, min.max.cluster.average=min.max.cluster.average.smat)
+  emat %<>% filter.genes.by.cluster.expression(groups, min.max.cluster.average=min.max.cluster.average.emat)
+  nmat %<>% filter.genes.by.cluster.expression(groups, min.max.cluster.average=min.max.cluster.average.nmat)
+  smat %<>% filter.genes.by.cluster.expression(groups, min.max.cluster.average=min.max.cluster.average.smat)
 
   if (verbose) message("All Done!")
   return(list(cell.dist=cell.dist, emat=emat, nmat=nmat, smat=smat, cell.colors=cell.colors, emb=emb))
diff --git a/man/armaCor.Rd b/man/armaCor.Rd
new file mode 100644
index 00000000..7eaaa6ba
--- /dev/null
+++ b/man/armaCor.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/integrations.R
+\name{armaCor}
+\alias{armaCor}
+\title{A slightly faster way of calculating column correlation matrix}
+\usage{
+armaCor(mat, nthreads = 1)
+}
+\arguments{
+\item{mat}{matrix whose columns will be correlated}
+
+\item{nthreads}{number of threads to use}
+}
+\value{
+correlation matrix
+}
+\description{
+A slightly faster way of calculating column correlation matrix
+}
+\keyword{internal}
diff --git a/man/velocityInfoConos.Rd b/man/velocityInfoConos.Rd
index 0122c243..9239f8c1 100644
--- a/man/velocityInfoConos.Rd
+++ b/man/velocityInfoConos.Rd
@@ -29,6 +29,12 @@ velocityInfoConos(
 \item{n.odgenes}{numeric Number of overdispersed genes to use for PCA (default=2000).}
 
 \item{verbose}{boolean Whether to use verbose mode (default=TRUE)}
+
+\item{min.max.cluster.average.emat}{Required minimum average expression count for emat, the spliced (exonic) count matrix (default=0.2). Note: no normalization is performed. See the parameter 'min.max.cluster.average' in the function 'filter.genes.by.cluster.expression'.}
+
+\item{min.max.cluster.average.nmat}{Required minimum average expression count for nmat, the unspliced (nascent) count matrix (default=0.05). Note: no normalization is performed. See the parameter 'min.max.cluster.average' in the function 'filter.genes.by.cluster.expression'.}
+
+\item{min.max.cluster.average.smat}{Required minimum average expression count for smat, the spanning read matrix (used in offset calculations) (default=0.01). Note: no normalization is performed. See the parameter 'min.max.cluster.average' in the function 'filter.genes.by.cluster.expression'.}
 }
 \value{
 List with cell distances, combined spliced expression matrix, combined unspliced expression matrix, combined matrix of spanning reads, cell colors for clusters and embedding (taken from conos)
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index f7a6cb1f..a9fae0f2 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -2,6 +2,7 @@
 // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
 #include <RcppArmadillo.h>
+#define NDEBUG 1
 #include <RcppEigen.h>
 #include <Rcpp.h>
 
@@ -333,6 +334,17 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
+// arma_mat_cor
+arma::mat arma_mat_cor(const arma::mat& m);
+RcppExport SEXP _conos_arma_mat_cor(SEXP mSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const arma::mat& >::type m(mSEXP);
+    rcpp_result_gen = Rcpp::wrap(arma_mat_cor(m));
+    return rcpp_result_gen;
+END_RCPP
+}
 
 static const R_CallMethodDef CallEntries[] = {
     {"_conos_RjnmfC", (DL_FUNC) &_conos_RjnmfC, 8},
@@ -356,6 +368,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"_conos_adjacent_vertices", (DL_FUNC) &_conos_adjacent_vertices, 1},
     {"_conos_adjacent_vertex_weights", (DL_FUNC) &_conos_adjacent_vertex_weights, 2},
     {"_conos_spcov", (DL_FUNC) &_conos_spcov, 2},
+    {"_conos_arma_mat_cor", (DL_FUNC) &_conos_arma_mat_cor, 1},
     {NULL, NULL, 0}
 };
 
diff --git a/src/spcov.cpp b/src/spcov.cpp
index 5030bc95..a802dc1a 100644
--- a/src/spcov.cpp
+++ b/src/spcov.cpp
@@ -1,5 +1,15 @@
+
+
 // [[Rcpp::depends(RcppEigen)]]
+// [[Rcpp::depends(RcppArmadillo)]]
+
+
+#include <RcppArmadillo.h>
+#define NDEBUG 1
 #include <RcppEigen.h>
+#include <Rcpp.h>
+
+
 // [[Rcpp::export]]
 Eigen::MatrixXd spcov(const Eigen::SparseMatrix<double>& m,Eigen::VectorXd cm) {
   Eigen::MatrixXd v=m.transpose()*m; 
@@ -7,3 +17,10 @@ Eigen::MatrixXd spcov(const Eigen::SparseMatrix<double>& m,Eigen::VectorXd cm) {
   v/=((double) m.rows()-1);
   return(v);
 }
+
+
+// Correlation coefficients between the columns of m, delegated to Armadillo's cor()
+// [[Rcpp::export]]
+arma::mat arma_mat_cor(const arma::mat& m) {
+  return arma::cor(m);
+}