Skip to content

Commit

Permalink
v0.4.2, new plot functions, sped up alignment for ms search
Browse files Browse the repository at this point in the history
  • Loading branch information
ethanbass committed Aug 15, 2024
1 parent 1196281 commit 744d304
Show file tree
Hide file tree
Showing 11 changed files with 284 additions and 34 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: mzinspectr
Type: Package
Title: Read and Analyze Mass Spectrometry Alignment Files
Version: 0.4.1
Version: 0.4.2
Authors@R:
person("Ethan", "Bass", , "ethanbass@gmail.com", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-6175-6739"))
Expand All @@ -11,6 +11,7 @@ URL: https://github.com/ethanbass/mzinspectr
BugReports: https://github.com/ethanbass/mzinspectr/issues
Imports:
dplyr,
Formula,
fs,
OrgMassSpecR,
pbapply,
Expand Down
8 changes: 8 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# Generated by roxygen2: do not edit by hand

S3method(boxplot,ms_alignment)
S3method(dim,ms_alignment)
S3method(head,ms_alignment)
S3method(ms_mirror_plot,data.frame)
S3method(ms_mirror_plot,ms_alignment)
S3method(plot,ms_alignment)
S3method(print,ms_alignment)
S3method(row.names,ms_alignment)
S3method(tail,ms_alignment)
Expand All @@ -25,23 +27,29 @@ export(ms_search_gadget)
export(ms_search_spectra)
export(ms_subtract_blanks)
export(ms_tidy_msdial)
importFrom(Formula,Formula)
importFrom(OrgMassSpecR,SpectrumSimilarity)
importFrom(dplyr,any_of)
importFrom(dplyr,mutate)
importFrom(dplyr,select)
importFrom(graphics,abline)
importFrom(graphics,boxplot)
importFrom(graphics,legend)
importFrom(graphics,lines)
importFrom(graphics,par)
importFrom(graphics,plot)
importFrom(graphics,plot.new)
importFrom(graphics,rasterImage)
importFrom(graphics,text)
importFrom(graphics,title)
importFrom(parallel,mclapply)
importFrom(pbapply,pblapply)
importFrom(stats,approx)
importFrom(stats,as.formula)
importFrom(stats,lm)
importFrom(stats,median)
importFrom(stats,reformulate)
importFrom(stats,terms)
importFrom(stringr,str_split_fixed)
importFrom(tidyr,pivot_longer)
importFrom(utils,head)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# mzinspectr 0.4.2

* Sped up spectral alignment to further improve speed of spectral library searching (by about 50%).
* Added additional plot functions for `ms_alignment` objects: `plot.ms_alignment` and `boxplot.ms.alignment`.

# mzinspectr 0.4.1

* Refactored `spectral_similarity` to speed up spectral library search.
Expand Down
126 changes: 126 additions & 0 deletions R/plot.ms_alignment.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#' Plot spectrum from peak table
#'
#' Plots the trace and/or spectrum for a given peak in peak table.
#'
#' Can be used to confirm the identity of a peak or check that a particular
#' column in the peak table represents a single compound. Can also be used
#' to create simple box-plots to examine the distribution of a peak with respect
#' to variables defined in sample metadata.
#'
#' @importFrom graphics title boxplot
#' @importFrom stats as.formula
#' @param x A \code{ms.alignment} object.
#' @param col A vector specifying the peak(s) that you wish to plot.
#' @param plot_spectrum Logical. If TRUE, plots the mass spectrum of the chosen
#' peak. Defaults to TRUE.
#' @param box_plot Logical. If TRUE, plots box plot using factors
#' defined by \code{vars}.
#' @param vars Independent variables for boxplot. Righthand side of formula.
#' @param spectrum_labels Logical. If TRUE, plots labels on maxima in spectral
#' plot. Defaults to TRUE.
#' @param engine Which plotting engine to use: either \code{base} or \code{plotly}.
#' @param ... Additional arguments to \code{\link[graphics]{boxplot}}.
#' @return No return value.
#' @section Side effects:
#'
#' If \code{plot_spectrum} is TRUE, plots the spectrum for the specified chromatogram
#' at the specified retention time. The spectrum is a single row from the chromatographic
#' matrix.
#'
#' If \code{box_plot} is TRUE, produces a \code{\link[graphics]{boxplot}} from the
#' specified peak with groups provided by \code{vars}.
#'
#' @author Ethan Bass
#' @rdname plot.peak_table
#' @concept Visualization
#' @export

plot.ms_alignment <- function(x, col, plot_spectrum = TRUE,
box_plot = FALSE, vars = NULL,
spectrum_labels = TRUE,
engine = c("base", "plotly"), ...){
engine <- match.arg(engine, c("base", "plotly"))
for (col in col){
if (plot_spectrum){
ms_plot_spectrum(x = x, col = col, plot_labels = spectrum_labels,
type = engine)
}
if (box_plot){
if (!is.data.frame(x$sample_meta)){
stop("Attach metadata to `peak_table` to make a boxplot.")
}
if (is.null(vars)){
stop("Must provide independent variable(s) (`var`) to make a boxplot.")
}
boxplot(as.formula(paste("x[['tab']][,col]", vars, sep="~")),
data = x$sample_meta,
main = paste(col, '\n', 'RT = ',
round(as.numeric(x$peak_meta[col, "Average.Rt.min."]), 2)),
ylab = "abs", xlab = "", ...)
}
}
}

#' Make boxplot from MS peak table.
#'
#' The function can take multiple response variables on the left hand side of the
#' formula (separated by \code{+}). In this case, a separate boxplot will be
#' produced for each response variable.
#'
#' @param x A peak_table object
#' @param formula A formula object
#' @param ... Additional arguments to \code{\link[graphics]{boxplot}}
#' @importFrom stats reformulate terms
#' @importFrom graphics boxplot
#' @concept Visualization
#' @export

boxplot.ms_alignment <- function(x, formula, ...){
if (missing(formula)){
stop("Please provide a `formula` to make a boxplot.")
}
lhs <- get_lhs_vars(formula)
for (li in lhs){
response <- paste0("x[['tab']][,'", li, "']")
form <- reformulate(labels(terms(formula)), response = response)

idx <- ifelse(!is.na(x$peak_meta[col,"Average.RI"]),
paste0("RI: ", x$peak_meta[col,"Average.RI"]),
paste0("RT: ", x$peak_meta[col,"Average.Rt.min."])
)
title <- paste(li, idx, sep = "\n")
boxplot(form,
data = x$sample_meta,
main = title,
ylab = "", xlab = "", ...)
}
}

#' Extract variables from the left-hand-side of a formula.
#' @param formula A \code{\link{formula}} object.
#' @importFrom Formula Formula
#' @noRd
#' @note Adapted from https://github.com/adibender/pammtools/blob/master/R/formula-utils.R
#' (c) Copyright © 2017 Andreas Bender and Fabian Scheipl under MIT license:
#' Permission is hereby granted, free of charge, to any person obtaining a copy of
#' this software and associated documentation files (the “Software”), to deal in
#' the Software without restriction, including without limitation the rights to use,
#' copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
#' Software, and to permit persons to whom the Software is furnished to do so,
#' subject to the following conditions:
#'
#' The above copyright notice and this permission notice shall be included in all
#' copies or substantial portions of the Software.
#'
#' THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#' IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#' FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#' AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
#' WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
#' CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

get_lhs_vars <- function(formula) {
if (is.character(formula) ) formula <- as.formula(formula)
form <- formula(Formula::Formula(formula), lhs = TRUE, rhs = FALSE)
all.vars(form)
}
30 changes: 20 additions & 10 deletions R/plot_spectrum.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,15 @@ ms_plot_spectrum <- function(x, col, plot_labels = TRUE, lab_int = 0.2,
yaxis = list(title = "Ion Intensity")
)
p <- plotly::plot_ly()
p <- plotly::add_trace(p, type="bar", x=spec$mz, y=spec$intensity, marker=list(line=list(width=bar_width)))
p <- plotly::layout(p, title=layout$title, xaxis=layout$xaxis, yaxis=layout$yaxis)
p <- plotly::add_trace(p, type = "bar", x = spec$mz, y = spec$intensity,
marker=list(line = list(width = bar_width)))
p <- plotly::layout(p, title = layout$title, xaxis = layout$xaxis,
yaxis = layout$yaxis)
if (plot_labels){
p <- plotly::add_annotations(p, x = ~spec$mz[lab.idx], y = ~spec$intensity[lab.idx], text=~round(spec$mz[lab.idx], digits), yshift=10, xshift=1, showarrow=FALSE )
p <- plotly::add_annotations(p, x = ~spec$mz[lab.idx],
y = ~spec$intensity[lab.idx],
text = ~round(spec$mz[lab.idx], digits),
yshift = 10, xshift = 1, showarrow = FALSE )
}
p
}
Expand Down Expand Up @@ -87,9 +92,11 @@ ms_mirror_plot <- function(x, ...){
#' @method ms_mirror_plot data.frame
#' @export

ms_mirror_plot.data.frame <- function(x, y, plot_labels = TRUE, type = c("plotly", "base"),
scale = TRUE, lab_int = 0.2, digits = 1, bar_width = 1,
match_score = TRUE, ...){
ms_mirror_plot.data.frame <- function(x, y, plot_labels = TRUE,
type = c("plotly", "base"),
scale = TRUE, lab_int = 0.2,
digits = 1, bar_width = 1,
match_score = TRUE, ...){
type <- match.arg(type, c("plotly", "base"))
colnames(x) <- c("mz","intensity")
colnames(y) <- c("mz","intensity")
Expand All @@ -115,9 +122,11 @@ ms_mirror_plot.data.frame <- function(x, y, plot_labels = TRUE, type = c("plotly
}
if (match_score){
y[,2] <- -y[,2]
match <- try(OrgMassSpecR::SpectrumSimilarity(spec.top = x, spec.bottom = y,
match <- try(OrgMassSpecR::SpectrumSimilarity(spec.top = x,
spec.bottom = y,
print.graphic = FALSE))
label <- paste0("Similarity score: ", format(round(match, 2), nsmall = 2), "%")
label <- paste0("Similarity score: ",
format(round(match, 2), nsmall = 2), "%")
legend("top", legend = label, cex = 0.7, bty = "n")
}
} else if (type == "plotly"){
Expand Down Expand Up @@ -183,6 +192,7 @@ ms_mirror_plot.ms_alignment <- function(x, cols, ref, type=c("plotly", "base"),
spec2 <- tidy_eispectrum(x$matches[[cols[1]]][ref, "Spectra"])
}
ms_mirror_plot(x = spec1, y = spec2, plot_labels = plot_labels,
lab_int = lab_int, type = type, scale = scale, bar_width = bar_width,
digits = digits, match_score = match_score)
lab_int = lab_int, type = type, scale = scale,
bar_width = bar_width, digits = digits,
match_score = match_score)
}
51 changes: 33 additions & 18 deletions R/search_spectra.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ ms_search_spectra <- function(x, db, cols, ..., ri_thresh = 100, spectral_weight
progress_bar = TRUE){
if (!is.null(ri_thresh) && all(is.na(x$peak_meta$Average.RI))){
stop(paste("Retention indices are not present. Please add retention indices using \n \t",
sQuote("ms_calculate_RIs"), "before proceeding or set", sQuote("ri_thresh = NULL"),"."))
sQuote("ms_calculate_RIs"), "before proceeding or set",
sQuote("ri_thresh = NULL"), "."))
}
if (is.null(x$matches)){
x$matches <- as.list(rep(NA, ncol(x$tab)))
Expand Down Expand Up @@ -91,7 +92,6 @@ ms_search_spectra <- function(x, db, cols, ..., ri_thresh = 100, spectral_weight
#' @return Returns spectrum as a data.frame with two columns: "mz" and "intensity".
#' @author Ethan Bass
#' @export

ms_get_spectrum <- function(x, col){
spec <- tidy_eispectrum(x$peak_meta[col, "EI.spectrum"])
spec
Expand Down Expand Up @@ -163,7 +163,7 @@ msp_to_dataframe <- function(db){
#' @param spec.bottom data frame containing the reference spectrum's peak list
#' with the m/z values in the first column and corresponding intensities in the
#' second.
#' @param t numeric value specifying the tolerance used to align the m/z values
#' @param tol numeric value specifying the tolerance used to align the m/z values
#' of the two spectra.
#' @param b numeric value specifying the baseline threshold for peak
#' identification. Expressed as a percent of the maximum intensity.
Expand All @@ -177,7 +177,7 @@ msp_to_dataframe <- function(db){
#' @author Nathan G. Dodder
#' @author Ethan Bass

spectral_similarity <- function(spec.top, spec.bottom, t = 0.25, b = 10,
spectral_similarity <- function(spec.top, spec.bottom, tol = 0.25, b = 10,
xlim = c(50, 1200), x.threshold = 0){
if (x.threshold < 0){
stop("x.threshold argument must be zero or a positive number")
Expand All @@ -186,20 +186,7 @@ spectral_similarity <- function(spec.top, spec.bottom, t = 0.25, b = 10,
top <- normalize_spectrum(spec.top, xlim = xlim, b = b)
bottom <- normalize_spectrum(spec.bottom, xlim = xlim, b = b)

for (i in 1:nrow(bottom)){
top[, 1][which(bottom[, 1][i] >= top[,1] - t & bottom[, 1][i] <= top[, 1] + t)] <- bottom[,1][i]
}

mz <- unique(c(top$mz, bottom$mz))
alignment <- cbind(mz, x = top[match(mz, top[, "mz"]), "normalized"],
y = bottom[match(mz, bottom[, "mz"]), "normalized"])

if (length(unique(alignment[, 1])) != length(alignment[, 1]))
warning("the m/z tolerance is set too high")
alignment[, 3][is.na(alignment[, 3])] <- 0
alignment[, c(2,3)][is.na(alignment[, c(2,3)])] <- 0

names(alignment) <- c("mz", "intensity.top", "intensity.bottom")
alignment <- align_spectra(top, bottom, tol = tol)

alignment <- alignment[alignment[, 1] >= x.threshold, ]
u <- alignment[, 2]
Expand All @@ -216,3 +203,31 @@ normalize_spectrum <- function(spec, b, xlim){
spec <- spec[which(spec$mz >= xlim[1] & spec$mz <= xlim[2]),]
spec[which(spec$normalized >= b),]
}

#' Align spectra
#' @noRd
align_spectra <- function(top, bottom, tol){
idx <- outer(bottom$mz, top$mz, function(b, t) abs(b - t) <= tol)

top$mz <- sapply(seq_len(ncol(idx)), function(i){
x <- idx[,i]
if (any(x)){
bottom$mz[which(x)[1]]
} else{
top$mz[i]
}
})

mz <- unique(c(top$mz, bottom$mz))

alignment <- cbind(mz, x = top[match(mz, top[, "mz"]), "normalized"],
y = bottom[match(mz, bottom[, "mz"]), "normalized"])

if (length(unique(alignment[, 1])) != length(alignment[, 1]))
warning("the m/z tolerance is set too high")

alignment[,"y"][is.na(alignment[,"y"])] <- 0
alignment[, c("x", "y")][is.na(alignment[, c("x", "y")])] <- 0
colnames(alignment) <- c("mz", "intensity.top", "intensity.bottom")
alignment
}
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,6 @@ Spectra can be plotted using either "base R" graphics or "plotly" graphics using

If you use mzinspectr in published work, please cite it as follows:

Bass, E. (2023). mzinspectr: Read and Analyze Mass Spectrometry Alignment Files (version 0.4.1). https://doi.org/10.5281/zenodo.10426253.
Bass, E. (2023). mzinspectr: Read and Analyze Mass Spectrometry Alignment Files (version 0.4.2). https://doi.org/10.5281/zenodo.10426253.


4 changes: 2 additions & 2 deletions inst/CITATION
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ citEntry(
title = "mzinspectr: Read and Analyze Mass Spectrometry Alignment Files",
author = "Ethan Bass",
year = "2023",
note = "version 0.4.1",
note = "version 0.4.2",
url = "",
doi = "10.5281/zenodo.10426253",
textVersion = paste("Bass, E. (2023).",
"mzinspectr: Read and Analyze Mass Spectrometry Alignment Files (version 0.4.1).",
"mzinspectr: Read and Analyze Mass Spectrometry Alignment Files (version 0.4.2).",
"https://doi.org/10.5281/zenodo.10426253."
)
)
21 changes: 21 additions & 0 deletions man/boxplot.ms_alignment.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 744d304

Please sign in to comment.