diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..cd67886 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,165 @@ +name: Weekly global Tool Linting and Tests +on: + schedule: + # Run at midnight every monday + - cron: '0 0 * * 1' + repository_dispatch: + types: [run-all-tool-tests-command] +env: + GALAXY_RELEASE: release_20.09 +jobs: + setup: + name: Setup cache + if: github.repository_owner == 'computational-metabolomics' + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.7] + steps: + - name: Add reaction + if: ${{ github.event.client_payload.slash_command.command == 'run-all-tool-tests' }} + uses: peter-evans/create-or-update-comment@v1 + with: + token: ${{ secrets.PAT }} + repository: ${{ github.event.client_payload.github.payload.repository.full_name }} + comment-id: ${{ github.event.client_payload.github.payload.comment.id }} + reaction-type: hooray + - uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Cache .cache/pip + uses: actions/cache@v2 + id: cache-pip + with: + path: ~/.cache/pip + key: pip_cache_${{ matrix.python-version }}_${{ env.GALAXY_RELEASE }} + # Install the `wheel` package so that when installing other packages which + # are not available as wheels, pip will build a wheel for them, which can be cached. + - name: Install wheel + run: pip install wheel + - name: Install Planemo + run: pip install planemo + - name: Fake a planemo run to update cache + run: | + touch tool.xml + PIP_QUIET=1 planemo test --galaxy_python_version ${{ matrix.python-version }} --no_conda_auto_init --galaxy_branch $GALAXY_RELEASE + test: + name: Test tools + # This job runs on Linux + runs-on: ubuntu-latest + needs: setup + strategy: + fail-fast: false + matrix: + chunk: [0,1,2,3] + python-version: [3.7] + services: + postgres: + image: postgres:11 + env: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: postgres + ports: + - 5432:5432 + steps: + # checkout the repository + # and use it as the current working directory + - uses: actions/checkout@v2 + with: + fetch-depth: 1 + - uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Cache .cache/pip + uses: actions/cache@v2 + id: cache-pip + with: + path: ~/.cache/pip + key: pip_cache_${{ matrix.python-version }}_${{ env.GALAXY_RELEASE }} + - name: Install Planemo + run: pip install planemo + - name: Planemo ci_find_tools + run: planemo ci_find_tools --chunk_count 4 --chunk ${{ matrix.chunk }} --exclude test_repositories --exclude packages --exclude deprecated --exclude_from .tt_skip --exclude_from .tt_biocontainer_skip --group_tools --output tool.list + - name: Show repo list + run: cat tool.list + - name: Planemo test tools + run: | + mkdir json_output/ + while read -r TOOLS; do + if grep -qf .tt_biocontainer_skip <(echo $TOOLS); then + PLANEMO_OPTIONS="" + else + PLANEMO_OPTIONS="--biocontainers --no_dependency_resolution --no_conda_auto_init" + fi + json=$(mktemp -u -p json_output --suff .json) + PIP_QUIET=1 planemo test --database_connection postgresql://postgres:postgres@localhost:5432/galaxy $PLANEMO_OPTIONS --galaxy_branch $GALAXY_RELEASE --galaxy_python_version ${{ matrix.python-version }} --test_output_json $json $TOOLS || true + docker system prune --all --force --volumes || true + done < tool.list + - name: Merge tool_test_output.json files + run: planemo merge_test_reports json_output/*.json tool_test_output.json + - name: Create tool_test_output.html + run: planemo test_reports tool_test_output.json --test_output tool_test_output.html + - name: Copy artifacts into place + run: | + mkdir upload + mv tool_test_output.json tool_test_output.html upload/ + - uses: actions/upload-artifact@v2.0.1 + with: + name: 'Tool test output ${{ matrix.chunk }}' + path: upload + + combine_outputs: + name: Combine chunked test results + needs: test + strategy: + matrix: + python-version: [3.7] + # This job runs on Linux + runs-on: ubuntu-latest + steps: + - uses: actions/download-artifact@v2 + with: + path: artifacts + - uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Install Planemo + run: pip install planemo + - name: Install jq + run: sudo apt-get install jq + - name: Combine outputs + run: find artifacts -name tool_test_output.json -exec sh -c 'planemo merge_test_reports "$@" tool_test_output.json' sh {} + + - name: Create tool_test_output.html + run: planemo test_reports tool_test_output.json --test_output tool_test_output.html + - name: Copy artifacts into place + run: | + mkdir upload + mv tool_test_output.json tool_test_output.html upload/ + - uses: actions/upload-artifact@v2.0.1 + with: + name: 'All tool test results' + path: upload + + - name: Create URL to the run output + if: ${{ github.event.client_payload.slash_command.command == 'run-all-tool-tests' }} + id: vars + run: echo ::set-output name=run-url::https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID + + - name: Create comment + if: ${{ github.event.client_payload.slash_command.command == 'run-all-tool-tests' }} + uses: peter-evans/create-or-update-comment@v1 + with: + token: ${{ secrets.PAT }} + repository: ${{ github.event.client_payload.github.payload.repository.full_name }} + issue-number: ${{ github.event.client_payload.github.payload.issue.number }} + body: | + [Find all tool test results here][1] + + [1]: ${{ steps.vars.outputs.run-url }} + - name: Check status of combined outputs + run: | + if jq '.["tests"][]["data"]["status"]' upload/tool_test_output.json | grep -v "success"; then + echo "Unsuccessful tests found, inspect the 'All tool test results' artifact for details." + exit 1 + fi diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml new file mode 100644 index 0000000..ce60228 --- /dev/null +++ b/.github/workflows/pr.yaml @@ -0,0 +1,427 @@ +name: Galaxy Tool Linting and Tests for push and PR +on: [push, pull_request] +env: + GALAXY_REPO: https://github.com/galaxyproject/galaxy + GALAXY_RELEASE: release_20.09 +jobs: + # the setup job does two things: + # 1. cache the pip cache and .planemo + # 2. determine the list of changed repositories + # it produces one artifact which contains + # - a file with the latest SHA from the chosen branch of the Galaxy repo + # - a file containing the list of changed repositories + # which are needed in subsequent steps. + setup: + + name: Setup cache and determine changed repositories + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.7] + steps: + - name: Print github context properties + run: | + echo 'event: ${{ github.event_name }}' + echo 'sha: ${{ github.sha }}' + echo 'ref: ${{ github.ref }}' + echo 'head_ref: ${{ github.head_ref }}' + echo 'base_ref: ${{ github.base_ref }}' + echo 'event.before: ${{ github.event.before }}' + echo 'event.after: ${{ github.event.after }}' + - uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Determine latest galaxy commit + run: echo "GALAXY_HEAD_SHA=$(git ls-remote ${{ env.GALAXY_REPO }} refs/heads/${{ env.GALAXY_RELEASE }} | cut -f1)" >> $GITHUB_ENV + - name: Save latest galaxy commit to artifact file + run: echo $GALAXY_HEAD_SHA > galaxy.sha + - uses: actions/upload-artifact@v2 + with: + name: Workflow artifacts + path: galaxy.sha + - name: Cache .cache/pip + uses: actions/cache@v2 + id: cache-pip + with: + path: ~/.cache/pip + key: pip_cache_py_${{ matrix.python-version }}_gxy1_$GALAXY_HEAD_SHA + - name: Cache .planemo + uses: actions/cache@v2 + id: cache-planemo + with: + path: ~/.planemo + key: planemo_cache_py_${{ matrix.python-version }}_gxy1_$GALAXY_HEAD_SHA + # Install the `wheel` package so that when installing other packages which + # are not available as wheels, pip will build a wheel for them, which can be cached. + - name: Install wheel + run: pip install wheel + - name: Install Planemo and flake8 + run: pip install planemo flake8 flake8-import-order + - name: Fake a Planemo run to update cache + if: steps.cache-pip.outputs.cache-hit != 'true' || steps.cache-planemo.outputs.cache-hit != 'true' + run: | + touch tool.xml + PIP_QUIET=1 planemo test --galaxy_python_version ${{ matrix.python-version }} --no_conda_auto_init --galaxy_source $GALAXY_REPO --galaxy_branch $GALAXY_RELEASE + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + # The range of commits to check for changes is: + # - `origin/master...` for all events happening on a feature branch + # - for events on the master branch we compare against the sha before the event + # (note that this does not work for feature branch events since we want all + # commits on the feature branch and not just the commits of the last event) + # - for pull requests we compare against the 1st ancestor, given the current + # HEAD is the merge between the PR branch and the base branch + - name: Set commit range (push to the feature branch) + if: github.ref != 'refs/heads/master' && github.event_name == 'push' + run: | + git fetch origin master + echo "COMMIT_RANGE=origin/master..." >> $GITHUB_ENV + - name: Set commit range (push to the master branch, e.g. merge) + if: github.ref == 'refs/heads/master' && github.event_name == 'push' + run: echo "COMMIT_RANGE=${{ github.event.before }}.." >> $GITHUB_ENV + - name: Set commit range (pull request) + if: github.event_name == 'pull_request' + run: echo "COMMIT_RANGE=HEAD~.." >> $GITHUB_ENV + - name: Planemo ci_find_repos + run: planemo ci_find_repos --changed_in_commit_range $COMMIT_RANGE --exclude packages --exclude deprecated --exclude_from .tt_skip --output changed_repositories.list + - name: Show repo list + run: cat changed_repositories.list + - uses: actions/upload-artifact@v2.0.1 + with: + name: Workflow artifacts + path: changed_repositories.list + + # Planemo lint the changed repositories + lint: + name: Lint tools + needs: setup + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: [3.7] + steps: + # checkout the repository + # and use it as the current working directory + - uses: actions/checkout@v2 + with: + fetch-depth: 1 + - uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - uses: actions/download-artifact@v2 + with: + name: Workflow artifacts + path: ../workflow_artifacts/ + - name: Determine latest galaxy commit + run: echo "GALAXY_HEAD_SHA=$(cat ../workflow_artifacts/galaxy.sha)" >> $GITHUB_ENV + - name: Cache .cache/pip + uses: actions/cache@v2 + id: cache-pip + with: + path: ~/.cache/pip + key: pip_cache_py_${{ matrix.python-version }}_gxy1_$GALAXY_HEAD_SHA + - name: Install Planemo + run: pip install planemo + - name: Planemo lint + run: | + set -e + while read -r DIR; do + planemo shed_lint --tools --ensure_metadata --urls --report_level warn --fail_level error --recursive "$DIR"; + done < ../workflow_artifacts/changed_repositories.list + + # flake8 of Python scripts in the changed repositories + flake8: + name: Lint Python scripts + needs: setup + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: [3.7] + steps: + # checkout the repository to master + # and use it as the current working directory + - uses: actions/checkout@v2 + with: + fetch-depth: 1 + - uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - uses: actions/download-artifact@v2 + with: + name: Workflow artifacts + path: ../workflow_artifacts/ + - name: Determine latest galaxy commit + run: echo "GALAXY_HEAD_SHA=$(cat ../workflow_artifacts/galaxy.sha)" >> $GITHUB_ENV + - name: Cache .cache/pip + uses: actions/cache@v2 + id: cache-pip + with: + path: ~/.cache/pip + key: pip_cache_py_${{ matrix.python-version }}_gxy1_$GALAXY_HEAD_SHA + - name: Install flake8 + run: pip install flake8 flake8-import-order + - name: Flake8 + run: | + if [ -s ../workflow_artifacts/changed_repositories.list ]; then + flake8 $(cat ../workflow_artifacts/changed_repositories.list) + fi + + lintr: + name: Lint R scripts + needs: setup + runs-on: ubuntu-latest + strategy: + matrix: + r-version: [4.0.1] + steps: + # checkout the repository to master + # and use it as the current working directory + - uses: actions/checkout@v2 + with: + fetch-depth: 1 + - uses: actions/download-artifact@v2 + with: + name: Workflow artifacts + path: ../workflow_artifacts/ + - uses: r-lib/actions/setup-r@master + with: + r-version: ${{ matrix.r-version }} + - name: Cache R packages + uses: actions/cache@v2 + with: + path: ${{ env.R_LIBS_USER }} + key: r_cache_1${{ matrix.r-version }} + - name: Install non-R lintr dependencies + run: sudo apt-get install libcurl4-openssl-dev + - name: Install lintr + run: | + install.packages('remotes') + remotes::install_cran("lintr", force=TRUE) + shell: Rscript {0} + - name: lintr + run: | + library(lintr) + linters <- with_defaults(line_length_linter = NULL, + object_name_linter = NULL, + object_usage_linter = NULL, + cyclocomp_linter(complexity_limit = 25)) + con <- file("../workflow_artifacts/changed_repositories.list", "r") + status <- 0 + while (TRUE) { + repo <- readLines(con, n = 1) + if (length(repo) == 0) { + break + } + lnt <- lint_dir(repo, relative_path=T, linters=linters) + if (length(lnt) > 0) { + status <- 1 + for (l in lnt) { + rel_path <- paste(repo, l$filename, sep="/") + write(paste(paste(rel_path, l$line_number, l$column_number, sep=":"), l$message), stderr()) + } + } + } + quit(status = status) + shell: Rscript {0} + + # Planemo test the changed repositories, each chunk creates an artifact + # containing HTML and JSON reports for the executed tests + test: + name: Test tools + # This job runs on Linux + runs-on: ubuntu-latest + needs: setup + strategy: + fail-fast: false + matrix: + chunk: [0] + python-version: [3.7] + services: + postgres: + image: postgres:11 + env: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: postgres + ports: + - 5432:5432 + steps: + # checkout the repository + # and use it as the current working directory + - uses: actions/checkout@v2 + with: + fetch-depth: 1 + - uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - uses: actions/download-artifact@v2 + with: + name: Workflow artifacts + path: ../workflow_artifacts/ + - name: Determine latest galaxy commit + run: echo "GALAXY_HEAD_SHA=$(cat ../workflow_artifacts/galaxy.sha)" >> $GITHUB_ENV + - name: Cache .cache/pip + uses: actions/cache@v2 + id: cache-pip + with: + path: ~/.cache/pip + key: pip_cache_py_${{ matrix.python-version }}_gxy1_$GALAXY_HEAD_SHA + - name: Cache .planemo + uses: actions/cache@v2 + id: cache-planemo + with: + path: ~/.planemo + key: planemo_cache_py_${{ matrix.python-version }}_gxy1_$GALAXY_HEAD_SHA + - name: Install Planemo + run: pip install planemo + - name: Planemo ci_find_tools + run: | + touch changed_repositories_chunk.list changed_tools_chunk.list + if [ -s ../workflow_artifacts/changed_repositories.list ]; then + if [ $(wc -l < ../workflow_artifacts/changed_repositories.list) -eq 1 ]; then + planemo ci_find_tools --chunk_count 1 --chunk ${{ matrix.chunk }} \ + --output changed_tools_chunk.list \ + $(cat ../workflow_artifacts/changed_repositories.list) + else + planemo ci_find_repos --chunk_count 1 --chunk ${{ matrix.chunk }} \ + --output changed_repositories_chunk.list \ + $(cat ../workflow_artifacts/changed_repositories.list) + fi + fi + - name: Show changed tools/repositories chunk list + run: cat changed_tools_chunk.list changed_repositories_chunk.list + - name: Planemo test tools + run: | + if grep -lqf .tt_biocontainer_skip changed_tools_chunk.list changed_repositories_chunk.list; then + PLANEMO_OPTIONS="" + else + PLANEMO_OPTIONS="--biocontainers --no_dependency_resolution --no_conda_auto_init" + fi + if [ -s changed_tools_chunk.list ]; then + PIP_QUIET=1 planemo test --database_connection postgresql://postgres:postgres@localhost:5432/galaxy $PLANEMO_OPTIONS --galaxy_source $GALAXY_REPO --galaxy_branch $GALAXY_RELEASE --galaxy_python_version ${{ matrix.python-version }} --test_output_json tool_test_output.json $(cat changed_tools_chunk.list) || true + docker system prune --all --force --volumes || true + elif [ -s changed_repositories_chunk.list ]; then + while read -r DIR; do + if [[ "$DIR" =~ ^data_managers.* ]]; then + TESTPATH=$(planemo ci_find_tools "$DIR") + else + TESTPATH="$DIR" + fi + PIP_QUIET=1 planemo test --database_connection postgresql://postgres:postgres@localhost:5432/galaxy $PLANEMO_OPTIONS --galaxy_source $GALAXY_REPO --galaxy_branch $GALAXY_RELEASE --galaxy_python_version ${{ matrix.python-version }} --test_output_json "$DIR"/tool_test_output.json "$TESTPATH" || true + docker system prune --all --force --volumes || true + done < changed_repositories_chunk.list + else + echo '{"tests":[]}' > tool_test_output.json + fi + - name: Merge tool_test_output.json files + run: find . -name tool_test_output.json -exec sh -c 'planemo merge_test_reports "$@" tool_test_output.json' sh {} + + - name: Create tool_test_output.html + run: planemo test_reports tool_test_output.json --test_output tool_test_output.html + - name: Copy artifacts into place + run: | + mkdir upload + mv tool_test_output.json tool_test_output.html upload/ + - uses: actions/upload-artifact@v2.0.1 + with: + name: 'Tool test output ${{ matrix.chunk }}' + path: upload + + # - combine the results of the test chunks (which will never fail due + # to `|| true`) and create a global test report as json and html which + # is provided as artifact + # - check if any tool test actually failed (by lookup in the combined json) + # and fail this step if this is the case + combine_outputs: + name: Combine chunked test results + needs: test + strategy: + matrix: + python-version: [3.7] + # This job runs on Linux + runs-on: ubuntu-latest + steps: + - uses: actions/download-artifact@v2 + with: + path: artifacts + - uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Determine latest galaxy commit + run: echo "GALAXY_HEAD_SHA=$(cat ../workflow_artifacts/galaxy.sha)" >> $GITHUB_ENV + - name: Cache .cache/pip + uses: actions/cache@v2 + id: cache-pip + with: + path: ~/.cache/pip + key: pip_cache_py_${{ matrix.python-version }}_gxy1_$GALAXY_HEAD_SHA + - name: Install Planemo + run: pip install planemo + - name: Install jq + run: sudo apt-get install jq + - name: Combine outputs + run: find artifacts/ -name tool_test_output.json -exec sh -c 'planemo merge_test_reports "$@" tool_test_output.json' sh {} + + - name: Create tool_test_output.html + run: planemo test_reports tool_test_output.json --test_output tool_test_output.html + - name: Copy artifacts into place + run: | + mkdir upload + mv tool_test_output.json tool_test_output.html upload/ + - uses: actions/upload-artifact@v2.0.1 + with: + name: 'All tool test results' + path: upload + - name: Check status of combined status + run: | + if jq '.["tests"][]["data"]["status"]' upload/tool_test_output.json | grep -v "success"; then + echo "Unsuccessful tests found, inspect the 'All tool test results' artifact for details." + exit 1 + fi + + # deploy the tools to the toolsheds (first TTS for testing) + deploy: + name: Deploy + needs: [lint,flake8,lintr,combine_outputs] + strategy: + matrix: + python-version: [3.7] + runs-on: ubuntu-latest + if: github.ref == 'refs/heads/master' && github.repository_owner == 'computational-metabolomics' + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 1 + - uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - uses: actions/download-artifact@v2 + with: + name: Workflow artifacts + path: ../workflow_artifacts/ + - name: Determine latest galaxy commit + run: echo "GALAXY_HEAD_SHA=$(cat ../workflow_artifacts/galaxy.sha)" >> $GITHUB_ENV + - name: Cache .cache/pip + uses: actions/cache@v2 + id: cache-pip + with: + path: ~/.cache/pip + key: pip_cache_py_${{ matrix.python-version }}_gxy_$GALAXY_HEAD_SHA + - name: Install Planemo + run: pip install planemo + - name: Deploy on testtoolshed + env: + SHED_KEY: ${{ secrets.tts_api_key }} + run: | + while read -r DIR; do + planemo shed_update --shed_target testtoolshed --shed_key "${{ env.SHED_KEY }}" --force_repository_creation "$DIR" || exit 1; + done < ../workflow_artifacts/changed_repositories.list + continue-on-error: true + - name: Deploy on toolshed + env: + SHED_KEY: ${{ secrets.ts_api_key }} + run: | + while read -r DIR; do + planemo shed_update --shed_target toolshed --shed_key "${{ env.SHED_KEY }}" --force_repository_creation "$DIR" || exit 1; + done < ../workflow_artifacts/changed_repositories.list diff --git a/.github/workflows/slash.yaml b/.github/workflows/slash.yaml new file mode 100644 index 0000000..918e645 --- /dev/null +++ b/.github/workflows/slash.yaml @@ -0,0 +1,15 @@ +name: Slash Command Dispatch +on: + issue_comment: + types: [created] +jobs: + slashCommandDispatch: + runs-on: ubuntu-latest + steps: + - name: Slash Command Dispatch + if: github.repository_owner == 'computational-metabolomics' + uses: peter-evans/slash-command-dispatch@v2 + with: + token: ${{ secrets.PAT }} + commands: | + run-all-tool-tests diff --git a/.tt_skip b/.tt_skip new file mode 100644 index 0000000..e69de29 diff --git a/README.rst b/README.rst index 972669c..1cf7fbf 100644 --- a/README.rst +++ b/README.rst @@ -3,13 +3,13 @@ msPurity for Galaxy |Build Status (Travis)| |Git| |Bioconda| |License| -Version v1.12.2+galaxy3 ------- +Version v1.16.2+galaxy0 +------------------------ - msPurity - - bioconductor-mspurity v1.12.2 + - bioconductor-mspurity v1.16.2 - Galaxy tools - - v3 + - v0 About ------ @@ -35,17 +35,17 @@ Use the following links for more details of the msPurity R package: Dependencies ------- -Dependencies for these Galaxy tools should be handled by CONDA. The most recent version of the msPurity R package can found on channel `tomnl `_ on `anaconda `_. Warning this will be a different version of the package compared to the BICONDA bioconductor-mspurity. +------------------ +Dependencies for these Galaxy tools should be handled by CONDA. Galaxy ------- +------------------ `Galaxy `_ is an open, web-based platform for data intensive biomedical research. Whether on the free public server or your own instance, you can perform, reproduce, and share complete analyses. Authors, contributors & contacts -------------------------- +------------------------------------- - Thomas N. Lawson (t.n.lawson@bham.ac.uk) - `University of Birmingham (UK) `_ - Ralf J. M. Weber (r.j.weber@bham.ac.uk) - `University of Birmingham (UK) `_ @@ -55,6 +55,12 @@ Authors, contributors & contacts Changes ------------------------- +v1.16.2-galaxy0 + - Version bump + - Fix for intra spectral matching + - Fix for typo https://github.com/computational-metabolomics/mspurity-galaxy/pull/43 + - Lint fixes + v1.12.2-galaxy3 - Bug fix reference to offsets in conditional #41 @@ -73,7 +79,7 @@ v1.12.2-galaxy0 - Update to version v1.12.2 of `msPurity `_ - Optional summary output for combineAnnotations (for very large output) - Extra column added to flagRemove output - - Hide probmetab input + - Hide probmetab input - Make dimsPredictPuritySingle more compatible with "simple workflow inputs" @@ -86,7 +92,7 @@ v1.12.0-galaxy1 - Username updated in Toolshed yaml v1.12.0-galaxy0 - - Updates for Bioconductor stable msPurity v1.12.0 release + - Updates for Bioconductor stable msPurity v1.12.0 release - Additional columns added for spectral matching (for msnpy use case) - Merge of v1.11.4-galaxy1 diff --git a/tools/msPurity/.shed.yml b/tools/msPurity/.shed.yml index db0dece..7885fcd 100644 --- a/tools/msPurity/.shed.yml +++ b/tools/msPurity/.shed.yml @@ -6,6 +6,7 @@ description: "[Metabolomics][LC-MS][DIMS][MS/MS] msPurity - R package for precu auto_tool_repositories: name_template: "{{ tool_id }}" description_template: "[Metabolomics][LC-MS][DIMS][MS/MS] msPurity - {{ tool_name }} {{description}}" + long_description_template: "[Metabolomics][LC-MS][DIMS][MS/MS] msPurity - {{ tool_name }} {{description}}" suite: name: "suite_mspurity" description: "wrapper for msPurity R package" diff --git a/tools/msPurity/averageFragSpectra.R b/tools/msPurity/averageFragSpectra.R index 13a488c..81cc843 100644 --- a/tools/msPurity/averageFragSpectra.R +++ b/tools/msPurity/averageFragSpectra.R @@ -4,33 +4,33 @@ library(xcms) print(sessionInfo()) -get_av_spectra <- function(x){ +get_av_spectra <- function(x) { - if (length(x$av_intra)>0){ + if (length(x$av_intra) > 0) { av_intra_df <- plyr::ldply(x$av_intra) - if (nrow(av_intra_df)==0){ + if (nrow(av_intra_df) == 0) { av_intra_df <- NULL }else{ - av_intra_df$method <- 'intra' + av_intra_df$method <- "intra" } }else{ av_intra_df <- NULL } - if ((is.null(x$av_inter)) || (nrow(x$av_inter)==0)){ + if ((is.null(x$av_inter)) || (nrow(x$av_inter) == 0)) { av_inter_df <- NULL }else{ av_inter_df <- x$av_inter - av_inter_df$method <- 'inter' + av_inter_df$method <- "inter" } - if ((is.null(x$av_all)) || (nrow(x$av_all)==0)){ + if ((is.null(x$av_all)) || (nrow(x$av_all) == 0)) { av_all_df <- NULL }else{ av_all_df <- x$av_all - av_all_df$method <- 'all' + av_all_df$method <- "all" } combined <- plyr::rbind.fill(av_intra_df, av_inter_df, av_all_df) @@ -40,126 +40,116 @@ get_av_spectra <- function(x){ option_list <- list( - make_option("--out_rdata", type="character"), - make_option("--out_peaklist", type="character"), - make_option("--pa", type="character"), - - make_option("--av_level", type="character"), - - make_option("--minfrac", default=0.5), - make_option("--minnum", default=1), - make_option("--ppm", default=5.0), - - make_option("--snr", default=0), - - make_option("--ra", default=0), - - make_option("--av", default="median", type="character"), - make_option("--sumi", action="store_true"), - - make_option("--rmp", action="store_true"), - make_option("--cores", default=1) + make_option("--out_rdata", type = "character"), + make_option("--out_peaklist", type = "character"), + make_option("--pa", type = "character"), + make_option("--av_level", type = "character"), + make_option("--minfrac", default = 0.5), + make_option("--minnum", default = 1), + make_option("--ppm", default = 5.0), + make_option("--snr", default = 0), + make_option("--ra", default = 0), + make_option("--av", default = "median", type = "character"), + make_option("--sumi", action = "store_true"), + make_option("--rmp", action = "store_true"), + make_option("--cores", default = 1) ) -opt <- parse_args(OptionParser(option_list=option_list)) +opt <- parse_args(OptionParser(option_list = option_list)) print(opt) -loadRData <- function(rdata_path, name){ -#loads an RData file, and returns the named xset object if it is there +load_r_data <- function(rdata_path, name) { + #loads an RData file, and returns the named xset object if it is there load(rdata_path) return(get(ls()[ls() %in% name])) } # Requires -pa <- loadRData(opt$pa, 'pa') +pa <- load_r_data(opt$pa, "pa") pa@cores <- opt$cores -if(is.null(opt$rmp)){ - rmp = FALSE +if (is.null(opt$rmp)) { + rmp <- FALSE }else{ - rmp = TRUE + rmp <- TRUE } -if(is.null(opt$sumi)){ - - sumi = FALSE +if (is.null(opt$sumi)) { + sumi <- FALSE }else{ - sumi = TRUE - + sumi <- TRUE } - -if(opt$av_level=="intra"){ - +if (opt$av_level == "intra") { pa <- msPurity::averageIntraFragSpectra(pa, - minfrac=opt$minfrac, - minnum=opt$minnum, - ppm=opt$ppm, - snr=opt$snr, - ra=opt$ra, - av=opt$av, - sumi=sumi, - rmp=rmp, - cores=opt$cores) - -} else if(opt$av_level=="inter"){ + minfrac = opt$minfrac, + minnum = opt$minnum, + ppm = opt$ppm, + snr = opt$snr, + ra = opt$ra, + av = opt$av, + sumi = sumi, + rmp = rmp, + cores = opt$cores) + +} else if (opt$av_level == "inter") { pa <- msPurity::averageInterFragSpectra(pa, - minfrac=opt$minfrac, - minnum=opt$minnum, - ppm=opt$ppm, - snr=opt$snr, - ra=opt$ra, - av=opt$av, - sumi=sumi, - rmp=rmp, - cores=opt$cores) -} else if(opt$av_level=="all"){ + minfrac = opt$minfrac, + minnum = opt$minnum, + ppm = opt$ppm, + snr = opt$snr, + ra = opt$ra, + av = opt$av, + sumi = sumi, + rmp = rmp, + cores = opt$cores) +} else if (opt$av_level == "all") { pa <- msPurity::averageAllFragSpectra(pa, - minfrac=opt$minfrac, - minnum=opt$minnum, - ppm=opt$ppm, - snr=opt$snr, - ra=opt$ra, - av=opt$av, - sumi=sumi, - rmp=rmp, - cores=opt$cores) - + minfrac = opt$minfrac, + minnum = opt$minnum, + ppm = opt$ppm, + snr = opt$snr, + ra = opt$ra, + av = opt$av, + sumi = sumi, + rmp = rmp, + cores = opt$cores) } print(pa) -save(pa, file=opt$out_rdata) - +save(pa, file = opt$out_rdata) -if (length(pa)>0){ +if (length(pa) > 0) { av_spectra <- plyr::ldply(pa@av_spectra, get_av_spectra) - if (nrow(av_spectra)==0){ - message('No average spectra available') - } else{ - colnames(av_spectra)[1] <- 'grpid' + if (nrow(av_spectra) == 0) { + message("No average spectra available") + } else { + colnames(av_spectra)[1] <- "grpid" av_spectra$grpid <- names(pa@av_spectra)[av_spectra$grpid] - - if((length(pa@av_intra_params)>0) || (length(pa@av_inter_params)>0) ){ - # Add some extra info (only required if av_intra or av_inter performed) - colnames(av_spectra)[2] <- 'fileid' - av_spectra$avid <- 1:nrow(av_spectra) - - filenames <- sapply(av_spectra$fileid, function(x) names(pa@fileList)[as.integer(x)]) - # filenames_galaxy <- sapply(av_spectra$fileid, function(x) basename(pa@fileList[as.integer(x)])) - - av_spectra = as.data.frame(append(av_spectra, list(filename = filenames), after=2)) + + if ((length(pa@av_intra_params) > 0) || (length(pa@av_inter_params) > 0)) { + # Add some extra info (only required if av_intra or av_inter performed) + colnames(av_spectra)[2] <- "fileid" + av_spectra$avid <- seq_len(nrow(av_spectra)) + + filenames <- sapply(av_spectra$fileid, + function(x) names(pa@fileList)[as.integer(x)]) + # filenames_galaxy <- sapply( + # av_spectra$fileid, function(x) basename(pa@fileList[as.integer(x)])) + + av_spectra <- as.data.frame( + append(av_spectra, list(filename = filenames), after = 2)) } print(head(av_spectra)) - write.table(av_spectra, opt$out_peaklist, row.names=FALSE, sep='\t') + write.table(av_spectra, opt$out_peaklist, row.names = FALSE, sep = "\t") } } - diff --git a/tools/msPurity/combineAnnotations.R b/tools/msPurity/combineAnnotations.R index 5a83939..04f7aa3 100644 --- a/tools/msPurity/combineAnnotations.R +++ b/tools/msPurity/combineAnnotations.R @@ -4,115 +4,120 @@ print(sessionInfo()) # Get the parameter option_list <- list( - make_option(c("-s","--sm_resultPth"),type="character"), - make_option(c("-m","--metfrag_resultPth"),type="character"), - make_option(c("-c","--sirius_csi_resultPth"),type="character"), - make_option(c("-p","--probmetab_resultPth"),type="character"), - make_option(c("-l","--ms1_lookup_resultPth"),type="character"), - - make_option("--ms1_lookup_checkAdducts", action="store_true"), - make_option("--ms1_lookup_keepAdducts", type="character", default=NA), - make_option("--ms1_lookup_dbSource", type="character", default="hmdb"), - - make_option("--sm_weight", type="numeric"), - make_option("--metfrag_weight", type="numeric"), - make_option("--sirius_csi_weight", type="numeric"), - make_option("--probmetab_weight", type="numeric"), - make_option("--ms1_lookup_weight", type="numeric"), - make_option("--biosim_weight", type="numeric"), - - make_option("--summaryOutput", action="store_true"), - - make_option("--create_new_database", action="store_true"), - make_option("--outdir", type="character", default="."), - - make_option("--compoundDbType", type="character", default="sqlite"), - make_option("--compoundDbPth", type="character", default=NA), - make_option("--compoundDbHost", type="character", default=NA) + make_option(c("-s", "--sm_resultPth"), type = "character"), + make_option(c("-m", "--metfrag_resultPth"), type = "character"), + make_option(c("-c", "--sirius_csi_resultPth"), type = "character"), + make_option(c("-p", "--probmetab_resultPth"), type = "character"), + make_option(c("-l", "--ms1_lookup_resultPth"), type = "character"), + + make_option("--ms1_lookup_checkAdducts", action = "store_true"), + make_option("--ms1_lookup_keepAdducts", type = "character", default = NA), + make_option("--ms1_lookup_dbSource", type = "character", default = "hmdb"), + + make_option("--sm_weight", type = "numeric"), + make_option("--metfrag_weight", type = "numeric"), + make_option("--sirius_csi_weight", type = "numeric"), + make_option("--probmetab_weight", type = "numeric"), + make_option("--ms1_lookup_weight", type = "numeric"), + make_option("--biosim_weight", type = "numeric"), + + make_option("--summaryOutput", action = "store_true"), + + make_option("--create_new_database", action = "store_true"), + make_option("--outdir", type = "character", default = "."), + + make_option("--compoundDbType", type = "character", default = "sqlite"), + make_option("--compoundDbPth", type = "character", default = NA), + make_option("--compoundDbHost", type = "character", default = NA) ) -opt <- parse_args(OptionParser(option_list=option_list)) +opt <- parse_args(OptionParser(option_list = option_list)) print(opt) -if (!is.null(opt$create_new_database)){ - sm_resultPth <- file.path(opt$outdir, 'combined_annotations.sqlite') +if (!is.null(opt$create_new_database)) { + sm_resultPth <- file.path(opt$outdir, "combined_annotations.sqlite") file.copy(opt$sm_resultPth, sm_resultPth) }else{ sm_resultPth <- opt$sm_resultPth } -if (is.null(opt$ms1_lookup_checkAdducts)){ +if (is.null(opt$ms1_lookup_checkAdducts)) { opt$ms1_lookup_checkAdducts <- FALSE } -if (!is.null(opt$ms1_lookup_keepAdducts)){ +if (!is.null(opt$ms1_lookup_keepAdducts)) { opt$ms1_lookup_keepAdducts <- gsub("__ob__", "[", opt$ms1_lookup_keepAdducts) opt$ms1_lookup_keepAdducts <- gsub("__cb__", "]", opt$ms1_lookup_keepAdducts) ms1_lookup_keepAdducts <- strsplit(opt$ms1_lookup_keepAdducts, ",")[[1]] } -weights <-list('sm'=opt$sm_weight, - 'metfrag'=opt$metfrag_weight, - 'sirius_csifingerid'= opt$sirius_csi_weight, - 'probmetab'=opt$probmetab_weight, - 'ms1_lookup'=opt$ms1_lookup_weight, - 'biosim'=opt$biosim_weight +weights <- list("sm" = opt$sm_weight, + "metfrag" = opt$metfrag_weight, + "sirius_csifingerid" = opt$sirius_csi_weight, + "probmetab" = opt$probmetab_weight, + "ms1_lookup" = opt$ms1_lookup_weight, + "biosim" = opt$biosim_weight ) print(weights) -if (is.null(opt$probmetab_resultPth)){ - opt$probmetab_resultPth = NA +if (is.null(opt$probmetab_resultPth)) { + opt$probmetab_resultPth <- NA } -if (round(!sum(unlist(weights),0)==1)){ - stop(paste0('The weights should sum to 1 not ', sum(unlist(weights)))) +if (round(!sum(unlist(weights), 0) == 1)) { + stop(paste0("The weights should sum to 1 not ", sum(unlist(weights)))) } -if (is.null(opt$summaryOutput)){ - summaryOutput = FALSE +if (is.null(opt$summaryOutput)) { + summaryOutput <- FALSE }else{ - summaryOutput = TRUE + summaryOutput <- TRUE } -if (opt$compoundDbType=='local_config'){ +if (opt$compoundDbType == "local_config") { # load in compound config # Soure local function taken from workflow4metabolomics - source_local <- function(fname){ argv <- commandArgs(trailingOnly=FALSE); base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)); source(paste(base_dir, fname, sep="/")) } + source_local <- function(fname) { + argv <- commandArgs(trailingOnly = FALSE) + base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) + source(paste(base_dir, fname, sep = "/")) + } source_local("dbconfig.R") }else{ - compoundDbPth = opt$compoundDbPth - compoundDbType = opt$compoundDbType - compoundDbName = NA - compoundDbHost = NA - compoundDbPort = NA - compoundDbUser = NA - compoundDbPass = NA + compoundDbPth <- opt$compoundDbPth + compoundDbType <- opt$compoundDbType + compoundDbName <- NA + compoundDbHost <- NA + compoundDbPort <- NA + compoundDbUser <- NA + compoundDbPass <- NA } summary_output <- msPurity::combineAnnotations( - sm_resultPth = sm_resultPth, - compoundDbPth = compoundDbPth, - metfrag_resultPth = opt$metfrag_resultPth, - sirius_csi_resultPth = opt$sirius_csi_resultPth, - probmetab_resultPth = opt$probmetab_resultPth, - ms1_lookup_resultPth = opt$ms1_lookup_resultPth, - ms1_lookup_keepAdducts = ms1_lookup_keepAdducts, - ms1_lookup_checkAdducts = opt$ms1_lookup_checkAdducts, - - compoundDbType = compoundDbType, - compoundDbName = compoundDbName, - compoundDbHost = compoundDbHost, - compoundDbPort = compoundDbPort, - compoundDbUser = compoundDbUser, - compoundDbPass = compoundDbPass, - weights = weights, - summaryOutput = summaryOutput) -if (summaryOutput){ - write.table(summary_output, file.path(opt$outdir, 'combined_annotations.tsv'), sep = '\t', row.names = FALSE) + sm_resultPth = sm_resultPth, + compoundDbPth = compoundDbPth, + metfrag_resultPth = opt$metfrag_resultPth, + sirius_csi_resultPth = opt$sirius_csi_resultPth, + probmetab_resultPth = opt$probmetab_resultPth, + ms1_lookup_resultPth = opt$ms1_lookup_resultPth, + ms1_lookup_keepAdducts = ms1_lookup_keepAdducts, + ms1_lookup_checkAdducts = opt$ms1_lookup_checkAdducts, + + compoundDbType = compoundDbType, + compoundDbName = compoundDbName, + compoundDbHost = compoundDbHost, + compoundDbPort = compoundDbPort, + compoundDbUser = compoundDbUser, + compoundDbPass = compoundDbPass, + weights = weights, + summaryOutput = summaryOutput) +if (summaryOutput) { + write.table(summary_output, + file.path(opt$outdir, "combined_annotations.tsv"), + sep = "\t", row.names = FALSE) } - -write.table(summary_output, file.path(opt$outdir, 'combined_annotations.tsv'), sep = '\t', row.names = FALSE) - +write.table(summary_output, + file.path(opt$outdir, "combined_annotations.tsv"), + sep = "\t", row.names = FALSE) closeAllConnections() - diff --git a/tools/msPurity/createDatabase.R b/tools/msPurity/createDatabase.R index 0c2879b..c7ab9c5 100644 --- a/tools/msPurity/createDatabase.R +++ b/tools/msPurity/createDatabase.R @@ -3,19 +3,18 @@ library(optparse) library(xcms) library(CAMERA) print(sessionInfo()) -print('CREATING DATABASE') +print("CREATING DATABASE") -xset_pa_filename_fix <- function(opt, pa, xset){ +xset_pa_filename_fix <- function(opt, pa, xset) { - if (!is.null(opt$mzML_files) && !is.null(opt$galaxy_names)){ + if (!is.null(opt$mzML_files) && !is.null(opt$galaxy_names)) { # NOTE: Relies on the pa@fileList having the names of files given as 'names' of the variables # needs to be done due to Galaxy moving the files around and screwing up any links to files - filepaths <- trimws(strsplit(opt$mzML_files, ',')[[1]]) + filepaths <- trimws(strsplit(opt$mzML_files, ",")[[1]]) filepaths <- filepaths[filepaths != ""] - new_names <- basename(filepaths) - galaxy_names <- trimws(strsplit(opt$galaxy_names, ',')[[1]]) + galaxy_names <- trimws(strsplit(opt$galaxy_names, ",")[[1]]) galaxy_names <- galaxy_names[galaxy_names != ""] nsave <- names(pa@fileList) @@ -28,10 +27,10 @@ xset_pa_filename_fix <- function(opt, pa, xset){ } - if(!all(basename(pa@fileList)==basename(xset@filepaths))){ - if(!all(names(pa@fileList)==basename(xset@filepaths))){ - print('FILELISTS DO NOT MATCH') - message('FILELISTS DO NOT MATCH') + if (!all(basename(pa@fileList) == basename(xset@filepaths))) { + if (!all(names(pa@fileList) == basename(xset@filepaths))) { + print("FILELISTS DO NOT MATCH") + message("FILELISTS DO NOT MATCH") quit(status = 1) }else{ xset@filepaths <- unname(pa@fileList) @@ -48,23 +47,23 @@ xset_pa_filename_fix <- function(opt, pa, xset){ option_list <- list( - make_option(c("-o", "--outDir"), type="character"), - make_option("--pa", type="character"), - make_option("--xset_xa", type="character"), - make_option("--xcms_camera_option", type="character"), - make_option("--eic", action="store_true"), - make_option("--cores", default=4), - make_option("--mzML_files", type="character"), - make_option("--galaxy_names", type="character"), - make_option("--grpPeaklist", type="character") + make_option(c("-o", "--outDir"), type = "character"), + make_option("--pa", type = "character"), + make_option("--xset_xa", type = "character"), + make_option("--xcms_camera_option", type = "character"), + make_option("--eic", action = "store_true"), + make_option("--cores", default = 4), + make_option("--mzML_files", type = "character"), + make_option("--galaxy_names", type = "character"), + make_option("--grpPeaklist", type = "character") ) # store options -opt<- parse_args(OptionParser(option_list=option_list)) +opt <- parse_args(OptionParser(option_list = option_list)) print(opt) -loadRData <- function(rdata_path, name){ +loadRData <- function(rdata_path, name) { #loads an RData file, and returns the named xset object if it is there load(rdata_path) return(get(ls()[ls() %in% name])) @@ -73,36 +72,36 @@ loadRData <- function(rdata_path, name){ getxcmsSetObject <- function(xobject) { # XCMS 1.x if (class(xobject) == "xcmsSet") - return (xobject) + return(xobject) # XCMS 3.x if (class(xobject) == "XCMSnExp") { # Get the legacy xcmsSet object - suppressWarnings(xset <- as(xobject, 'xcmsSet')) - sampclass(xset) <- xset@phenoData$sample_group - return (xset) + suppressWarnings(xset <- as(xobject, "xcmsSet")) + xcms::sampclass(xset) <- xset@phenoData$sample_group + return(xset) } } -print(paste('pa', opt$pa)) +print(paste("pa", opt$pa)) print(opt$xset) print(opt$xcms_camera_option) # Requires -pa <- loadRData(opt$pa, 'pa') +pa <- loadRData(opt$pa, "pa") print(pa@fileList) # Missing list element causes failures (should be updated # in msPurity R package for future releases) -if (!exists('allfrag', where=pa@filter_frag_params)){ +if (!exists("allfrag", where = pa@filter_frag_params)) { pa@filter_frag_params$allfrag <- FALSE } -if (opt$xcms_camera_option=='xcms'){ +if (opt$xcms_camera_option == "xcms") { - xset <- loadRData(opt$xset, c('xset','xdata')) + xset <- loadRData(opt$xset, c("xset", "xdata")) xset <- getxcmsSetObject(xset) fix <- xset_pa_filename_fix(opt, pa, xset) pa <- fix[[1]] @@ -110,7 +109,7 @@ if (opt$xcms_camera_option=='xcms'){ xa <- NULL }else{ - xa <- loadRData(opt$xset, 'xa') + xa <- loadRData(opt$xset, "xa") fix <- xset_pa_filename_fix(opt, pa, xa@xcmsSet) pa <- fix[[1]] xa@xcmsSet <- fix[[2]] @@ -118,36 +117,33 @@ if (opt$xcms_camera_option=='xcms'){ } - -if(is.null(opt$grpPeaklist)){ - grpPeaklist = NA +if (is.null(opt$grpPeaklist)) { + grpPeaklist <- NA }else{ - grpPeaklist = opt$grpPeaklist + grpPeaklist <- opt$grpPeaklist } - - dbPth <- msPurity::createDatabase(pa, - xset=xset, - xsa=xa, - outDir=opt$outDir, - grpPeaklist=grpPeaklist, - dbName='createDatabase_output.sqlite' + xset = xset, + xsa = xa, + outDir = opt$outDir, + grpPeaklist = grpPeaklist, + dbName = "createDatabase_output.sqlite" ) -if (!is.null(opt$eic)){ +if (!is.null(opt$eic)) { - if (is.null(xset)){ + if (is.null(xset)) { xset <- xa@xcmsSet } # previous check should have matched filelists together xset@filepaths <- unname(pa@fileList) - convert2Raw <- function(x, xset){ + convert2Raw <- function(x, xset) { sid <- unique(x$sample) # for each file get list of peaks x$rt_raw <- xset@rt$raw[[sid]][match(x$rt, xset@rt$corrected[[sid]])] @@ -157,13 +153,14 @@ if (!is.null(opt$eic)){ } - xset@peaks <- as.matrix(plyr::ddply(data.frame(xset@peaks), ~ sample, convert2Raw, xset=xset)) + xset@peaks <- as.matrix( + plyr::ddply(data.frame(xset@peaks), ~ sample, convert2Raw, xset = xset)) # Saves the EICS into the previously created database px <- msPurity::purityX(xset, saveEIC = TRUE, - cores=1, - sqlitePth=dbPth, + cores = 1, + sqlitePth = dbPth, rtrawColumns = TRUE) } diff --git a/tools/msPurity/createDatabase.xml b/tools/msPurity/createDatabase.xml index 6974a40..851b0e0 100644 --- a/tools/msPurity/createDatabase.xml +++ b/tools/msPurity/createDatabase.xml @@ -101,7 +101,7 @@ Create SQLite database for LC-MS(/MS) dataset Create and SQLite database of an LC-MS(/MS) experiment (replaces the create_database function). -Schema details can be found [here](https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-datatabase-schema.html). +Schema details can be found [here](https://www.bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-database-vignette.html). **Example LC-MS/MS processing workflow** diff --git a/tools/msPurity/createMSP.R b/tools/msPurity/createMSP.R index 985cff2..180fff6 100644 --- a/tools/msPurity/createMSP.R +++ b/tools/msPurity/createMSP.R @@ -4,39 +4,40 @@ print(sessionInfo()) # Get the parameter option_list <- list( - make_option("--rdata_input",type="character"), - make_option("--method",type="character"), - make_option("--metadata",type="character"), - make_option("--metadata_cols",type="character"), - make_option("--metadata_cols_filter",type="character"), - make_option("--adduct_split", action="store_true"), - make_option("--xcms_groupids",type="character"), - make_option("--filter",action="store_true"), - make_option("--intensity_ra",type="character"), - make_option("--include_adducts",type="character"), - make_option("--msp_schema",type="character"), - make_option("--include_adducts_custom",type="character", default=""), - make_option("--out_dir",type="character", default=".") + make_option("--rdata_input", type = "character"), + make_option("--method", type = "character"), + make_option("--metadata", type = "character"), + make_option("--metadata_cols", type = "character"), + make_option("--metadata_cols_filter", type = "character"), + make_option("--adduct_split", action = "store_true"), + make_option("--xcms_groupids", type = "character"), + make_option("--filter", action = "store_true"), + make_option("--intensity_ra", type = "character"), + make_option("--include_adducts", type = "character"), + make_option("--msp_schema", type = "character"), + make_option("--include_adducts_custom", type = "character", default = ""), + make_option("--out_dir", type = "character", default = ".") ) -opt <- parse_args(OptionParser(option_list=option_list)) +opt <- parse_args(OptionParser(option_list = option_list)) print(opt) load(opt$rdata_input) -if (is.null(opt$metadata)){ +if (is.null(opt$metadata)) { metadata <- NULL }else{ - metadata <- read.table(opt$metadata, header = TRUE, sep='\t', stringsAsFactors = FALSE, check.names = FALSE) + metadata <- read.table(opt$metadata, header = TRUE, sep = "\t", + stringsAsFactors = FALSE, check.names = FALSE) - if(!opt$metadata_cols_filter==''){ - metadata_cols_filter <- strsplit(opt$metadata_cols_filter, ',')[[1]] + if (!opt$metadata_cols_filter == "") { + metadata_cols_filter <- strsplit(opt$metadata_cols_filter, ",")[[1]] - metadata <- metadata[,metadata_cols_filter, drop=FALSE] + metadata <- metadata[, metadata_cols_filter, drop = FALSE] print(metadata) - if (!"grpid" %in% colnames(metadata)){ - metadata$grpid <- 1:nrow(metadata) + if (!"grpid" %in% colnames(metadata)) { + metadata$grpid <- seq_len(nrow(metadata)) } print(metadata) @@ -47,7 +48,7 @@ if (is.null(opt$metadata)){ -if (is.null(opt$metadata_cols) || opt$metadata_cols==''){ +if (is.null(opt$metadata_cols) || opt$metadata_cols == "") { metadata_cols <- NULL }else{ metadata_cols <- opt$metadata_cols @@ -55,36 +56,33 @@ if (is.null(opt$metadata_cols) || opt$metadata_cols==''){ } -if(is.null(opt$adduct_split)){ +if (is.null(opt$adduct_split)) { adduct_split <- FALSE }else{ adduct_split <- TRUE } -if (is.null(opt$xcms_groupids)){ +if (is.null(opt$xcms_groupids)) { xcms_groupids <- NULL }else{ - xcms_groupids <- trimws(strsplit(opt$xcms_groupids, ',')[[1]]) + xcms_groupids <- trimws(strsplit(opt$xcms_groupids, ",")[[1]]) } - - - -if (is.null(opt$include_adducts_custom)){ - include_adducts_custom <- '' +if (is.null(opt$include_adducts_custom)) { + include_adducts_custom <- "" }else{ include_adducts_custom <- opt$include_adducts_custom } -if (opt$include_adducts=='None'){ - include_adducts <- '' +if (opt$include_adducts == "None") { + include_adducts <- "" }else{ include_adducts <- opt$include_adducts } -include_adducts_all <- paste(include_adducts_custom, ',', include_adducts, sep="") +include_adducts_all <- paste(include_adducts_custom, ",", include_adducts, sep = "") include_adducts_all <- gsub("^,", "", include_adducts_all) include_adducts_all <- gsub(",$", "", include_adducts_all) @@ -96,7 +94,7 @@ include_adducts_all <- gsub(",", " ", include_adducts_all) -if(is.null(opt$filter)){ +if (is.null(opt$filter)) { filter <- FALSE }else{ filter <- TRUE @@ -105,15 +103,15 @@ if(is.null(opt$filter)){ msPurity::createMSP(pa, - msp_file_pth = file.path(opt$out_dir, 'lcmsms_spectra.msp'), + msp_file_pth = file.path(opt$out_dir, "lcmsms_spectra.msp"), metadata = metadata, metadata_cols = metadata_cols, method = opt$method, adduct_split = adduct_split, xcms_groupids = xcms_groupids, filter = filter, - intensity_ra=opt$intensity_ra, - include_adducts=include_adducts_all, - msp_schema=opt$msp_schema) + intensity_ra = opt$intensity_ra, + include_adducts = include_adducts_all, + msp_schema = opt$msp_schema) -print('msp created') +print("msp created") diff --git a/tools/msPurity/dbconfig.R b/tools/msPurity/dbconfig.R index dd000ad..f411256 100644 --- a/tools/msPurity/dbconfig.R +++ b/tools/msPurity/dbconfig.R @@ -1,9 +1,9 @@ -compoundDbType <- 'mysql' +compoundDbType <- "mysql" compoundDbPth <- NA -compoundDbName <- 'metab_compound' -compoundDbPort <- '3306' -compoundDbUser <- 'metab_compound' -compoundDbPass <- 'metab_compound' +compoundDbName <- "metab_compound" +compoundDbPort <- "3306" +compoundDbUser <- "metab_compound" +compoundDbPass <- "metab_compound" q_dbPth <- NA q_dbType <- NA diff --git a/tools/msPurity/dimsPredictPuritySingle.R b/tools/msPurity/dimsPredictPuritySingle.R index ad3f894..8079c5c 100644 --- a/tools/msPurity/dimsPredictPuritySingle.R +++ b/tools/msPurity/dimsPredictPuritySingle.R @@ -3,27 +3,27 @@ library(optparse) print(sessionInfo()) option_list <- list( - make_option(c("--mzML_file"), type="character"), - make_option(c("--mzML_files"), type="character"), - make_option(c("--mzML_filename"), type="character", default=''), - make_option(c("--mzML_galaxy_names"), type="character", default=''), - make_option(c("--peaks_file"), type="character"), - make_option(c("-o", "--out_dir"), type="character"), - make_option("--minoffset", default=0.5), - make_option("--maxoffset", default=0.5), - make_option("--ilim", default=0.05), - make_option("--ppm", default=4), - make_option("--dimspy", action="store_true"), - make_option("--sim", action="store_true"), - make_option("--remove_nas", action="store_true"), - make_option("--iwNorm", default="none", type="character"), - make_option("--file_num_dimspy", default=1), - make_option("--exclude_isotopes", action="store_true"), - make_option("--isotope_matrix", type="character") + make_option(c("--mzML_file"), type = "character"), + make_option(c("--mzML_files"), type = "character"), + make_option(c("--mzML_filename"), type = "character", default = ""), + make_option(c("--mzML_galaxy_names"), type = "character", default = ""), + make_option(c("--peaks_file"), type = "character"), + make_option(c("-o", "--out_dir"), type = "character"), + make_option("--minoffset", default = 0.5), + make_option("--maxoffset", default = 0.5), + make_option("--ilim", default = 0.05), + make_option("--ppm", default = 4), + make_option("--dimspy", action = "store_true"), + make_option("--sim", action = "store_true"), + make_option("--remove_nas", action = "store_true"), + make_option("--iwNorm", default = "none", type = "character"), + make_option("--file_num_dimspy", default = 1), + make_option("--exclude_isotopes", action = "store_true"), + make_option("--isotope_matrix", type = "character") ) # store options -opt<- parse_args(OptionParser(option_list=option_list)) +opt <- parse_args(OptionParser(option_list = option_list)) print(sessionInfo()) print(opt) @@ -31,119 +31,118 @@ print(opt) print(opt$mzML_files) print(opt$mzML_galaxy_names) -str_to_vec <- function(x){ +str_to_vec <- function(x) { print(x) - x <- trimws(strsplit(x, ',')[[1]]) + x <- trimws(strsplit(x, ",")[[1]]) return(x[x != ""]) } -find_mzml_file <- function(mzML_files, galaxy_names, mzML_filename){ +find_mzml_file <- function(mzML_files, galaxy_names, mzML_filename) { mzML_filename <- trimws(mzML_filename) mzML_files <- str_to_vec(mzML_files) galaxy_names <- str_to_vec(galaxy_names) - if (mzML_filename %in% galaxy_names){ - return(mzML_files[galaxy_names==mzML_filename]) + if (mzML_filename %in% galaxy_names) { + return(mzML_files[galaxy_names == mzML_filename]) }else{ stop(paste("mzML file not found - ", mzML_filename)) } } -if (is.null(opt$dimspy)){ - df <- read.table(opt$peaks_file, header = TRUE, sep='\t') - if (file.exists(opt$mzML_file)){ +if (is.null(opt$dimspy)) { + df <- read.table(opt$peaks_file, header = TRUE, sep = "\t") + if (file.exists(opt$mzML_file)) { mzML_file <- opt$mzML_file - }else if (!is.null(opt$mzML_files)){ - mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, + }else if (!is.null(opt$mzML_files)) { + mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, opt$mzML_filename) }else{ - mzML_file <- file.path(opt$mzML_file, filename) - } + mzML_file <- file.path(opt$mzML_file, filename) + } }else{ indf <- read.table(opt$peaks_file, - header = TRUE, sep='\t', stringsAsFactors = FALSE) - + header = TRUE, sep = "\t", stringsAsFactors = FALSE) + filename <- colnames(indf)[8:ncol(indf)][opt$file_num_dimspy] print(filename) # check if the data file is mzML or RAW (can only use mzML currently) so # we expect an mzML file of the same name in the same folder - indf$i <- indf[,colnames(indf)==filename] - indf[,colnames(indf)==filename] <- as.numeric(indf[,colnames(indf)==filename]) - - filename = sub("raw", "mzML", filename, ignore.case = TRUE) + indf$i <- indf[, colnames(indf) == filename] + indf[, colnames(indf) == filename] <- as.numeric(indf[, colnames(indf) == filename]) + + filename <- sub("raw", "mzML", filename, ignore.case = TRUE) print(filename) - - - if (file.exists(opt$mzML_file)){ + + if (file.exists(opt$mzML_file)) { mzML_file <- opt$mzML_file - }else if (!is.null(opt$mzML_files)){ + }else if (!is.null(opt$mzML_files)) { mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, filename) }else{ - mzML_file <- file.path(opt$mzML_file, filename) - } - - # Update the dimspy output with the correct information - df <- indf[4:nrow(indf),] - if ('blank_flag' %in% colnames(df)){ - df <- df[df$blank_flag==1,] + mzML_file <- file.path(opt$mzML_file, filename) } - colnames(df)[colnames(df)=='m.z'] <- 'mz' - - if ('nan' %in% df$mz){ - df[df$mz=='nan',]$mz <- NA + + # Update the dimspy output with the correct information + df <- indf[4:nrow(indf), ] + if ("blank_flag" %in% colnames(df)) { + df <- df[df$blank_flag == 1, ] + } + colnames(df)[colnames(df) == "m.z"] <- "mz" + + if ("nan" %in% df$mz) { + df[df$mz == "nan", ]$mz <- NA } df$mz <- as.numeric(df$mz) } -if (!is.null(opt$remove_nas)){ - df <- df[!is.na(df$mz),] +if (!is.null(opt$remove_nas)) { + df <- df[!is.na(df$mz), ] } -if (is.null(opt$isotope_matrix)){ +if (is.null(opt$isotope_matrix)) { im <- NULL }else{ im <- read.table(opt$isotope_matrix, - header = TRUE, sep='\t', stringsAsFactors = FALSE) + header = TRUE, sep = "\t", stringsAsFactors = FALSE) } -if (is.null(opt$exclude_isotopes)){ +if (is.null(opt$exclude_isotopes)) { isotopes <- FALSE }else{ isotopes <- TRUE } -if (is.null(opt$sim)){ - sim=FALSE +if (is.null(opt$sim)) { + sim <- FALSE }else{ - sim=TRUE + sim <- TRUE } -minOffset = as.numeric(opt$minoffset) -maxOffset = as.numeric(opt$maxoffset) - -if (opt$iwNorm=='none'){ - iwNorm = FALSE - iwNormFun = NULL -}else if (opt$iwNorm=='gauss'){ - iwNorm = TRUE - iwNormFun = msPurity::iwNormGauss(minOff=-minOffset, maxOff=maxOffset) -}else if (opt$iwNorm=='rcosine'){ - iwNorm = TRUE - iwNormFun = msPurity::iwNormRcosine(minOff=-minOffset, maxOff=maxOffset) -}else if (opt$iwNorm=='QE5'){ - iwNorm = TRUE - iwNormFun = msPurity::iwNormQE.5() +minOffset <- as.numeric(opt$minoffset) +maxOffset <- as.numeric(opt$maxoffset) + +if (opt$iwNorm == "none") { + iwNorm <- FALSE + iwNormFun <- NULL +}else if (opt$iwNorm == "gauss") { + iwNorm <- TRUE + iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset) +}else if (opt$iwNorm == "rcosine") { + iwNorm <- TRUE + iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset) +}else if (opt$iwNorm == "QE5") { + iwNorm <- TRUE + iwNormFun <- msPurity::iwNormQE.5() } -print('FIRST ROWS OF PEAK FILE') +print("FIRST ROWS OF PEAK FILE") print(head(df)) print(mzML_file) predicted <- msPurity::dimsPredictPuritySingle(df$mz, - filepth=mzML_file, - minOffset=minOffset, - maxOffset=maxOffset, - ppm=opt$ppm, - mzML=TRUE, + filepth = mzML_file, + minOffset = minOffset, + maxOffset = maxOffset, + ppm = opt$ppm, + mzML = TRUE, sim = sim, ilim = opt$ilim, isotopes = isotopes, @@ -154,10 +153,8 @@ predicted <- msPurity::dimsPredictPuritySingle(df$mz, predicted <- cbind(df, predicted) print(head(predicted)) -print(file.path(opt$out_dir, 'dimsPredictPuritySingle_output.tsv')) - -write.table(predicted, - file.path(opt$out_dir, 'dimsPredictPuritySingle_output.tsv'), - row.names=FALSE, sep='\t') - +print(file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv")) +write.table(predicted, + file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv"), + row.names = FALSE, sep = "\t") diff --git a/tools/msPurity/filterFragSpectra.R b/tools/msPurity/filterFragSpectra.R index 010784f..9020bb4 100644 --- a/tools/msPurity/filterFragSpectra.R +++ b/tools/msPurity/filterFragSpectra.R @@ -5,96 +5,95 @@ print(sessionInfo()) option_list <- list( - make_option("--out_rdata", type="character"), - make_option("--out_peaklist_prec", type="character"), - make_option("--out_peaklist_frag", type="character"), - make_option("--pa", type="character"), + make_option("--out_rdata", type = "character"), + make_option("--out_peaklist_prec", type = "character"), + make_option("--out_peaklist_frag", type = "character"), + make_option("--pa", type = "character"), - make_option("--ilim", default=0.0), - make_option("--plim", default=0.0), + make_option("--ilim", default = 0.0), + make_option("--plim", default = 0.0), - make_option("--ra", default=0.0), - make_option("--snr", default=0.0), + make_option("--ra", default = 0.0), + make_option("--snr", default = 0.0), - make_option("--rmp", action="store_true"), - make_option("--snmeth", default="median", type="character") + make_option("--rmp", action = "store_true"), + make_option("--snmeth", default = "median", type = "character") ) -opt <- parse_args(OptionParser(option_list=option_list)) +opt <- parse_args(OptionParser(option_list = option_list)) print(opt) -loadRData <- function(rdata_path, name){ +loadRData <- function(rdata_path, name) { #loads an RData file, and returns the named xset object if it is there load(rdata_path) return(get(ls()[ls() %in% name])) } # Requires -pa <- loadRData(opt$pa, 'pa') +pa <- loadRData(opt$pa, "pa") -if(is.null(opt$rmp)){ - opt$rmp = FALSE +if (is.null(opt$rmp)) { + opt$rmp <- FALSE }else{ - opt$rmp = TRUE + opt$rmp <- TRUE } -pa <- filterFragSpectra(pa, - ilim=opt$ilim, - plim=opt$plim, - ra=opt$ra, - snr=opt$snr, - rmp=opt$rmp, - snmeth=opt$snmeth) +pa <- filterFragSpectra(pa, + ilim = opt$ilim, + plim = opt$plim, + ra = opt$ra, + snr = opt$snr, + rmp = opt$rmp, + snmeth = opt$snmeth) print(pa) -save(pa, file=opt$out_rdata) +save(pa, file = opt$out_rdata) # get the msms data for grpid from the purityA object -msmsgrp <- function(grpid, pa){ +msmsgrp <- function(grpid, pa) { msms <- pa@grped_ms2[grpid] - - grpinfo <- pa@grped_df[pa@grped_df$grpid==grpid,] - - grpinfo$subsetid <- 1:nrow(grpinfo) - result <- plyr::ddply(grpinfo, ~subsetid, setid, msms=msms) + + grpinfo <- pa@grped_df[pa@grped_df$grpid == grpid, ] + + grpinfo$subsetid <- seq_len(nrow(grpinfo)) + result <- plyr::ddply(grpinfo, ~subsetid, setid, msms = msms) return(result) } -# Set the relevant details -setid <- function(grpinfo_i, msms){ +# Set the relevant details +setid <- function(grpinfo_i, msms) { msms_i <- msms[[1]][[grpinfo_i$subsetid]] n <- nrow(msms_i) msms_i <- data.frame(msms_i) - colnames(msms_i)[1:2] <- c('mz', 'i') - m <- cbind('grpid'=rep(grpinfo_i$grpid,n), 'pid'=rep(grpinfo_i$pid,n), 'fileid'=rep(grpinfo_i$fileid,n), msms_i) + colnames(msms_i)[1:2] <- c("mz", "i") + m <- cbind("grpid" = rep(grpinfo_i$grpid, n), "pid" = rep(grpinfo_i$pid, n), "fileid" = rep(grpinfo_i$fileid, n), msms_i) return(m) } -if (length(pa)>0){ +if (length(pa) > 0) { - if (length(pa@grped_ms2)==0){ - message('No spectra available') - } else{ + if (length(pa@grped_ms2) == 0) { + message("No spectra available") + } else { # get group ids grpids <- unique(as.character(pa@grped_df$grpid)) # loop through all the group ids - df_fragments = plyr::adply(grpids, 1, msmsgrp, pa=pa) - df_fragments = merge(df_fragments, pa@puritydf[,c("pid", "acquisitionNum", "precursorScanNum")], by="pid") - df_fragments = df_fragments[order(df_fragments$grpid, df_fragments$pid, df_fragments$mz),] + df_fragments <- plyr::adply(grpids, 1, msmsgrp, pa = pa) + df_fragments <- merge(df_fragments, pa@puritydf[, c("pid", "acquisitionNum", "precursorScanNum")], by = "pid") + df_fragments <- df_fragments[order(df_fragments$grpid, df_fragments$pid, df_fragments$mz), ] #select and reorder columns - df_fragments = df_fragments[,c("grpid", "pid", "precursorScanNum", "acquisitionNum", "fileid", "mz", "i", "snr", "ra", "purity_pass_flag", "intensity_pass_flag", "ra_pass_flag", "snr_pass_flag", "pass_flag")] + df_fragments <- df_fragments[, c("grpid", "pid", "precursorScanNum", "acquisitionNum", "fileid", "mz", "i", "snr", "ra", "purity_pass_flag", "intensity_pass_flag", "ra_pass_flag", "snr_pass_flag", "pass_flag")] - pa@grped_df$filename = sapply(pa@grped_df$fileid, function(x) names(pa@fileList)[as.integer(x)]) + pa@grped_df$filename <- sapply(pa@grped_df$fileid, function(x) names(pa@fileList)[as.integer(x)]) - print(head(pa@grped_df)) - write.table(pa@grped_df, opt$out_peaklist_prec, row.names=FALSE, sep='\t') + print(head(pa@grped_df)) + write.table(pa@grped_df, opt$out_peaklist_prec, row.names = FALSE, sep = "\t") print(head(df_fragments)) - write.table(df_fragments, opt$out_peaklist_frag, row.names=FALSE, sep='\t') + write.table(df_fragments, opt$out_peaklist_frag, row.names = FALSE, sep = "\t") } } - diff --git a/tools/msPurity/flagRemove.R b/tools/msPurity/flagRemove.R index 13eec03..8ba8bde 100644 --- a/tools/msPurity/flagRemove.R +++ b/tools/msPurity/flagRemove.R @@ -2,105 +2,100 @@ library(msPurity) library(optparse) print(sessionInfo()) option_list <- list( - make_option(c("-o", "--out_dir"), type="character", default=getwd(), - help="Output folder for resulting files [default = %default]" + make_option(c("-o", "--out_dir"), type = "character", default = getwd(), + help = "Output folder for resulting files [default = %default]" ), - make_option(c("-x", "--xset_path"), type="character", default=file.path(getwd(),"xset.rds"), - help="The path to the xcmsSet object [default = %default]" + make_option(c("-x", "--xset_path"), type = "character", default = file.path(getwd(), "xset.rds"), + help = "The path to the xcmsSet object [default = %default]" ), - make_option("--polarity", default=NA, - help="polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]" + make_option("--polarity", default = NA, + help = "polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]" ), - make_option("--rsd_i_blank", default=100, - help="RSD threshold for the blank [default = %default]" + make_option("--rsd_i_blank", default = 100, + help = "RSD threshold for the blank [default = %default]" ), - make_option("--minfrac_blank", default=0.5, - help="minimum fraction of files for features needed for the blank [default = %default]" + make_option("--minfrac_blank", default = 0.5, + help = "minimum fraction of files for features needed for the blank [default = %default]" ), - make_option("--rsd_rt_blank", default=100, - help="RSD threshold for the RT of the blank [default = %default]" + make_option("--rsd_rt_blank", default = 100, + help = "RSD threshold for the RT of the blank [default = %default]" ), - make_option("--ithres_blank", default=0, - help="Intensity threshold for the blank [default = %default]" + make_option("--ithres_blank", default = 0, + help = "Intensity threshold for the blank [default = %default]" ), - make_option("--s2b", default=10, - help="fold change (sample/blank) needed for sample peak to be allowed. e.g. + make_option("--s2b", default = 10, + help = "fold change (sample/blank) needed for sample peak to be allowed. e.g. if s2b set to 10 and the recorded sample 'intensity' value was 100 and blank was 10. 1000/10 = 100, so sample has fold change higher than the threshold and the peak is not considered a blank [default = %default]" ), - make_option("--blank_class", default='blank', type="character", - help="A string representing the class that will be used for the blank.[default = %default]" + make_option("--blank_class", default = "blank", type = "character", + help = "A string representing the class that will be used for the blank.[default = %default]" ), - make_option("--egauss_thr", default=NA, - help="Threshold for filtering out non gaussian shaped peaks. Note this only works + make_option("--egauss_thr", default = NA, + help = "Threshold for filtering out non gaussian shaped peaks. Note this only works if the 'verbose columns' and 'fit gauss' was used with xcms [default = %default]" ), - make_option("--rsd_i_sample", default=100, - help="RSD threshold for the samples [default = %default]" + make_option("--rsd_i_sample", default = 100, + help = "RSD threshold for the samples [default = %default]" ), - make_option("--minfrac_sample", default=0.8, - help="minimum fraction of files for features needed for the samples [default = %default]" + make_option("--minfrac_sample", default = 0.8, + help = "minimum fraction of files for features needed for the samples [default = %default]" ), - make_option("--rsd_rt_sample", default=100, - help="RSD threshold for the RT of the samples [default = %default]" + make_option("--rsd_rt_sample", default = 100, + help = "RSD threshold for the RT of the samples [default = %default]" ), - make_option("--ithres_sample", default=5000, - help="Intensity threshold for the sample [default = %default]" + make_option("--ithres_sample", default = 5000, + help = "Intensity threshold for the sample [default = %default]" ), - make_option("--grp_rm_ids", default=NA, - help="vector of grouped_xcms peaks to remove (corresponds to the row from xcms::group output) + make_option("--grp_rm_ids", default = NA, + help = "vector of grouped_xcms peaks to remove (corresponds to the row from xcms::group output) [default = %default]" ), - make_option("--remove_spectra", action="store_true", - help=" TRUE if flagged spectra is to be removed [default = %default]" + make_option("--remove_spectra", action = "store_true", + help = "TRUE if flagged spectra is to be removed [default = %default]" ), - make_option("--minfrac_xcms", default=0.5, - help="minfrac for xcms grouping [default = %default]" + make_option("--minfrac_xcms", default = 0.5, + help = "minfrac for xcms grouping [default = %default]" ), - make_option("--mzwid", default=0.001, - help="mzwid for xcms grouping [default = %default]" + make_option("--mzwid", default = 0.001, + help = "mzwid for xcms grouping [default = %default]" ), - make_option("--bw", default=5, - help="bw for xcms grouping [default = %default]" + make_option("--bw", default = 5, + help = "bw for xcms grouping [default = %default]" ), - make_option("--temp_save", action="store_true", - help="Assign True if files for each step saved (for testing purposes) [default = %default]" + make_option("--temp_save", action = "store_true", + help = "Assign True if files for each step saved (for testing purposes) [default = %default]" ), - make_option("--samplelist", type="character", help="Sample list to determine the blank class") - - - - + make_option("--samplelist", type = "character", help = "Sample list to determine the blank class") ) - #make_option("--multilist", action="store_true" - # help="NOT CURRENTLY IMPLEMENTED: If paired blank removal is to be performed a - multilist - sample list file has to be provided" - #), +# nolint start +# make_option("--multilist", action="store_true" +# help="NOT CURRENTLY IMPLEMENTED: If paired blank removal is to be performed a - multilist - sample list file has to be provided" +# ), +# nolint end # store options -opt<- parse_args(OptionParser(option_list=option_list)) +opt <- parse_args(OptionParser(option_list = option_list)) opt <- replace(opt, opt == "NA", NA) - - - -if (is.null(opt$temp_save)){ - temp_save<-FALSE +if (is.null(opt$temp_save)) { + temp_save <- FALSE }else{ - temp_save<-TRUE + temp_save <- TRUE } -if (is.null(opt$remove_spectra)){ - remove_spectra<-FALSE +if (is.null(opt$remove_spectra)) { + remove_spectra <- FALSE }else{ - remove_spectra<-TRUE + remove_spectra <- TRUE } @@ -109,35 +104,35 @@ print(opt) getxcmsSetObject <- function(xobject) { # XCMS 1.x if (class(xobject) == "xcmsSet") - return (xobject) + return(xobject) # XCMS 3.x if (class(xobject) == "XCMSnExp") { # Get the legacy xcmsSet object - suppressWarnings(xset <- as(xobject, 'xcmsSet')) - sampclass(xset) <- xset@phenoData$sample_group - return (xset) + suppressWarnings(xset <- as(xobject, "xcmsSet")) + xcms::sampclass(xset) <- xset@phenoData$sample_group + return(xset) } } -loadRData <- function(rdata_path, name){ +loadRData <- function(rdata_path, name) { #loads an RData file, and returns the named xset object if it is there load(rdata_path) return(get(ls()[ls() %in% name])) } -xset <- getxcmsSetObject(loadRData(opt$xset_path, c('xset','xdata'))) +xset <- getxcmsSetObject(loadRData(opt$xset_path, c("xset", "xdata"))) print(xset) -if (is.null(opt$samplelist)){ +if (is.null(opt$samplelist)) { blank_class <- opt$blank_class }else{ - samplelist <- read.table(opt$samplelist, sep='\t', header=TRUE) - samplelist_blank <- unique(samplelist$sample_class[samplelist$blank=='yes']) + samplelist <- read.table(opt$samplelist, sep = "\t", header = TRUE) + samplelist_blank <- unique(samplelist$sample_class[samplelist$blank == "yes"]) chosen_blank <- samplelist_blank[samplelist_blank %in% xset@phenoData$class] - if (length(chosen_blank)>1){ - print('ERROR: only 1 blank is currently allowed to be used with this tool') + if (length(chosen_blank) > 1) { + print("ERROR: only 1 blank is currently allowed to be used with this tool") quit() } blank_class <- as.character(chosen_blank) @@ -145,47 +140,47 @@ if (is.null(opt$samplelist)){ } -if (is.null(opt$multilist)){ +if (is.null(opt$multilist)) { ffrm_out <- flag_remove(xset, - pol=opt$polarity, - rsd_i_blank=opt$rsd_i_blank, - minfrac_blank=opt$minfrac_blank, - rsd_rt_blank=opt$rsd_rt_blank, - ithres_blank=opt$ithres_blank, - s2b=opt$s2b, - ref.class=blank_class, - egauss_thr=opt$egauss_thr, - rsd_i_sample=opt$rsd_i_sample, - minfrac_sample=opt$minfrac_sample, - rsd_rt_sample=opt$rsd_rt_sample, - ithres_sample=opt$ithres_sample, - minfrac_xcms=opt$minfrac_xcms, - mzwid=opt$mzwid, - bw=opt$bw, - out_dir=opt$out_dir, - temp_save=temp_save, - remove_spectra=remove_spectra, - grp_rm_ids=unlist(strsplit(as.character(opt$grp_rm_ids), split=", "))[[1]]) - print('flag remove finished') + pol = opt$polarity, + rsd_i_blank = opt$rsd_i_blank, + minfrac_blank = opt$minfrac_blank, + rsd_rt_blank = opt$rsd_rt_blank, + ithres_blank = opt$ithres_blank, + s2b = opt$s2b, + ref.class = blank_class, + egauss_thr = opt$egauss_thr, + rsd_i_sample = opt$rsd_i_sample, + minfrac_sample = opt$minfrac_sample, + rsd_rt_sample = opt$rsd_rt_sample, + ithres_sample = opt$ithres_sample, + minfrac_xcms = opt$minfrac_xcms, + mzwid = opt$mzwid, + bw = opt$bw, + out_dir = opt$out_dir, + temp_save = temp_save, + remove_spectra = remove_spectra, + grp_rm_ids = unlist(strsplit(as.character(opt$grp_rm_ids), split = ", "))[[1]]) + print("flag remove finished") xset <- ffrm_out[[1]] grp_peaklist <- ffrm_out[[2]] removed_peaks <- ffrm_out[[3]] - save.image(file=file.path(opt$out_dir, 'xset_filtered.RData'), version=2) + save.image(file = file.path(opt$out_dir, "xset_filtered.RData"), version = 2) # grpid needed for mspurity ID needed for deconrank... (will clean up at some up) - peak_pth <- file.path(opt$out_dir, 'peaklist_filtered.tsv') + peak_pth <- file.path(opt$out_dir, "peaklist_filtered.tsv") print(peak_pth) - write.table(data.frame('grpid'=rownames(grp_peaklist), 'ID'=rownames(grp_peaklist), grp_peaklist), - peak_pth, row.names=FALSE, sep='\t') + write.table(data.frame("grpid" = rownames(grp_peaklist), "ID" = rownames(grp_peaklist), grp_peaklist), + peak_pth, row.names = FALSE, sep = "\t") removed_peaks <- data.frame(removed_peaks) - write.table(data.frame('ID'=rownames(removed_peaks),removed_peaks), - file.path(opt$out_dir, 'removed_peaks.tsv'), row.names=FALSE, sep='\t') + write.table(data.frame("ID" = rownames(removed_peaks), removed_peaks), + file.path(opt$out_dir, "removed_peaks.tsv"), row.names = FALSE, sep = "\t") }else{ - + # nolint start # TODO #xsets <- split(xset, multilist_df$multlist) # @@ -193,12 +188,10 @@ if (is.null(opt$multilist)){ # #for (mgrp in mult_grps){ # xset_i <- xsets[mgrp] - # xcms::group(xset_i, + # xcms::group(xset_i, # # } - + # nolint end } - - diff --git a/tools/msPurity/frag4feature.R b/tools/msPurity/frag4feature.R index 88f9d8f..92eb54a 100644 --- a/tools/msPurity/frag4feature.R +++ b/tools/msPurity/frag4feature.R @@ -3,18 +3,18 @@ library(msPurity) library(xcms) print(sessionInfo()) -xset_pa_filename_fix <- function(opt, pa, xset=NULL){ +xset_pa_filename_fix <- function(opt, pa, xset=NULL) { - if (!is.null(opt$mzML_files) && !is.null(opt$galaxy_names)){ + if (!is.null(opt$mzML_files) && !is.null(opt$galaxy_names)) { # NOTE: Relies on the pa@fileList having the names of files given as 'names' of the variables # needs to be done due to Galaxy moving the files around and screwing up any links to files - filepaths <- trimws(strsplit(opt$mzML_files, ',')[[1]]) + filepaths <- trimws(strsplit(opt$mzML_files, ",")[[1]]) # nolint + filepaths <- filepaths[filepaths != ""] - new_names <- basename(filepaths) - galaxy_names <- trimws(strsplit(opt$galaxy_names, ',')[[1]]) + galaxy_names <- trimws(strsplit(opt$galaxy_names, ",")[[1]]) galaxy_names <- galaxy_names[galaxy_names != ""] nsave <- names(pa@fileList) @@ -28,14 +28,14 @@ xset_pa_filename_fix <- function(opt, pa, xset=NULL){ } print(pa@fileList) - if(!is.null(xset)){ + if (!is.null(xset)) { print(xset@filepaths) - if(!all(basename(pa@fileList)==basename(xset@filepaths))){ - if(!all(names(pa@fileList)==basename(xset@filepaths))){ - print('FILELISTS DO NOT MATCH') - message('FILELISTS DO NOT MATCH') + if (!all(basename(pa@fileList) == basename(xset@filepaths))) { + if (!all(names(pa@fileList) == basename(xset@filepaths))) { + print("FILELISTS DO NOT MATCH") + message("FILELISTS DO NOT MATCH") quit(status = 1) }else{ xset@filepaths <- unname(pa@fileList) @@ -48,26 +48,26 @@ xset_pa_filename_fix <- function(opt, pa, xset=NULL){ option_list <- list( - make_option(c("-o", "--out_dir"), type="character"), - make_option("--pa", type="character"), - make_option("--xset", type="character"), - make_option("--ppm", default=10), - make_option("--plim", default=0.0), - make_option("--convert2RawRT", action="store_true"), - make_option("--intense", action="store_true"), - make_option("--createDB", action="store_true"), - make_option("--cores", default=4), - make_option("--mzML_files", type="character"), - make_option("--galaxy_names", type="character"), - make_option("--grp_peaklist", type="character"), - make_option("--useGroup", action="store_true") + make_option(c("-o", "--out_dir"), type = "character"), + make_option("--pa", type = "character"), + make_option("--xset", type = "character"), + make_option("--ppm", default = 10), + make_option("--plim", default = 0.0), + make_option("--convert2RawRT", action = "store_true"), + make_option("--intense", action = "store_true"), + make_option("--createDB", action = "store_true"), + make_option("--cores", default = 4), + make_option("--mzML_files", type = "character"), + make_option("--galaxy_names", type = "character"), + make_option("--grp_peaklist", type = "character"), + make_option("--useGroup", action = "store_true") ) # store options -opt<- parse_args(OptionParser(option_list=option_list)) +opt <- parse_args(OptionParser(option_list = option_list)) print(opt) -loadRData <- function(rdata_path, name){ +loadRData <- function(rdata_path, name) { #loads an RData file, and returns the named xset object if it is there load(rdata_path) return(get(ls()[ls() %in% name])) @@ -78,19 +78,19 @@ loadRData <- function(rdata_path, name){ getxcmsSetObject <- function(xobject) { # XCMS 1.x if (class(xobject) == "xcmsSet") - return (xobject) + return(xobject) # XCMS 3.x if (class(xobject) == "XCMSnExp") { # Get the legacy xcmsSet object - suppressWarnings(xset <- as(xobject, 'xcmsSet')) + suppressWarnings(xset <- as(xobject, "xcmsSet")) sampclass(xset) <- xset@phenoData$sample_group - return (xset) + return(xset) } } # Requires -pa <- loadRData(opt$pa, 'pa') -xset <- loadRData(opt$xset, c('xset','xdata')) +pa <- loadRData(opt$pa, "pa") +xset <- loadRData(opt$xset, c("xset", "xdata")) xset <- getxcmsSetObject(xset) pa@cores <- opt$cores @@ -98,63 +98,60 @@ pa@cores <- opt$cores print(pa@fileList) print(xset@filepaths) -if(is.null(opt$intense)){ - intense = FALSE +if (is.null(opt$intense)) { + intense <- FALSE }else{ - intense = TRUE + intense <- TRUE } -if(is.null(opt$convert2RawRT)){ - convert2RawRT = FALSE +if (is.null(opt$convert2RawRT)) { + convert2RawRT <- FALSE }else{ - convert2RawRT= TRUE + convert2RawRT <- TRUE } -if(is.null(opt$createDB)){ - createDB = FALSE +if (is.null(opt$createDB)) { + createDB <- FALSE }else{ - createDB = TRUE + createDB <- TRUE } -if(is.null(opt$useGroup)){ +if (is.null(opt$useGroup)) { fix <- xset_pa_filename_fix(opt, pa, xset) pa <- fix[[1]] xset <- fix[[2]] - useGroup=FALSE + useGroup <- FALSE }else{ # if are only aligning to the group not eah file we do not need to align the files between the xset and pa object - print('useGroup') + print("useGroup") fix <- xset_pa_filename_fix(opt, pa) pa <- fix[[1]] - useGroup=TRUE + useGroup <- TRUE } -if(is.null(opt$grp_peaklist)){ - grp_peaklist = NA +if (is.null(opt$grp_peaklist)) { + grp_peaklist <- NA }else{ - grp_peaklist = opt$grp_peaklist + grp_peaklist <- opt$grp_peaklist } print(useGroup) - - -pa <- msPurity::frag4feature(pa=pa, - xset=xset, - ppm=opt$ppm, - plim=opt$plim, - intense=intense, - convert2RawRT=convert2RawRT, - db_name='alldata.sqlite', - out_dir=opt$out_dir, - grp_peaklist=grp_peaklist, - create_db=createDB, - use_group=useGroup) - +pa <- msPurity::frag4feature(pa = pa, + xset = xset, + ppm = opt$ppm, + plim = opt$plim, + intense = intense, + convert2RawRT = convert2RawRT, + db_name = "alldata.sqlite", + out_dir = opt$out_dir, + grp_peaklist = grp_peaklist, + create_db = createDB, + use_group = useGroup) print(pa) -save(pa, file=file.path(opt$out_dir, 'frag4feature_output.RData')) +save(pa, file = file.path(opt$out_dir, "frag4feature_output.RData")) pa@grped_df$filename <- sapply(pa@grped_df$fileid, function(x) names(pa@fileList)[as.integer(x)]) print(head(pa@grped_df)) -write.table(pa@grped_df, file.path(opt$out_dir, 'frag4feature_output.tsv'), row.names=FALSE, sep='\t') +write.table(pa@grped_df, file.path(opt$out_dir, "frag4feature_output.tsv"), row.names = FALSE, sep = "\t") diff --git a/tools/msPurity/macros.xml b/tools/msPurity/macros.xml index 7359644..6e0ce14 100644 --- a/tools/msPurity/macros.xml +++ b/tools/msPurity/macros.xml @@ -1,17 +1,17 @@ - 1.12.2 - 3 + 1.16.2 + 0 bioconductor-mspurity - bioconductor-camera - bioconductor-xcms - bioconductor-mspuritydata - r-optparse - r-rpostgres - r-rmysql + bioconductor-camera + bioconductor-xcms + bioconductor-mspuritydata + r-optparse + r-rpostgres + r-rmysql @@ -29,7 +29,7 @@ 100.0 then the range would be from 100.0 to 100.5"/> - - - + + diff --git a/tools/msPurity/purityA.R b/tools/msPurity/purityA.R index 6cfe19e..8b628eb 100644 --- a/tools/msPurity/purityA.R +++ b/tools/msPurity/purityA.R @@ -3,91 +3,90 @@ library(optparse) print(sessionInfo()) option_list <- list( - make_option(c("-o", "--out_dir"), type="character"), - make_option("--mzML_files", type="character"), - make_option("--galaxy_names", type="character"), - make_option("--minOffset", type="numeric"), - make_option("--maxOffset", type="numeric"), - make_option("--ilim", type="numeric"), - make_option("--iwNorm", default="none", type="character"), - make_option("--exclude_isotopes", action="store_true"), - make_option("--isotope_matrix", type="character"), - make_option("--mostIntense", action="store_true"), - make_option("--plotP", action="store_true"), - make_option("--nearest", action="store_true"), - make_option("--cores", default=4), - make_option("--ppmInterp", default=7) + make_option(c("-o", "--out_dir"), type = "character"), + make_option("--mzML_files", type = "character"), + make_option("--galaxy_names", type = "character"), + make_option("--minOffset", type = "numeric"), + make_option("--maxOffset", type = "numeric"), + make_option("--ilim", type = "numeric"), + make_option("--iwNorm", default = "none", type = "character"), + make_option("--exclude_isotopes", action = "store_true"), + make_option("--isotope_matrix", type = "character"), + make_option("--mostIntense", action = "store_true"), + make_option("--plotP", action = "store_true"), + make_option("--nearest", action = "store_true"), + make_option("--cores", default = 4), + make_option("--ppmInterp", default = 7) ) -opt <- parse_args(OptionParser(option_list=option_list)) +opt <- parse_args(OptionParser(option_list = option_list)) print(opt) - -if (opt$iwNorm=='none'){ - iwNorm = FALSE - iwNormFun = NULL -}else if (opt$iwNorm=='gauss'){ - iwNorm = TRUE - if (is.null(opt$minOffset) || is.null(opt$maxOffset)){ - print('User has to define offsets if using Gaussian normalisation') +if (opt$iwNorm == "none") { + iwNorm <- FALSE + iwNormFun <- NULL +}else if (opt$iwNorm == "gauss") { + iwNorm <- TRUE + if (is.null(opt$minOffset) || is.null(opt$maxOffset)) { + print("User has to define offsets if using Gaussian normalisation") }else{ - iwNormFun = msPurity::iwNormGauss(minOff=-as.numeric(opt$minOffset), - maxOff=as.numeric(opt$maxOffset)) + iwNormFun <- msPurity::iwNormGauss(minOff = -as.numeric(opt$minOffset), + maxOff = as.numeric(opt$maxOffset)) } -}else if (opt$iwNorm=='rcosine'){ - iwNorm = TRUE - if (is.null(opt$minOffset) || is.null(opt$maxOffset)){ - print('User has to define offsets if using R-cosine normalisation') +}else if (opt$iwNorm == "rcosine") { + iwNorm <- TRUE + if (is.null(opt$minOffset) || is.null(opt$maxOffset)) { + print("User has to define offsets if using R-cosine normalisation") }else{ - iwNormFun = msPurity::iwNormRcosine(minOff=-as.numeric(opt$minOffset), - maxOff=as.numeric(opt$maxOffset)) + iwNormFun <- msPurity::iwNormRcosine(minOff = -as.numeric(opt$minOffset), + maxOff = as.numeric(opt$maxOffset)) } -}else if (opt$iwNorm=='QE5'){ - iwNorm = TRUE - iwNormFun = msPurity::iwNormQE.5() +}else if (opt$iwNorm == "QE5") { + iwNorm <- TRUE + iwNormFun <- msPurity::iwNormQE.5() } -filepaths <- trimws(strsplit(opt$mzML_files, ',')[[1]]) +filepaths <- trimws(strsplit(opt$mzML_files, ",")[[1]]) filepaths <- filepaths[filepaths != ""] -if(is.null(opt$minOffset) || is.null(opt$maxOffset)){ - offsets = NA +if (is.null(opt$minOffset) || is.null(opt$maxOffset)) { + offsets <- NA }else{ - offsets = as.numeric(c(opt$minOffset, opt$maxOffset)) + offsets <- as.numeric(c(opt$minOffset, opt$maxOffset)) } -if(is.null(opt$mostIntense)){ - mostIntense = FALSE +if (is.null(opt$mostIntense)) { + mostIntense <- FALSE }else{ - mostIntense = TRUE + mostIntense <- TRUE } -if(is.null(opt$nearest)){ - nearest = FALSE +if (is.null(opt$nearest)) { + nearest <- FALSE }else{ - nearest = TRUE + nearest <- TRUE } -if(is.null(opt$plotP)){ - plotP = FALSE - plotdir = NULL +if (is.null(opt$plotP)) { + plotP <- FALSE + plotdir <- NULL }else{ - plotP = TRUE - plotdir = opt$out_dir + plotP <- TRUE + plotdir <- opt$out_dir } -if (is.null(opt$isotope_matrix)){ +if (is.null(opt$isotope_matrix)) { im <- NULL }else{ im <- read.table(opt$isotope_matrix, - header = TRUE, sep='\t', stringsAsFactors = FALSE) + header = TRUE, sep = "\t", stringsAsFactors = FALSE) } -if (is.null(opt$exclude_isotopes)){ +if (is.null(opt$exclude_isotopes)) { isotopes <- FALSE }else{ isotopes <- TRUE @@ -110,20 +109,16 @@ pa <- msPurity::purityA(filepaths, ppmInterp = opt$ppmInterp) -if (!is.null(opt$galaxy_names)){ - galaxy_names <- trimws(strsplit(opt$galaxy_names, ',')[[1]]) +if (!is.null(opt$galaxy_names)) { + galaxy_names <- trimws(strsplit(opt$galaxy_names, ",")[[1]]) galaxy_names <- galaxy_names[galaxy_names != ""] names(pa@fileList) <- galaxy_names } print(pa) -save(pa, file=file.path(opt$out_dir, 'purityA_output.RData')) +save(pa, file = file.path(opt$out_dir, "purityA_output.RData")) pa@puritydf$filename <- sapply(pa@puritydf$fileid, function(x) names(pa@fileList)[as.integer(x)]) print(head(pa@puritydf)) -write.table(pa@puritydf, file.path(opt$out_dir, 'purityA_output.tsv'), row.names=FALSE, sep='\t') - -# removed_peaks <- data.frame(removed_peaks) -# write.table(data.frame('ID'=rownames(removed_peaks),removed_peaks), -# file.path(opt$out_dir, 'removed_peaks.txt'), row.names=FALSE, sep='\t') +write.table(pa@puritydf, file.path(opt$out_dir, "purityA_output.tsv"), row.names = FALSE, sep = "\t") diff --git a/tools/msPurity/purityX.R b/tools/msPurity/purityX.R index 1a0df60..de9a986 100644 --- a/tools/msPurity/purityX.R +++ b/tools/msPurity/purityX.R @@ -3,67 +3,66 @@ library(optparse) print(sessionInfo()) option_list <- list( - make_option(c("--xset_path"), type="character"), - make_option(c("-o", "--out_dir"), type="character"), - make_option(c("--mzML_path"), type="character"), - make_option("--minOffset", default=0.5), - make_option("--maxOffset", default=0.5), - make_option("--ilim", default=0.05), - make_option("--iwNorm", default="none", type="character"), - make_option("--exclude_isotopes", action="store_true"), - make_option("--isotope_matrix", type="character"), - make_option("--purityType", default="purityFWHMmedian"), - make_option("--singleFile", default=0), - make_option("--cores", default=4), - make_option("--xgroups", type="character"), - make_option("--rdata_name", default='xset'), - make_option("--camera_xcms", default='xset'), - make_option("--files", type="character"), - make_option("--galaxy_files", type="character"), - make_option("--choose_class", type="character"), - make_option("--ignore_files", type="character"), - make_option("--rtraw_columns", action="store_true") + make_option(c("--xset_path"), type = "character"), + make_option(c("-o", "--out_dir"), type = "character"), + make_option(c("--mzML_path"), type = "character"), + make_option("--minOffset", default = 0.5), + make_option("--maxOffset", default = 0.5), + make_option("--ilim", default = 0.05), + make_option("--iwNorm", default = "none", type = "character"), + make_option("--exclude_isotopes", action = "store_true"), + make_option("--isotope_matrix", type = "character"), + make_option("--purityType", default = "purityFWHMmedian"), + make_option("--singleFile", default = 0), + make_option("--cores", default = 4), + make_option("--xgroups", type = "character"), + make_option("--rdata_name", default = "xset"), + make_option("--camera_xcms", default = "xset"), + make_option("--files", type = "character"), + make_option("--galaxy_files", type = "character"), + make_option("--choose_class", type = "character"), + make_option("--ignore_files", type = "character"), + make_option("--rtraw_columns", action = "store_true") ) -opt<- parse_args(OptionParser(option_list=option_list)) +opt <- parse_args(OptionParser(option_list = option_list)) print(opt) -if (!is.null(opt$xgroups)){ - xgroups = as.numeric(strsplit(opt$xgroups, ',')[[1]]) +if (!is.null(opt$xgroups)) { + xgroups <- as.numeric(strsplit(opt$xgroups, ",")[[1]]) }else{ - xgroups = NULL + xgroups <- NULL } - print(xgroups) -if (!is.null(opt$remove_nas)){ - df <- df[!is.na(df$mz),] +if (!is.null(opt$remove_nas)) { + df <- df[!is.na(df$mz), ] } -if (is.null(opt$isotope_matrix)){ +if (is.null(opt$isotope_matrix)) { im <- NULL }else{ im <- read.table(opt$isotope_matrix, - header = TRUE, sep='\t', stringsAsFactors = FALSE) + header = TRUE, sep = "\t", stringsAsFactors = FALSE) } -if (is.null(opt$exclude_isotopes)){ +if (is.null(opt$exclude_isotopes)) { isotopes <- FALSE }else{ isotopes <- TRUE } -if (is.null(opt$rtraw_columns)){ +if (is.null(opt$rtraw_columns)) { rtraw_columns <- FALSE }else{ rtraw_columns <- TRUE } -loadRData <- function(rdata_path, xset_name){ +loadRData <- function(rdata_path, xset_name) { #loads an RData file, and returns the named xset object if it is there load(rdata_path) return(get(ls()[ls() == xset_name])) @@ -71,7 +70,7 @@ loadRData <- function(rdata_path, xset_name){ target_obj <- loadRData(opt$xset_path, opt$rdata_name) -if (opt$camera_xcms=='camera'){ +if (opt$camera_xcms == "camera") { xset <- target_obj@xcmsSet }else{ xset <- target_obj @@ -79,36 +78,35 @@ if (opt$camera_xcms=='camera'){ print(xset) -minOffset = as.numeric(opt$minOffset) -maxOffset = as.numeric(opt$maxOffset) - - -if (opt$iwNorm=='none'){ - iwNorm = FALSE - iwNormFun = NULL -}else if (opt$iwNorm=='gauss'){ - iwNorm = TRUE - iwNormFun = msPurity::iwNormGauss(minOff=-minOffset, maxOff=maxOffset) -}else if (opt$iwNorm=='rcosine'){ - iwNorm = TRUE - iwNormFun = msPurity::iwNormRcosine(minOff=-minOffset, maxOff=maxOffset) -}else if (opt$iwNorm=='QE5'){ - iwNorm = TRUE - iwNormFun = msPurity::iwNormQE.5() +minOffset <- as.numeric(opt$minOffset) +maxOffset <- as.numeric(opt$maxOffset) + +if (opt$iwNorm == "none") { + iwNorm <- FALSE + iwNormFun <- NULL +}else if (opt$iwNorm == "gauss") { + iwNorm <- TRUE + iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset) +}else if (opt$iwNorm == "rcosine") { + iwNorm <- TRUE + iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset) +}else if (opt$iwNorm == "QE5") { + iwNorm <- TRUE + iwNormFun <- msPurity::iwNormQE.5() } print(xset@filepaths) -if (!is.null(opt$files)){ - updated_filepaths <- trimws(strsplit(opt$files, ',')[[1]]) +if (!is.null(opt$files)) { + updated_filepaths <- trimws(strsplit(opt$files, ",")[[1]]) updated_filepaths <- updated_filepaths[updated_filepaths != ""] print(updated_filepaths) - updated_filenames = basename(updated_filepaths) - original_filenames = basename(xset@filepaths) - update_idx = match(updated_filenames, original_filenames) + updated_filenames <- basename(updated_filepaths) + original_filenames <- basename(xset@filepaths) + update_idx <- match(updated_filenames, original_filenames) - if (!is.null(opt$galaxy_files)){ - galaxy_files <- trimws(strsplit(opt$galaxy_files, ',')[[1]]) + if (!is.null(opt$galaxy_files)) { + galaxy_files <- trimws(strsplit(opt$galaxy_files, ",")[[1]]) galaxy_files <- galaxy_files[galaxy_files != ""] xset@filepaths <- galaxy_files[update_idx] }else{ @@ -116,27 +114,26 @@ if (!is.null(opt$files)){ } } -if (!is.null(opt$choose_class)){ - classes <- trimws(strsplit(opt$choose_class, ',')[[1]]) - +if (!is.null(opt$choose_class)) { + classes <- trimws(strsplit(opt$choose_class, ",")[[1]]) ignore_files_class <- which(!as.character(xset@phenoData$class) %in% classes) - print('choose class') + print("choose class") print(ignore_files_class) }else{ ignore_files_class <- NA } -if (!is.null(opt$ignore_files)){ - ignore_files_string <- trimws(strsplit(opt$ignore_files, ',')[[1]]) +if (!is.null(opt$ignore_files)) { + ignore_files_string <- trimws(strsplit(opt$ignore_files, ",")[[1]]) filenames <- rownames(xset@phenoData) ignore_files <- which(filenames %in% ignore_files_string) ignore_files <- unique(c(ignore_files, ignore_files_class)) ignore_files <- ignore_files[ignore_files != ""] }else{ - if (anyNA(ignore_files_class)){ + if (anyNA(ignore_files_class)) { ignore_files <- NULL }else{ ignore_files <- ignore_files_class @@ -144,41 +141,40 @@ if (!is.null(opt$ignore_files)){ } -print('ignore_files') +print("ignore_files") print(ignore_files) -ppLCMS <- msPurity::purityX(xset=xset, - offsets=c(minOffset, maxOffset), - cores=opt$cores, - xgroups=xgroups, - purityType=opt$purityType, - ilim = opt$ilim, - isotopes = isotopes, - im = im, - iwNorm = iwNorm, - iwNormFun = iwNormFun, - singleFile = opt$singleFile, - fileignore = ignore_files, - rtrawColumns=rtraw_columns) - +ppLCMS <- msPurity::purityX(xset = xset, + offsets = c(minOffset, maxOffset), + cores = opt$cores, + xgroups = xgroups, + purityType = opt$purityType, + ilim = opt$ilim, + isotopes = isotopes, + im = im, + iwNorm = iwNorm, + iwNormFun = iwNormFun, + singleFile = opt$singleFile, + fileignore = ignore_files, + rtrawColumns = rtraw_columns) dfp <- ppLCMS@predictions # to make compatable with deconrank -colnames(dfp)[colnames(dfp)=='grpid'] = 'peakID' -colnames(dfp)[colnames(dfp)=='median'] = 'medianPurity' -colnames(dfp)[colnames(dfp)=='mean'] = 'meanPurity' -colnames(dfp)[colnames(dfp)=='sd'] = 'sdPurity' -colnames(dfp)[colnames(dfp)=='stde'] = 'sdePurity' -colnames(dfp)[colnames(dfp)=='RSD'] = 'cvPurity' -colnames(dfp)[colnames(dfp)=='pknm'] = 'pknmPurity' -if(sum(is.na(dfp$medianPurity))>0){ - dfp[is.na(dfp$medianPurity),]$medianPurity = 0 +colnames(dfp)[colnames(dfp) == "grpid"] <- "peakID" +colnames(dfp)[colnames(dfp) == "median"] <- "medianPurity" +colnames(dfp)[colnames(dfp) == "mean"] <- "meanPurity" +colnames(dfp)[colnames(dfp) == "sd"] <- "sdPurity" +colnames(dfp)[colnames(dfp) == "stde"] <- "sdePurity" +colnames(dfp)[colnames(dfp) == "RSD"] <- "cvPurity" +colnames(dfp)[colnames(dfp) == "pknm"] <- "pknmPurity" + +if (sum(is.na(dfp$medianPurity)) > 0) { + dfp[is.na(dfp$medianPurity), ]$medianPurity <- 0 } - print(head(dfp)) -write.table(dfp, file.path(opt$out_dir, 'purityX_output.tsv'), row.names=FALSE, sep='\t') +write.table(dfp, file.path(opt$out_dir, "purityX_output.tsv"), row.names = FALSE, sep = "\t") -save.image(file.path(opt$out_dir, 'purityX_output.RData')) +save.image(file.path(opt$out_dir, "purityX_output.RData")) diff --git a/tools/msPurity/spectralMatching.R b/tools/msPurity/spectralMatching.R index d7cd2a7..26c8a65 100644 --- a/tools/msPurity/spectralMatching.R +++ b/tools/msPurity/spectralMatching.R @@ -3,107 +3,111 @@ library(msPurityData) library(optparse) print(sessionInfo()) # load in library spectra config -source_local <- function(fname){ argv <- commandArgs(trailingOnly=FALSE); base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)); source(paste(base_dir, fname, sep="/")) } +source_local <- function(fname) { + argv <- commandArgs(trailingOnly = FALSE) + base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) + source(paste(base_dir, fname, sep = "/")) +} source_local("dbconfig.R") option_list <- list( - make_option(c("-o", "--outDir"), type="character"), - make_option("--q_dbPth", type="character"), - make_option("--l_dbPth", type="character"), + make_option(c("-o", "--outDir"), type = "character"), + make_option("--q_dbPth", type = "character"), + make_option("--l_dbPth", type = "character"), - make_option("--q_dbType", type="character", default=NA), - make_option("--l_dbType", type="character", default=NA), + make_option("--q_dbType", type = "character", default = NA), + make_option("--l_dbType", type = "character", default = NA), - make_option("--q_msp", type="character", default=NA), - make_option("--l_msp", type="character", default=NA), + make_option("--q_msp", type = "character", default = NA), + make_option("--l_msp", type = "character", default = NA), - make_option("--q_defaultDb", action="store_true"), - make_option("--l_defaultDb", action="store_true"), + make_option("--q_defaultDb", action = "store_true"), + make_option("--l_defaultDb", action = "store_true"), - make_option("--q_ppmPrec", type="double"), - make_option("--l_ppmPrec", type="double"), + make_option("--q_ppmPrec", type = "double"), + make_option("--l_ppmPrec", type = "double"), - make_option("--q_ppmProd", type="double"), - make_option("--l_ppmProd", type="double"), + make_option("--q_ppmProd", type = "double"), + make_option("--l_ppmProd", type = "double"), - make_option("--q_raThres", type="double", default=NA), - make_option("--l_raThres", type="double", default=NA), + make_option("--q_raThres", type = "double", default = NA), + make_option("--l_raThres", type = "double", default = NA), - make_option("--q_polarity", type="character", default=NA), - make_option("--l_polarity", type="character", default=NA), + make_option("--q_polarity", type = "character", default = NA), + make_option("--l_polarity", type = "character", default = NA), - make_option("--q_purity", type="double", default=NA), - make_option("--l_purity", type="double", default=NA), + make_option("--q_purity", type = "double", default = NA), + make_option("--l_purity", type = "double", default = NA), - make_option("--q_xcmsGroups", type="character", default=NA), - make_option("--l_xcmsGroups", type="character", default=NA), + make_option("--q_xcmsGroups", type = "character", default = NA), + make_option("--l_xcmsGroups", type = "character", default = NA), - make_option("--q_pids", type="character", default=NA), - make_option("--l_pids", type="character", default=NA), + make_option("--q_pids", type = "character", default = NA), + make_option("--l_pids", type = "character", default = NA), - make_option("--q_rtrangeMin", type="double", default=NA), - make_option("--l_rtrangeMin", type="double", default=NA), + make_option("--q_rtrangeMin", type = "double", default = NA), + make_option("--l_rtrangeMin", type = "double", default = NA), - make_option("--q_rtrangeMax", type="double", default=NA), - make_option("--l_rtrangeMax", type="double", default=NA), + make_option("--q_rtrangeMax", type = "double", default = NA), + make_option("--l_rtrangeMax", type = "double", default = NA), - make_option("--q_accessions", type="character", default=NA), - make_option("--l_accessions", type="character", default=NA), + make_option("--q_accessions", type = "character", default = NA), + make_option("--l_accessions", type = "character", default = NA), - make_option("--q_sources", type="character", default=NA), - make_option("--l_sources", type="character", default=NA), + make_option("--q_sources", type = "character", default = NA), + make_option("--l_sources", type = "character", default = NA), - make_option("--q_sourcesUser", type="character", default=NA), - make_option("--l_sourcesUser", type="character", default=NA), + make_option("--q_sourcesUser", type = "character", default = NA), + make_option("--l_sourcesUser", type = "character", default = NA), - make_option("--q_instrumentTypes", type="character", default=NA), - make_option("--l_instrumentTypes", type="character", default=NA), + make_option("--q_instrumentTypes", type = "character", default = NA), + make_option("--l_instrumentTypes", type = "character", default = NA), - make_option("--q_instrumentTypesUser", type="character", default=NA), - make_option("--l_instrumentTypesUser", type="character", default=NA), + make_option("--q_instrumentTypesUser", type = "character", default = NA), + make_option("--l_instrumentTypesUser", type = "character", default = NA), - make_option("--q_instruments", type="character", default=NA), - make_option("--l_instruments", type="character", default=NA), + make_option("--q_instruments", type = "character", default = NA), + make_option("--l_instruments", type = "character", default = NA), - make_option("--q_spectraTypes", type="character", default=NA), - make_option("--l_spectraTypes", type="character", default=NA), + make_option("--q_spectraTypes", type = "character", default = NA), + make_option("--l_spectraTypes", type = "character", default = NA), - make_option("--q_spectraFilter", action="store_true"), - make_option("--l_spectraFilter", action="store_true"), + make_option("--q_spectraFilter", action = "store_true"), + make_option("--l_spectraFilter", action = "store_true"), - make_option("--usePrecursors", action="store_true"), + make_option("--usePrecursors", action = "store_true"), - make_option("--mzW", type="double"), - make_option("--raW", type="double"), + make_option("--mzW", type = "double"), + make_option("--raW", type = "double"), - make_option("--rttol", type="double", default=NA), + make_option("--rttol", type = "double", default = NA), - make_option("--updateDb", action="store_true"), - make_option("--copyDb", action="store_true"), - make_option("--cores", default=1) + make_option("--updateDb", action = "store_true"), + make_option("--copyDb", action = "store_true"), + make_option("--cores", default = 1) ) # store options -opt<- parse_args(OptionParser(option_list=option_list)) +opt <- parse_args(OptionParser(option_list = option_list)) print(opt) # check if the sqlite databases have any spectra -checkSPeakMeta <- function(dbPth, nme){ - if(is.null(dbPth)){ +checkSPeakMeta <- function(dbPth, nme) { + if (is.null(dbPth)) { return(TRUE) - }else if ((file.exists(dbPth)) & (file.info(dbPth)$size>0)){ + }else if ((file.exists(dbPth)) & (file.info(dbPth)$size > 0)) { con <- DBI::dbConnect(RSQLite::SQLite(), dbPth) - if (DBI::dbExistsTable(con, "s_peak_meta")){ - spm <- DBI::dbGetQuery(con, 'SELECT * FROM s_peak_meta ORDER BY ROWID ASC LIMIT 1') + if (DBI::dbExistsTable(con, "s_peak_meta")) { + spm <- DBI::dbGetQuery(con, "SELECT * FROM s_peak_meta ORDER BY ROWID ASC LIMIT 1") return(TRUE) - }else if(DBI::dbExistsTable(con, "library_spectra_meta")){ - spm <- DBI::dbGetQuery(con, 'SELECT * FROM library_spectra_meta ORDER BY ROWID ASC LIMIT 1') + }else if (DBI::dbExistsTable(con, "library_spectra_meta")) { + spm <- DBI::dbGetQuery(con, "SELECT * FROM library_spectra_meta ORDER BY ROWID ASC LIMIT 1") return(TRUE) }else{ - print(paste("No spectra available for ",nme)) + print(paste("No spectra available for ", nme)) return(FALSE) } }else{ @@ -111,110 +115,111 @@ checkSPeakMeta <- function(dbPth, nme){ return(FALSE) } - + } -addQueryNameColumn <- function(sm){ - if (is.null(sm$matchedResults) || length(sm$matchedResults)==1 || nrow(sm$matchedResults)==0){ +addQueryNameColumn <- function(sm) { + if (is.null(sm$matchedResults) || length(sm$matchedResults) == 1 || nrow(sm$matchedResults) == 0) { return(sm) } - con <- DBI::dbConnect(RSQLite::SQLite(),sm$q_dbPth) - if (DBI::dbExistsTable(con, "s_peak_meta")){ - spm <- DBI::dbGetQuery(con, 'SELECT pid, name AS query_entry_name FROM s_peak_meta') - }else if(DBI::dbExistsTable(con, "library_spectra_meta")){ - spm <- DBI::dbGetQuery(con, 'SELECT id AS pid, name AS query_entry_name FROM library_spectra_meta') + con <- DBI::dbConnect(RSQLite::SQLite(), sm$q_dbPth) + if (DBI::dbExistsTable(con, "s_peak_meta")) { + spm <- DBI::dbGetQuery(con, "SELECT pid, name AS query_entry_name FROM s_peak_meta") + }else if (DBI::dbExistsTable(con, "library_spectra_meta")) { + spm <- DBI::dbGetQuery(con, "SELECT id AS pid, name AS query_entry_name FROM library_spectra_meta") } print(sm$matchedResults) - if ('pid' %in% colnames(sm$matchedResults)){ - sm$matchedResults <- merge(sm$matchedResults, spm, by.x='pid', by.y='pid') + if ("pid" %in% colnames(sm$matchedResults)) { + sm$matchedResults <- merge(sm$matchedResults, spm, by.x = "pid", by.y = "pid") }else{ - sm$matchedResults <- merge(sm$matchedResults, spm, by.x='qpid', by.y='pid') + sm$matchedResults <- merge(sm$matchedResults, spm, by.x = "qpid", by.y = "pid") } - + print(sm$xcmsMatchedResults) - if (is.null(sm$xcmsMatchedResults) || length(sm$xcmsMatchedResults)==1 || nrow(sm$xcmsMatchedResults)==0){ + if (is.null(sm$xcmsMatchedResults) || length(sm$xcmsMatchedResults) == 1 || nrow(sm$xcmsMatchedResults) == 0) { return(sm) }else{ - if ('pid' %in% colnames(sm$xcmsMatchedResults)){ - sm$xcmsMatchedResults<- merge(sm$xcmsMatchedResults, spm, by.x='pid', by.y='pid') + if ("pid" %in% colnames(sm$xcmsMatchedResults)) { + sm$xcmsMatchedResults <- merge(sm$xcmsMatchedResults, spm, by.x = "pid", by.y = "pid") }else{ - sm$xcmsMatchedResults <- merge(sm$xcmsMatchedResults, spm, by.x='qpid', by.y='pid') + sm$xcmsMatchedResults <- merge(sm$xcmsMatchedResults, spm, by.x = "qpid", by.y = "pid") } } - + return(sm) - + } -updateDbF <- function(q_con, l_con){ - message('Adding extra details to database') - q_con <- DBI::dbConnect(RSQLite::SQLite(),sm$q_dbPth) - if (DBI::dbExistsTable(q_con, "l_s_peak_meta")){ - l_s_peak_meta <- DBI::dbGetQuery(q_con, 'SELECT * FROM l_s_peak_meta') - colnames(l_s_peak_meta)[1] <- 'pid' +updateDbF <- function(q_con, l_con) { + message("Adding extra details to database") + q_con <- DBI::dbConnect(RSQLite::SQLite(), sm$q_dbPth) + if (DBI::dbExistsTable(q_con, "l_s_peak_meta")) { + l_s_peak_meta <- DBI::dbGetQuery(q_con, "SELECT * FROM l_s_peak_meta") + colnames(l_s_peak_meta)[1] <- "pid" } - - l_con <- DBI::dbConnect(RSQLite::SQLite(),l_dbPth) - if (DBI::dbExistsTable(l_con, "s_peaks")){ - l_s_peaks <- DBI::dbGetQuery(q_con, sprintf("SELECT * FROM s_peaks WHERE pid in (%s)", paste(unique(l_s_peak_meta$pid), collapse=','))) - - }else if(DBI::dbExistsTable(l_con, "library_spectra")){ + + l_con <- DBI::dbConnect(RSQLite::SQLite(), l_dbPth) + if (DBI::dbExistsTable(l_con, "s_peaks")) { + l_s_peaks <- DBI::dbGetQuery(q_con, sprintf("SELECT * FROM s_peaks WHERE pid in (%s)", paste(unique(l_s_peak_meta$pid), collapse = ","))) + + }else if (DBI::dbExistsTable(l_con, "library_spectra")) { l_s_peaks <- DBI::dbGetQuery(l_con, sprintf("SELECT * FROM library_spectra - WHERE library_spectra_meta_id in (%s)", paste(unique(l_s_peak_meta$pid), collapse=','))) + WHERE library_spectra_meta_id in (%s)", paste(unique(l_s_peak_meta$pid), collapse = ","))) }else{ - l_s_peaks = NULL + l_s_peaks <- NULL } - - if (DBI::dbExistsTable(l_con, "source")){ - l_source <- DBI::dbGetQuery(l_con, 'SELECT * FROM source') + + if (DBI::dbExistsTable(l_con, "source")) { + l_source <- DBI::dbGetQuery(l_con, "SELECT * FROM source") }else if (DBI::dbExistsTable(l_con, "library_spectra_source")) { - l_source <- DBI::dbGetQuery(l_con, 'SELECT * FROM library_spectra_source') + l_source <- DBI::dbGetQuery(l_con, "SELECT * FROM library_spectra_source") }else{ - l_source = NULL + l_source <- NULL } - - if (!is.null(l_s_peaks)){ - DBI::dbWriteTable(q_con, name='l_s_peaks', value=l_s_peaks, row.names=FALSE, append=TRUE) + + if (!is.null(l_s_peaks)) { + DBI::dbWriteTable(q_con, name = "l_s_peaks", value = l_s_peaks, row.names = FALSE, append = TRUE) } - - if (!is.null(l_source)){ - DBI::dbWriteTable(q_con, name='l_source', value=l_source, row.names=FALSE, append=TRUE) + + if (!is.null(l_source)) { + DBI::dbWriteTable(q_con, name = "l_source", value = l_source, row.names = FALSE, append = TRUE) } - - + } -extractMultiple <- function(optParam){ - if (!is.na(optParam)){ - param <- trimws(strsplit(optParam, ',')[[1]]) +extractMultiple <- function(optParam) { + if (!is.na(optParam)) { + param <- trimws(strsplit(optParam, ",")[[1]]) param <- param[param != ""] - }else{ + }else { param <- NA } return(param) } -if(!is.null(opt$q_defaultDb)){ - q_dbPth <- system.file("extdata", "library_spectra", "library_spectra.db", package="msPurityData") - q_dbType <- 'sqlite' -}else if (!opt$q_dbType=='local_config'){ +if (!is.null(opt$q_defaultDb)) { + q_dbPth <- system.file("extdata", "library_spectra", "library_spectra.db", package = "msPurityData") + q_dbType <- "sqlite" +}else if (!opt$q_dbType == "local_config") { q_dbType <- opt$q_dbType q_dbPth <- opt$q_dbPth } -if(!is.null(opt$l_defaultDb)){ - l_dbPth <- system.file("extdata", "library_spectra", "library_spectra.db", package="msPurityData") - l_dbType <- 'sqlite' -}else if (!opt$l_dbType=='local_config'){ +if (!is.null(opt$l_defaultDb)) { + l_dbPth <- system.file("extdata", "library_spectra", "library_spectra.db", package = "msPurityData") + l_dbType <- "sqlite" +}else if (!opt$l_dbType == "local_config") { l_dbType <- opt$l_dbType l_dbPth <- opt$l_dbPth } +q_spectraTypes <- extractMultiple(opt$q_spectraTypes) +l_spectraTypes <- extractMultiple(opt$l_spectraTypes) q_polarity <- extractMultiple(opt$q_polarity) l_polarity <- extractMultiple(opt$l_polarity) @@ -231,115 +236,118 @@ l_sources <- extractMultiple(opt$l_sources) q_sourcesUser <- extractMultiple(opt$q_sourcesUser) l_sourcesUser <- extractMultiple(opt$l_sourcesUser) -q_sources <-c(q_sources, q_sourcesUser) -l_sources <-c(l_sources, l_sourcesUser) +q_sources <- c(q_sources, q_sourcesUser) +l_sources <- c(l_sources, l_sourcesUser) q_instrumentTypes <- extractMultiple(opt$q_instrumentTypes) l_instrumentTypes <- extractMultiple(opt$l_instrumentTypes) -q_instrumentTypes <-c(q_instrumentTypes, q_instrumentTypes) -l_instrumentTypes <-c(l_instrumentTypes, l_instrumentTypes) +q_instrumentTypes <- c(q_instrumentTypes, q_instrumentTypes) +l_instrumentTypes <- c(l_instrumentTypes, l_instrumentTypes) -if(!is.null(opt$l_spectraFilter)){ +if (!is.null(opt$l_spectraFilter)) { l_spectraFilter <- TRUE }else{ l_spectraFilter <- FALSE } -if(!is.null(opt$q_spectraFilter)){ +if (!is.null(opt$q_spectraFilter)) { q_spectraFilter <- TRUE }else{ q_spectraFilter <- FALSE } -if(!is.null(opt$updateDb)){ +if (!is.null(opt$updateDb)) { updateDb <- TRUE }else{ updateDb <- FALSE } -if(!is.null(opt$copyDb)){ +if (!is.null(opt$copyDb)) { copyDb <- TRUE }else{ copyDb <- FALSE } -if(!is.null(opt$l_rtrangeMax)){ +if (!is.null(opt$l_rtrangeMax)) { l_rtrangeMax <- opt$l_rtrangeMax }else{ l_rtrangeMax <- NA } -if(!is.null(opt$q_rtrangeMax)){ +if (!is.null(opt$q_rtrangeMax)) { q_rtrangeMax <- opt$q_rtrangeMax }else{ q_rtrangeMax <- NA } -if(!is.null(opt$l_rtrangeMin)){ +if (!is.null(opt$l_rtrangeMin)) { l_rtrangeMin <- opt$l_rtrangeMin }else{ l_rtrangeMin <- NA } -if(!is.null(opt$q_rtrangeMin)){ +if (!is.null(opt$q_rtrangeMin)) { q_rtrangeMin <- opt$q_rtrangeMin }else{ q_rtrangeMin <- NA } -q_check <- checkSPeakMeta(opt$q_dbPth, 'query') -l_check <- checkSPeakMeta(opt$l_dbPth, 'library') +q_check <- checkSPeakMeta(opt$q_dbPth, "query") +l_check <- checkSPeakMeta(opt$l_dbPth, "library") -if (q_check && l_check){ +if (q_check && l_check) { sm <- msPurity::spectralMatching( q_purity = opt$q_purity, l_purity = opt$l_purity, - + q_ppmProd = opt$q_ppmProd, l_ppmProd = opt$l_ppmProd, - + q_ppmPrec = opt$q_ppmPrec, l_ppmPrec = opt$l_ppmPrec, - + q_raThres = opt$q_raThres, l_raThres = opt$l_raThres, - + q_pol = q_polarity, l_pol = l_polarity, - + + q_spectraFilter = q_spectraFilter, + l_spectraFilter = l_spectraFilter, + q_xcmsGroups = q_xcmsGroups, l_xcmsGroups = l_xcmsGroups, - + q_pids = q_pids, l_pids = l_pids, - + q_sources = q_sources, l_sources = l_sources, - + q_instrumentTypes = q_instrumentTypes, l_instrumentTypes = l_instrumentTypes, - - q_spectraFilter= q_spectraFilter, - l_spectraFilter= l_spectraFilter, - - l_rtrange=c(l_rtrangeMin, l_rtrangeMax), - q_rtrange=c(q_rtrangeMin, q_rtrangeMax), - + + q_spectraTypes = q_spectraTypes, + l_spectraTypes = l_spectraTypes, + + l_rtrange = c(l_rtrangeMin, l_rtrangeMax), + q_rtrange = c(q_rtrangeMin, q_rtrangeMax), + q_accessions = opt$q_accessions, - l_accessions= opt$l_accessions, - + l_accessions = opt$l_accessions, + raW = opt$raW, mzW = opt$mzW, - rttol=opt$rttol, - cores=opt$cores, - - copyDb=copyDb, - updateDb=updateDb, + rttol = opt$rttol, + cores = opt$cores, + + copyDb = copyDb, + updateDb = updateDb, outPth = "db_with_spectral_matching.sqlite", - + q_dbPth = q_dbPth, q_dbType = q_dbType, q_dbName = q_dbName, @@ -347,7 +355,7 @@ if (q_check && l_check){ q_dbUser = q_dbUser, q_dbPass = q_dbPass, q_dbPort = q_dbPort, - + l_dbPth = l_dbPth, l_dbType = l_dbType, l_dbName = l_dbName, @@ -355,15 +363,15 @@ if (q_check && l_check){ l_dbUser = l_dbUser, l_dbPass = l_dbPass, l_dbPort = l_dbPort - + ) - + sm <- addQueryNameColumn(sm) # Get name of the query results (and merged with the data frames) - write.table(sm$matchedResults, 'matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE) - write.table(sm$xcmsMatchedResults, 'xcms_matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE) - - if(updateDb){ + write.table(sm$matchedResults, "matched_results.tsv", sep = "\t", row.names = FALSE, col.names = TRUE) + write.table(sm$xcmsMatchedResults, "xcms_matched_results.tsv", sep = "\t", row.names = FALSE, col.names = TRUE) + + if (updateDb) { updateDbF(q_con, l_con) } } diff --git a/tools/msPurity/spectralMatching.xml b/tools/msPurity/spectralMatching.xml index cf394c8..b6ac784 100644 --- a/tools/msPurity/spectralMatching.xml +++ b/tools/msPurity/spectralMatching.xml @@ -221,6 +221,8 @@ + + @@ -234,6 +236,8 @@ + + @@ -360,10 +364,10 @@ without the XCMS details. This table is useful to observe spectral matching resu for all MS/MS spectra irrespective of if they are linked to XCMS MS1 features. -.. _spectral_database_schema: https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-datatabase-schema.html +.. _spectral_database_schema: https://www.bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-database-vignette.html .. _larger_database: https://github.com/computational-metabolomics/msp2db/releases .. _msp2db: https://github.com/computational-metabolomics/msp2db/releases -.. _msPurity_spectral_matching_vignette: https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-lcmsms-data-processing-and-spectral-matching-vignette.html +.. _msPurity_spectral_matching_vignette: https://www.bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-lcmsms-data-processing-and-spectral-matching-vignette.html ]]> diff --git a/tools/msPurity/test-data/createDatabase_output.sqlite b/tools/msPurity/test-data/createDatabase_output.sqlite index a77dd85..74f7125 100644 Binary files a/tools/msPurity/test-data/createDatabase_output.sqlite and b/tools/msPurity/test-data/createDatabase_output.sqlite differ diff --git a/tools/msPurity/test-data/createDatabase_output_eic.sqlite b/tools/msPurity/test-data/createDatabase_output_eic.sqlite index 4b85f0a..62d2d99 100644 Binary files a/tools/msPurity/test-data/createDatabase_output_eic.sqlite and b/tools/msPurity/test-data/createDatabase_output_eic.sqlite differ diff --git a/tools/msPurity/test-data/createMSP_output_av_all_metadata.msp b/tools/msPurity/test-data/createMSP_output_av_all_metadata.msp index 7beaff8..9899202 100644 --- a/tools/msPurity/test-data/createMSP_output_av_all_metadata.msp +++ b/tools/msPurity/test-data/createMSP_output_av_all_metadata.msp @@ -10,7 +10,7 @@ CH$LINK: INCHIKEY VACCAVUAMIDAGB-UHFFFAOYSA-N CH$LINK: PUBCHEM CID:5328 CH$NAME Unknown XCMS groupid (grpid): 12 -COMMENT: Exported from msPurity purityA object using function createMSP, using method 'av_all' msPurity version:1.12.2 +COMMENT: Exported from msPurity purityA object using function createMSP, using method 'av_all' msPurity version:1.16.2 PK$NUM_PEAK: 2 PK$PEAK: m/z int. rel.int. 112.050884246826 502873.46875 100 @@ -28,7 +28,7 @@ CH$LINK: INCHIKEY VACCAVUAMIDAGB-UHFFFAOYSA-N CH$LINK: PUBCHEM CID:5328 CH$NAME Unknown XCMS groupid (grpid): 12 -COMMENT: Exported from msPurity purityA object using function createMSP, using method 'av_all' msPurity version:1.12.2 +COMMENT: Exported from msPurity purityA object using function createMSP, using method 'av_all' msPurity version:1.16.2 PK$NUM_PEAK: 2 PK$PEAK: m/z int. rel.int. 112.050884246826 502873.46875 100 diff --git a/tools/msPurity/test-data/createMSP_output_av_all_metadata_custom_adducts.msp b/tools/msPurity/test-data/createMSP_output_av_all_metadata_custom_adducts.msp index 2f1fa88..5239b87 100644 --- a/tools/msPurity/test-data/createMSP_output_av_all_metadata_custom_adducts.msp +++ b/tools/msPurity/test-data/createMSP_output_av_all_metadata_custom_adducts.msp @@ -10,7 +10,7 @@ CH$LINK: INCHIKEY VACCAVUAMIDAGB-UHFFFAOYSA-N CH$LINK: PUBCHEM CID:5328 CH$NAME Unknown XCMS groupid (grpid): 12 -COMMENT: Exported from msPurity purityA object using function createMSP, using method 'av_all' msPurity version:1.12.2 +COMMENT: Exported from msPurity purityA object using function createMSP, using method 'av_all' msPurity version:1.16.2 PK$NUM_PEAK: 2 PK$PEAK: m/z int. rel.int. 112.050884246826 502873.46875 100 @@ -28,7 +28,7 @@ CH$LINK: INCHIKEY VACCAVUAMIDAGB-UHFFFAOYSA-N CH$LINK: PUBCHEM CID:5328 CH$NAME Unknown XCMS groupid (grpid): 12 -COMMENT: Exported from msPurity purityA object using function createMSP, using method 'av_all' msPurity version:1.12.2 +COMMENT: Exported from msPurity purityA object using function createMSP, using method 'av_all' msPurity version:1.16.2 PK$NUM_PEAK: 2 PK$PEAK: m/z int. rel.int. 112.050884246826 502873.46875 100 @@ -46,7 +46,7 @@ CH$LINK: INCHIKEY VACCAVUAMIDAGB-UHFFFAOYSA-N CH$LINK: PUBCHEM CID:5328 CH$NAME Unknown XCMS groupid (grpid): 12 -COMMENT: Exported from msPurity purityA object using function createMSP, using method 'av_all' msPurity version:1.12.2 +COMMENT: Exported from msPurity purityA object using function createMSP, using method 'av_all' msPurity version:1.16.2 PK$NUM_PEAK: 2 PK$PEAK: m/z int. rel.int. 112.050884246826 502873.46875 100 @@ -64,7 +64,7 @@ CH$LINK: INCHIKEY VACCAVUAMIDAGB-UHFFFAOYSA-N CH$LINK: PUBCHEM CID:5328 CH$NAME Unknown XCMS groupid (grpid): 12 -COMMENT: Exported from msPurity purityA object using function createMSP, using method 'av_all' msPurity version:1.12.2 +COMMENT: Exported from msPurity purityA object using function createMSP, using method 'av_all' msPurity version:1.16.2 PK$NUM_PEAK: 2 PK$PEAK: m/z int. rel.int. 112.050884246826 502873.46875 100 @@ -82,7 +82,7 @@ CH$LINK: INCHIKEY VACCAVUAMIDAGB-UHFFFAOYSA-N CH$LINK: PUBCHEM CID:5328 CH$NAME Unknown XCMS groupid (grpid): 12 -COMMENT: Exported from msPurity purityA object using function createMSP, using method 'av_all' msPurity version:1.12.2 +COMMENT: Exported from msPurity purityA object using function createMSP, using method 'av_all' msPurity version:1.16.2 PK$NUM_PEAK: 2 PK$PEAK: m/z int. rel.int. 112.050884246826 502873.46875 100 @@ -100,7 +100,7 @@ CH$LINK: INCHIKEY VACCAVUAMIDAGB-UHFFFAOYSA-N CH$LINK: PUBCHEM CID:5328 CH$NAME Unknown XCMS groupid (grpid): 12 -COMMENT: Exported from msPurity purityA object using function createMSP, using method 'av_all' msPurity version:1.12.2 +COMMENT: Exported from msPurity purityA object using function createMSP, using method 'av_all' msPurity version:1.16.2 PK$NUM_PEAK: 2 PK$PEAK: m/z int. rel.int. 112.050884246826 502873.46875 100 diff --git a/tools/msPurity/test-data/spectralMatching_db_with_spectral_matching.sqlite b/tools/msPurity/test-data/spectralMatching_db_with_spectral_matching.sqlite index c674639..dae259c 100644 Binary files a/tools/msPurity/test-data/spectralMatching_db_with_spectral_matching.sqlite and b/tools/msPurity/test-data/spectralMatching_db_with_spectral_matching.sqlite differ diff --git a/tools/msPurity/test-data/spectralMatching_db_with_spectral_matching_instrumentTypes.sqlite b/tools/msPurity/test-data/spectralMatching_db_with_spectral_matching_instrumentTypes.sqlite index c674639..dae259c 100644 Binary files a/tools/msPurity/test-data/spectralMatching_db_with_spectral_matching_instrumentTypes.sqlite and b/tools/msPurity/test-data/spectralMatching_db_with_spectral_matching_instrumentTypes.sqlite differ diff --git a/tools/msPurity/test-data/spectralMatching_matched_results.tsv b/tools/msPurity/test-data/spectralMatching_matched_results.tsv index 4243d89..057687a 100644 --- a/tools/msPurity/test-data/spectralMatching_matched_results.tsv +++ b/tools/msPurity/test-data/spectralMatching_matched_results.tsv @@ -1,3 +1,3 @@ "pid" "grpid" "mz" "mzmin" "mzmax" "rt" "rtmin" "rtmax" "npeaks" "sample" "LCMSMS_2" "LCMSMS_1" "LCMS_2" "LCMS_1" "grp_name" "lpid" "mid" "dpc" "rdpc" "cdpc" "mcount" "allcount" "mpercent" "library_rt" "query_rt" "rtdiff" "library_precursor_mz" "query_precursor_mz" "library_precursor_ion_purity" "query_precursor_ion_purity" "library_accession" "library_precursor_type" "library_entry_name" "inchikey" "library_source_name" "library_compound_name" "query_entry_name" -1663 14 113.035283604395 113.035156497997 113.03541992282 80.50932 77.16429 83.64567 4 4 88462324.9597518 92812020.095242 77298864.2688328 77198465.9156761 "M113T81" 56653 1 0.942265461847349 0.996860823822942 0.753812369477879 1 4 0.25 NA 80.50932 NA "113.03508" 113.035283604395 NA 1 "PR100037" "[M+H]+" "Uracil" "ISAKRJDGNUQOIC-UHFFFAOYSA-N" "massbank" "Uracil" NA -1664 14 113.035283604395 113.035156497997 113.03541992282 80.50932 77.16429 83.64567 4 4 88462324.9597518 92812020.095242 77298864.2688328 77198465.9156761 "M113T81" 56653 2 0.942265461847349 0.996860823822942 0.753812369477879 1 4 0.25 NA 80.50932 NA "113.03508" 113.035283604395 NA 1 "PR100037" "[M+H]+" "Uracil" "ISAKRJDGNUQOIC-UHFFFAOYSA-N" "massbank" "Uracil" NA +1665 14 113.035283604395 113.035156497997 113.03541992282 80.50932 77.16429 83.64567 4 4 88462324.9597518 92812020.095242 77298864.2688328 77198465.9156761 "M113T81" 56653 1 0.942265461847349 0.945232713864488 0.753812369477879 1 4 0.25 NA 80.50932 NA "113.03508" 113.035283604395 NA 1 "PR100037" "[M+H]+" "Uracil" "ISAKRJDGNUQOIC-UHFFFAOYSA-N" "massbank" "Uracil" NA +1666 14 113.035283604395 113.035156497997 113.03541992282 80.50932 77.16429 83.64567 4 4 88462324.9597518 92812020.095242 77298864.2688328 77198465.9156761 "M113T81" 56653 2 0.942265461847349 0.945232713864488 0.753812369477879 1 4 0.25 NA 80.50932 NA "113.03508" 113.035283604395 NA 1 "PR100037" "[M+H]+" "Uracil" "ISAKRJDGNUQOIC-UHFFFAOYSA-N" "massbank" "Uracil" NA diff --git a/tools/msPurity/test-data/spectralMatching_matched_results_instrumentTypes.tsv b/tools/msPurity/test-data/spectralMatching_matched_results_instrumentTypes.tsv index 4243d89..057687a 100644 --- a/tools/msPurity/test-data/spectralMatching_matched_results_instrumentTypes.tsv +++ b/tools/msPurity/test-data/spectralMatching_matched_results_instrumentTypes.tsv @@ -1,3 +1,3 @@ "pid" "grpid" "mz" "mzmin" "mzmax" "rt" "rtmin" "rtmax" "npeaks" "sample" "LCMSMS_2" "LCMSMS_1" "LCMS_2" "LCMS_1" "grp_name" "lpid" "mid" "dpc" "rdpc" "cdpc" "mcount" "allcount" "mpercent" "library_rt" "query_rt" "rtdiff" "library_precursor_mz" "query_precursor_mz" "library_precursor_ion_purity" "query_precursor_ion_purity" "library_accession" "library_precursor_type" "library_entry_name" "inchikey" "library_source_name" "library_compound_name" "query_entry_name" -1663 14 113.035283604395 113.035156497997 113.03541992282 80.50932 77.16429 83.64567 4 4 88462324.9597518 92812020.095242 77298864.2688328 77198465.9156761 "M113T81" 56653 1 0.942265461847349 0.996860823822942 0.753812369477879 1 4 0.25 NA 80.50932 NA "113.03508" 113.035283604395 NA 1 "PR100037" "[M+H]+" "Uracil" "ISAKRJDGNUQOIC-UHFFFAOYSA-N" "massbank" "Uracil" NA -1664 14 113.035283604395 113.035156497997 113.03541992282 80.50932 77.16429 83.64567 4 4 88462324.9597518 92812020.095242 77298864.2688328 77198465.9156761 "M113T81" 56653 2 0.942265461847349 0.996860823822942 0.753812369477879 1 4 0.25 NA 80.50932 NA "113.03508" 113.035283604395 NA 1 "PR100037" "[M+H]+" "Uracil" "ISAKRJDGNUQOIC-UHFFFAOYSA-N" "massbank" "Uracil" NA +1665 14 113.035283604395 113.035156497997 113.03541992282 80.50932 77.16429 83.64567 4 4 88462324.9597518 92812020.095242 77298864.2688328 77198465.9156761 "M113T81" 56653 1 0.942265461847349 0.945232713864488 0.753812369477879 1 4 0.25 NA 80.50932 NA "113.03508" 113.035283604395 NA 1 "PR100037" "[M+H]+" "Uracil" "ISAKRJDGNUQOIC-UHFFFAOYSA-N" "massbank" "Uracil" NA +1666 14 113.035283604395 113.035156497997 113.03541992282 80.50932 77.16429 83.64567 4 4 88462324.9597518 92812020.095242 77298864.2688328 77198465.9156761 "M113T81" 56653 2 0.942265461847349 0.945232713864488 0.753812369477879 1 4 0.25 NA 80.50932 NA "113.03508" 113.035283604395 NA 1 "PR100037" "[M+H]+" "Uracil" "ISAKRJDGNUQOIC-UHFFFAOYSA-N" "massbank" "Uracil" NA diff --git a/tools/msPurity/test-data/spectralMatching_xcms_matched_results.tsv b/tools/msPurity/test-data/spectralMatching_xcms_matched_results.tsv index 7349920..658f8a8 100644 --- a/tools/msPurity/test-data/spectralMatching_xcms_matched_results.tsv +++ b/tools/msPurity/test-data/spectralMatching_xcms_matched_results.tsv @@ -1,3 +1,3 @@ "qpid" "lpid" "mid" "dpc" "rdpc" "cdpc" "mcount" "allcount" "mpercent" "library_rt" "query_rt" "rtdiff" "library_precursor_mz" "query_precursor_mz" "library_precursor_ion_purity" "query_precursor_ion_purity" "library_accession" "library_precursor_type" "library_entry_name" "inchikey" "library_source_name" "library_compound_name" "query_entry_name" -1663 56653 1 0.942265461847349 0.996860823822942 0.753812369477879 1 4 0.25 NA 80.50932 NA "113.03508" 113.035283604395 NA 1 "PR100037" "[M+H]+" "Uracil" "ISAKRJDGNUQOIC-UHFFFAOYSA-N" "massbank" "Uracil" NA -1664 56653 2 0.942265461847349 0.996860823822942 0.753812369477879 1 4 0.25 NA 80.50932 NA "113.03508" 113.035283604395 NA 1 "PR100037" "[M+H]+" "Uracil" "ISAKRJDGNUQOIC-UHFFFAOYSA-N" "massbank" "Uracil" NA +1665 56653 1 0.942265461847349 0.945232713864488 0.753812369477879 1 4 0.25 NA 80.50932 NA "113.03508" 113.035283604395 NA 1 "PR100037" "[M+H]+" "Uracil" "ISAKRJDGNUQOIC-UHFFFAOYSA-N" "massbank" "Uracil" NA +1666 56653 2 0.942265461847349 0.945232713864488 0.753812369477879 1 4 0.25 NA 80.50932 NA "113.03508" 113.035283604395 NA 1 "PR100037" "[M+H]+" "Uracil" "ISAKRJDGNUQOIC-UHFFFAOYSA-N" "massbank" "Uracil" NA diff --git a/tools/msPurity/test-data/spectralMatching_xcms_matched_results_instrumentTypes.tsv b/tools/msPurity/test-data/spectralMatching_xcms_matched_results_instrumentTypes.tsv index 7349920..658f8a8 100644 --- a/tools/msPurity/test-data/spectralMatching_xcms_matched_results_instrumentTypes.tsv +++ b/tools/msPurity/test-data/spectralMatching_xcms_matched_results_instrumentTypes.tsv @@ -1,3 +1,3 @@ "qpid" "lpid" "mid" "dpc" "rdpc" "cdpc" "mcount" "allcount" "mpercent" "library_rt" "query_rt" "rtdiff" "library_precursor_mz" "query_precursor_mz" "library_precursor_ion_purity" "query_precursor_ion_purity" "library_accession" "library_precursor_type" "library_entry_name" "inchikey" "library_source_name" "library_compound_name" "query_entry_name" -1663 56653 1 0.942265461847349 0.996860823822942 0.753812369477879 1 4 0.25 NA 80.50932 NA "113.03508" 113.035283604395 NA 1 "PR100037" "[M+H]+" "Uracil" "ISAKRJDGNUQOIC-UHFFFAOYSA-N" "massbank" "Uracil" NA -1664 56653 2 0.942265461847349 0.996860823822942 0.753812369477879 1 4 0.25 NA 80.50932 NA "113.03508" 113.035283604395 NA 1 "PR100037" "[M+H]+" "Uracil" "ISAKRJDGNUQOIC-UHFFFAOYSA-N" "massbank" "Uracil" NA +1665 56653 1 0.942265461847349 0.945232713864488 0.753812369477879 1 4 0.25 NA 80.50932 NA "113.03508" 113.035283604395 NA 1 "PR100037" "[M+H]+" "Uracil" "ISAKRJDGNUQOIC-UHFFFAOYSA-N" "massbank" "Uracil" NA +1666 56653 2 0.942265461847349 0.945232713864488 0.753812369477879 1 4 0.25 NA 80.50932 NA "113.03508" 113.035283604395 NA 1 "PR100037" "[M+H]+" "Uracil" "ISAKRJDGNUQOIC-UHFFFAOYSA-N" "massbank" "Uracil" NA