Skip to content

Commit

Permalink
NEW: Add tool specifically for importing fastq data both paired and u…
Browse files Browse the repository at this point in the history
…npaired (#49)
  • Loading branch information
Oddant1 authored Feb 27, 2024
1 parent 2bd7ea3 commit a124fa1
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 5 deletions.
3 changes: 1 addition & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -50,5 +50,4 @@ clean: distclean
rm -rf ./rendered/tests/suite_*; \
rm -rf ./rendered/tools/suite_*

distclean: ;

distclean: ;
85 changes: 84 additions & 1 deletion q2galaxy/core/drivers/builtins.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,23 @@

from q2galaxy.core.drivers.stdio import error_handler, stdio_files

# Verify that the types the tool relies on are present and use this information
# in q2galaxy/core/templaters/__init__.py to determine whether or not to render
# the tool.
#
# We do these imports here because they are needed for import_fastq_data, and
# we set this variable because the presence/absence of these types is needed in
# templaters init.
IMPORT_FASTQ = True

try:
from q2_types.per_sample_sequences import (
CasavaOneEightSingleLanePerSampleDirFmt, SequencesWithQuality,
PairedEndSequencesWithQuality)
from q2_types.sample_data import SampleData
except Exception:
IMPORT_FASTQ = False


def builtin_runner(action_id, inputs):
with stdio_files() as stdio:
Expand All @@ -29,8 +46,10 @@ def _get_tool(action_id):
builtin_map = {
'import': import_data,
'export': export_data,
'qza_to_tabular': qza_to_tabular
'qza_to_tabular': qza_to_tabular,
'import-fastq': import_fastq_data
}

try:
return builtin_map[action_id]
except KeyError:
Expand All @@ -46,6 +65,70 @@ def import_data(inputs, stdio):
_stdio=stdio)


def import_fastq_data(inputs, stdio):
    """Import a collection of fastq files as a QIIME 2 artifact.

    The semantic type is ``SampleData[PairedEndSequencesWithQuality]`` when
    ``_is_paired`` reports paired data and ``SampleData[SequencesWithQuality]``
    otherwise; the format is always
    ``CasavaOneEightSingleLanePerSampleDirFmt``.

    Parameters
    ----------
    inputs : dict
        Tool inputs. The helpers called here read ``inputs['import']`` as a
        sequence of entries with ``staging_path`` and ``source_path`` keys.
    stdio
        Forwarded unchanged as ``_stdio`` to every helper.

    Raises
    ------
    ImportError
        If the q2_types names this tool depends on could not be imported
        when this module was loaded (``IMPORT_FASTQ`` is False).
    """
    if not IMPORT_FASTQ:
        # Without this guard the missing names surface further down as a
        # bare NameError that says nothing about the actual cause.
        raise ImportError(
            'The import-fastq tool requires q2_types.per_sample_sequences '
            'and q2_types.sample_data, which could not be imported.')

    paired = _is_paired(inputs, _stdio=stdio)

    if paired:
        type_ = SampleData[PairedEndSequencesWithQuality]
    else:
        type_ = SampleData[SequencesWithQuality]
    format_ = CasavaOneEightSingleLanePerSampleDirFmt

    files_to_move = _import_fastq_get_files_to_move(
        inputs, paired, _stdio=stdio)

    artifact = _import_name_data(type_, format_, files_to_move, _stdio=stdio)
    _import_save(artifact, _stdio=stdio)


@error_handler(header='Unexpected error determining if data is paired: ')
def _is_paired(inputs):
    """Guess whether the inputs are paired-end from the first staging path.

    Only the first entry of ``inputs['import']`` is inspected: the data is
    treated as paired when the basename of its ``staging_path`` contains
    either 'forward' or 'reverse'.
    """
    # NOTE(review): this is a substring heuristic, so a single-end file whose
    # name happens to contain 'forward' or 'reverse' is reported as paired.
    first_name = os.path.basename(inputs['import'][0]['staging_path'])
    return 'forward' in first_name or 'reverse' in first_name


@error_handler(header='Unexpected error getting files to move: ')
def _import_fastq_get_files_to_move(inputs, paired):
    """Pair each input's source path with a Casava-style destination name.

    Parameters
    ----------
    inputs : dict
        ``inputs['import']`` is iterated; every entry must provide
        ``staging_path`` and ``source_path``.
    paired
        Truthy when the data is paired-end.

    Returns
    -------
    list of tuple
        ``(source_path, casava_name)`` for every entry, in input order. The
        entry's position in the input is passed to ``_to_casava`` as ``idx``.
    """
    moves = []
    for position, entry in enumerate(inputs['import']):
        staged = entry['staging_path']
        origin = entry['source_path']

        # Only paired data can contain reverse reads; everything else is R1.
        is_reverse = paired and 'reverse' in os.path.basename(staged)
        read = 'R2' if is_reverse else 'R1'

        moves.append((origin, _to_casava(staged, position, paired, read)))

    return moves


# NOTE: If single end data is uploaded with no extension ex:
#
# sampleid
# vs
# sampleid.fastq.gz
#
# Then the user must choose the correct type during upload (.fastq.gz). If they
# leave it on auto, the import goes wrong because Galaxy does not seem to know
# what type to choose, so we seemingly get None. Choosing the wrong type is
# not supported.
def _to_casava(path, idx, paired, dir):
# This only works if for paired they rename the pair to only the sample-id
# and for single their filename is only the sample-id
if paired:
sample_id = os.path.split(path)[0]
else:
# Splitting on just .fastq and taking the first thing ought to work for
# any extension we are likely to see provided they don't put .fastq in
# their sampleid, and I'm fine with not letting them do that
sample_id = path.split('.fastq')[0]

return f"{sample_id}_{idx}_L001_{dir}_001.fastq.gz"


@error_handler(header='Unexpected error collecting arguments: ')
def _import_get_args(inputs):
type_ = qiime2.sdk.parse_type(inputs.pop('type'))
Expand Down
12 changes: 10 additions & 2 deletions q2galaxy/core/templaters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,23 @@
from q2galaxy.core.templaters.action import make_tool
from q2galaxy.core.templaters.common import make_tool_id
from q2galaxy.core.templaters.import_data import make_builtin_import
from q2galaxy.core.templaters.import_fastq_data import \
make_builtin_import_fastq
from q2galaxy.core.templaters.export_data import make_builtin_export
# from q2galaxy.core.templaters.qza_to_tabular import make_builtin_to_tabular

from q2galaxy.core.drivers.builtins import IMPORT_FASTQ

# Collected in a plain dict first so optional tools can be added before the
# mapping is frozen.
BUILTINS = {
    make_tool_id('tools', 'import'): make_builtin_import,
    make_tool_id('tools', 'export'): make_builtin_export,
    # make_tool_id('tools', 'qza_to_tabular'): make_builtin_to_tabular,
}

# Only offer the fastq import tool when the types it relies on were importable
# in q2galaxy.core.drivers.builtins.
if IMPORT_FASTQ:
    BUILTINS[make_tool_id('tools', 'import_fastq')] = make_builtin_import_fastq

# Read-only view exposed to the rest of the package.
BUILTIN_MAKERS = types.MappingProxyType(BUILTINS)


__all__ = ['make_tool', 'make_tool_id', 'BUILTIN_MAKERS']
59 changes: 59 additions & 0 deletions q2galaxy/core/templaters/import_fastq_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# ----------------------------------------------------------------------------
# Copyright (c) 2018-2023, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
from qiime2.sdk import PluginManager

from q2galaxy.core.util import XMLNode
from q2galaxy.core.templaters.common import (make_builtin_version,
make_tool_name_from_id,
make_requirements,
make_citations)


def make_builtin_import_fastq(meta, tool_id):
    """Build the tool XML node for the builtin fastq import tool.

    Parameters
    ----------
    meta
        Passed through to ``make_requirements``.
    tool_id
        Used as the tool's ``id`` attribute and to derive its display name.

    Returns
    -------
    The ``tool`` node with inputs, outputs, command, description,
    configfiles, citations and requirements appended in that order.
    """
    pm = PluginManager()

    # Every plugin that registers at least one semantic type. A set has no
    # order, so there is nothing to gain from sorting the records first.
    plugins = {record.plugin
               for record in pm.get_semantic_types().values()}

    # Set iteration order is arbitrary; sort the names so the requirements
    # are passed in the same order on every run.
    project_names = sorted(plugin.project_name for plugin in plugins)

    tool = XMLNode('tool', id=tool_id, name=make_tool_name_from_id(tool_id),
                   version=make_builtin_version(plugins))
    tool.append(_make_input())
    tool.append(_make_output())
    tool.append(
        XMLNode('command', "q2galaxy run tools import-fastq '$inputs'"))
    tool.append(XMLNode('description',
                        'Import fastq data into a QIIME 2 artifact'))
    tool.append(_make_config())
    tool.append(make_citations())
    tool.append(make_requirements(meta, *project_names))
    return tool


def _make_config():
    """Return the ``configfiles`` node holding the ``inputs`` config entry.

    The entry uses ``data_style='staging_path_and_source_path'``; the driver
    reads ``staging_path`` and ``source_path`` from every input it receives.
    """
    inputs_file = XMLNode('inputs', name='inputs',
                          data_style='staging_path_and_source_path')

    config = XMLNode('configfiles')
    config.append(inputs_file)
    return config


def _make_input():
    """Return the ``inputs`` node with the single ``import`` collection param.

    The parameter is a data collection whose ``collection_type`` is
    'list, list:paired'.
    """
    collection_param = XMLNode('param', name='import',
                               type='data_collection',
                               collection_type='list, list:paired')

    node = XMLNode('inputs')
    node.append(collection_param)
    return node


def _make_output():
    """Return the ``outputs`` node declaring the imported ``qza`` artifact.

    The single ``data`` entry is named 'imported-data' and is collected from
    'imported_data.qza' in the working directory.
    """
    artifact = XMLNode('data', name='imported-data', format='qza',
                       from_work_dir='imported_data.qza')

    node = XMLNode('outputs')
    node.append(artifact)
    return node

0 comments on commit a124fa1

Please sign in to comment.