Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tool specifically for importing fastq data both paired and unpaired #49

Merged
merged 24 commits into from
Feb 27, 2024
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -50,5 +50,4 @@ clean: distclean
rm -rf ./rendered/tests/suite_*; \
rm -rf ./rendered/tools/suite_*

distclean: ;

distclean: ;
70 changes: 70 additions & 0 deletions q2galaxy/core/drivers/builtins.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
import qiime2.sdk
import qiime2.util

from q2_types.per_sample_sequences import (
CasavaOneEightSingleLanePerSampleDirFmt, SequencesWithQuality,
PairedEndSequencesWithQuality)
from q2_types.sample_data import SampleData
Oddant1 marked this conversation as resolved.
Show resolved Hide resolved

from q2galaxy.core.drivers.stdio import error_handler, stdio_files


Expand All @@ -28,6 +33,7 @@ def builtin_runner(action_id, inputs):
def _get_tool(action_id):
builtin_map = {
'import': import_data,
'import-fastq': import_fastq_data,
'export': export_data,
'qza_to_tabular': qza_to_tabular
}
Expand All @@ -46,6 +52,70 @@ def import_data(inputs, stdio):
_stdio=stdio)


def import_fastq_data(inputs, stdio):
    """Run the builtin fastq import for single-end or paired-end data.

    Parameters
    ----------
    inputs
        Mapping whose ``'import'`` entry lists the staged files; each item
        carries a ``'staging_path'`` and a ``'source_path'``.
    stdio
        Forwarded as ``_stdio`` to every helper called here.
    """
    paired = _is_paired(inputs, _stdio=stdio)

    # Paired data gets the paired-end semantic type, anything else the
    # single-end one. Both use the same Casava directory format.
    if paired:
        semantic_type = SampleData[PairedEndSequencesWithQuality]
    else:
        semantic_type = SampleData[SequencesWithQuality]

    files_to_move = _import_fastq_get_files_to_move(
        inputs, paired, _stdio=stdio)

    artifact = _import_name_data(
        semantic_type, CasavaOneEightSingleLanePerSampleDirFmt,
        files_to_move, _stdio=stdio)
    _import_save(artifact, _stdio=stdio)


@error_handler(header='Unexpected error determining if data is paired: ')
def _is_paired(inputs):
    """Guess whether the inputs are paired-end data.

    Only the first entry of ``inputs['import']`` is inspected: the data is
    treated as paired when the basename of its staging path contains
    ``'forward'`` or ``'reverse'``.
    """
    # NOTE: this is a heuristic and may not be fully reliable.
    first_name = os.path.basename(inputs['import'][0]['staging_path'])
    return 'forward' in first_name or 'reverse' in first_name


@error_handler(header='Unexpected error getting files to move: ')
def _import_fastq_get_files_to_move(inputs, paired):
    """Pair every source file with the Casava-style name it should receive.

    Parameters
    ----------
    inputs
        Mapping whose ``'import'`` entry lists dicts holding a
        ``'staging_path'`` and a ``'source_path'``.
    paired
        Whether the data is being treated as paired-end.

    Returns
    -------
    list of ``(source_path, casava_name)`` tuples, in input order.
    """
    moves = []
    for position, entry in enumerate(inputs['import']):
        staged = entry['staging_path']
        origin = entry['source_path']

        # Only paired data can yield a reverse (R2) read; everything else,
        # including all single-end data, is labelled R1.
        is_reverse = paired and 'reverse' in os.path.basename(staged)
        direction = 'R2' if is_reverse else 'R1'

        moves.append(
            (origin, _to_casava(staged, position, paired, direction)))

    return moves


# NOTE: If single end data is uploaded with no extension ex:
#
# sampleid
# vs
# sampleid.fastq.gz
#
# Then the user must choose the correct type (.fastq.gz) during upload. If they
# leave it on auto, the import goes wrong because Galaxy does not seem to know
# which type to choose, so we seemingly get None. Choosing the wrong type is
# not supported: make sure to choose the correct one.
def _to_casava(path, idx, paired, dir):
# This only works if for paired they rename the pair to only the sample-id
# and for single their filename is only the sample-id
if paired:
sample_id = os.path.split(path)[0]
else:
# Splitting on just .fastq and taking the first thing ought to work for
# any extension we are likely to see provided they don't put .fastq in
# their sampleid, and I'm fine with not letting them do that
sample_id = path.split('.fastq')[0]

return f"{sample_id}_{idx}_L001_{dir}_001.fastq.gz"


@error_handler(header='Unexpected error collecting arguments: ')
def _import_get_args(inputs):
type_ = qiime2.sdk.parse_type(inputs.pop('type'))
Expand Down
3 changes: 3 additions & 0 deletions q2galaxy/core/templaters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,15 @@
from q2galaxy.core.templaters.action import make_tool
from q2galaxy.core.templaters.common import make_tool_id
from q2galaxy.core.templaters.import_data import make_builtin_import
from q2galaxy.core.templaters.import_fastq_data import \
make_builtin_import_fastq
from q2galaxy.core.templaters.export_data import make_builtin_export
# from q2galaxy.core.templaters.qza_to_tabular import make_builtin_to_tabular


# Read-only mapping (MappingProxyType) from each builtin tool id, built with
# make_tool_id under 'tools', to its maker function. The qza_to_tabular entry
# is currently commented out.
BUILTIN_MAKERS = types.MappingProxyType({
make_tool_id('tools', 'import'): make_builtin_import,
make_tool_id('tools', 'import_fastq'): make_builtin_import_fastq,
make_tool_id('tools', 'export'): make_builtin_export,
# make_tool_id('tools', 'qza_to_tabular'): make_builtin_to_tabular,
})
Expand Down
59 changes: 59 additions & 0 deletions q2galaxy/core/templaters/import_fastq_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# ----------------------------------------------------------------------------
# Copyright (c) 2018-2023, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
from qiime2.sdk import PluginManager

from q2galaxy.core.util import XMLNode
from q2galaxy.core.templaters.common import (make_builtin_version,
make_tool_name_from_id,
make_requirements,
make_citations)


def make_builtin_import_fastq(meta, tool_id):
    """Build the XML ``tool`` node for the builtin fastq import tool.

    Parameters
    ----------
    meta
        Passed through unchanged to ``make_requirements``.
    tool_id
        Used as the tool's ``id`` attribute and to derive its ``name``.

    Returns
    -------
    The ``tool`` XMLNode with, in order, its inputs, outputs, command,
    description, configfiles, citations and requirements children.
    """
    # Gather the plugin behind every semantic type the PluginManager
    # reports; the set removes duplicates.
    records = sorted(PluginManager().get_semantic_types().values(),
                     key=lambda rec: str(rec.semantic_type))
    plugins = {rec.plugin for rec in records}

    tool = XMLNode('tool', id=tool_id, name=make_tool_name_from_id(tool_id),
                   version=make_builtin_version(plugins))

    # Children are appended in exactly the order listed here.
    children = (
        _make_input(),
        _make_output(),
        XMLNode('command', "q2galaxy run tools import-fastq '$inputs'"),
        XMLNode('description', 'Import fastq data into a QIIME 2 artifact'),
        _make_config(),
        make_citations(),
        make_requirements(meta, *[p.project_name for p in plugins]),
    )
    for child in children:
        tool.append(child)

    return tool


def _make_config():
    """Return the ``configfiles`` node holding a single ``inputs`` child.

    The child is named ``inputs`` and uses the
    ``staging_path_and_source_path`` data style.
    """
    inputs_file = XMLNode(
        'inputs', name='inputs', data_style='staging_path_and_source_path')

    node = XMLNode('configfiles')
    node.append(inputs_file)
    return node


def _make_input():
    """Return the ``inputs`` node with the tool's single collection param.

    The param is named ``import`` and is a ``data_collection`` whose
    ``collection_type`` is ``'list, list:paired'``.
    """
    collection_param = XMLNode(
        'param', name='import', type='data_collection',
        collection_type='list, list:paired')

    node = XMLNode('inputs')
    node.append(collection_param)
    return node


def _make_output():
    """Return the ``outputs`` node with the tool's single ``data`` output.

    The output is named ``imported-data``, has format ``qza`` and is taken
    from ``imported_data.qza`` in the working directory.
    """
    imported = XMLNode(
        'data', name='imported-data', format='qza',
        from_work_dir='imported_data.qza')

    node = XMLNode('outputs')
    node.append(imported)
    return node
Loading