From 118d832eb02e6d98607b7c9475c0f1e5caeff9a8 Mon Sep 17 00:00:00 2001 From: Marcus Stoiber Date: Thu, 3 Oct 2019 10:09:38 -0700 Subject: [PATCH] Bug fixes and cleanup from last commit. --- megalodon/aggregate.py | 4 ++-- megalodon/megalodon.py | 10 +++++++--- megalodon/mods.py | 9 +++++++-- megalodon/variants.py | 9 +++++++-- scripts/run_aggregation.py | 10 ++++------ 5 files changed, 27 insertions(+), 15 deletions(-) diff --git a/megalodon/aggregate.py b/megalodon/aggregate.py index d83d813..5bf06df 100644 --- a/megalodon/aggregate.py +++ b/megalodon/aggregate.py @@ -256,7 +256,7 @@ def aggregate_stats( agg_vars = variants.AggVars( vars_db_fn, load_in_mem_indices=False) num_vars = agg_vars.num_uniq() - ref_names_and_lens = agg_vars.get_all_chrm_and_lens() + ref_names_and_lens = agg_vars.vars_db.get_all_chrm_and_lens() agg_vars.close() logger.info('Spawning variant aggregation processes.') # create process to collect var stats from workers @@ -286,7 +286,7 @@ def aggregate_stats( mods_db_fn = mh.get_megalodon_fn(out_dir, mh.PR_MOD_NAME) agg_mods = mods.AggMods(mods_db_fn, load_in_mem_indices=False) num_mods = agg_mods.num_uniq() - ref_names_and_lens = agg_mods.get_all_chrm_and_lens() + ref_names_and_lens = agg_mods.mods_db.get_all_chrm_and_lens() agg_mods.close() logger.info('Spawning modified base aggregation processes.') # create process to collect mods stats from workers diff --git a/megalodon/megalodon.py b/megalodon/megalodon.py index f991f4d..41795d9 100644 --- a/megalodon/megalodon.py +++ b/megalodon/megalodon.py @@ -1017,16 +1017,20 @@ def _main(): args.processes, args.verbose_read_progress, args.suppress_progress, mods_info, args.database_safety, pr_ref_filts) - if aligner is not None: aligner.close() + if aligner is not None: + ref_fn = aligner.ref_fn + map_out_fmt = aligner.out_fmt + del aligner + if mh.MAP_NAME in args.outputs: logger.info('Spawning process to sort mappings') map_p = post_process_mapping( - args.output_directory, aligner.out_fmt, aligner.ref_fn) + args.output_directory, map_out_fmt, ref_fn) if mh.WHATSHAP_MAP_NAME in args.outputs: logger.info('Spawning process to sort whatshap mappings') whatshap_sort_fn, whatshap_p = post_process_whatshap( - args.output_directory, aligner.out_fmt, aligner.ref_fn) + args.output_directory, map_out_fmt, ref_fn) if mh.VAR_NAME in args.outputs or mh.MOD_NAME in args.outputs: post_process_aggregate( diff --git a/megalodon/mods.py b/megalodon/mods.py index b6e7190..2ff19d5 100644 --- a/megalodon/mods.py +++ b/megalodon/mods.py @@ -312,8 +312,13 @@ def get_chrm(self, chrm_id): return chrm def get_all_chrm_and_lens(self): - return tuple(map(tuple, zip(*self.cur.execute( - 'SELECT chrm, chrm_len FROM chrm').fetchall()))) + try: + return tuple(map(tuple, zip(*self.cur.execute( + 'SELECT chrm, chrm_len FROM chrm').fetchall()))) + except sqlite3.OperationalError: + raise mh.MegaError( + 'Old megalodon database scheme detected. Please re-run ' + + 'megalodon processing or downgrade megalodon installation.') def get_mod_base_data(self, mod_id): try: diff --git a/megalodon/variants.py b/megalodon/variants.py index b564d4e..05b4d1d 100755 --- a/megalodon/variants.py +++ b/megalodon/variants.py @@ -340,8 +340,13 @@ def get_chrm(self, chrm_id): return chrm def get_all_chrm_and_lens(self): - return tuple(map(tuple, zip(*self.cur.execute( - 'SELECT chrm, chrm_len FROM chrm').fetchall()))) + try: + return tuple(map(tuple, zip(*self.cur.execute( + 'SELECT chrm, chrm_len FROM chrm').fetchall()))) + except sqlite3.OperationalError: + raise mh.MegaError( + 'Old megalodon database scheme detected. Please re-run ' + + 'megalodon processing or downgrade megalodon installation.') def get_alt_seq(self, alt_id): try: diff --git a/scripts/run_aggregation.py b/scripts/run_aggregation.py index f55d597..2e0cf0c 100644 --- a/scripts/run_aggregation.py +++ b/scripts/run_aggregation.py @@ -87,7 +87,7 @@ def main(): args = get_parser().parse_args() log_suffix = ('aggregation' if args.output_suffix is None else 'aggregation.' + args.output_suffix) - logging.init_logger(args.output_directory, out_suffix=log_suffix) + logging.init_logger(args.megalodon_directory, out_suffix=log_suffix) logger = logging.get_logger() mod_agg_info = mods.AGG_INFO( @@ -97,24 +97,22 @@ def main(): logger.info('Loading model.') mod_names = backends.ModelInfo(mh.get_model_fn( args.taiyaki_model_filename)).mod_long_names - if args.reference is not None: logger.info('Loading reference.') valid_read_ids = None if args.read_ids_filename is not None: with open(args.read_ids_filename) as read_ids_fp: valid_read_ids = set(line.strip() for line in read_ids_fp) aggregate.aggregate_stats( - args.outputs, args.output_directory, args.processes, + args.outputs, args.megalodon_directory, args.processes, args.write_vcf_log_probs, args.heterozygous_factors, variants.HAPLIOD_MODE if args.haploid else variants.DIPLOID_MODE, mod_names, mod_agg_info, args.write_mod_log_probs, args.mod_output_formats, args.suppress_progress, valid_read_ids, args.output_suffix) - # note reference is required in order to annotate contigs for VCF writing - if mh.VAR_NAME in args.outputs and args.reference is not None: + if mh.VAR_NAME in args.outputs: logger.info('Sorting output variant file') variant_fn = mh.add_fn_suffix( - mh.get_megalodon_fn(args.output_directory, mh.VAR_NAME), + mh.get_megalodon_fn(args.megalodon_directory, mh.VAR_NAME), args.output_suffix) sort_variant_fn = mh.add_fn_suffix(variant_fn, 'sorted') variants.sort_variants(variant_fn, sort_variant_fn)