diff --git a/dorado/data_loader/DataLoader.cpp b/dorado/data_loader/DataLoader.cpp index c9d5297ae..1e5d03fb0 100644 --- a/dorado/data_loader/DataLoader.cpp +++ b/dorado/data_loader/DataLoader.cpp @@ -177,8 +177,7 @@ SimplexReadPtr process_pod5_thread_fn( new_read->read_common.read_id = std::move(read_id_str); new_read->read_common.num_trimmed_samples = 0; new_read->read_common.attributes.read_number = read_data.read_number; - new_read->read_common.attributes.fast5_filename = - std::filesystem::path(path.c_str()).filename().string(); + new_read->read_common.attributes.filename = std::filesystem::path(path).filename().string(); new_read->read_common.attributes.mux = read_data.well; new_read->read_common.attributes.num_samples = read_data.num_samples; new_read->read_common.attributes.channel_number = read_data.channel; @@ -271,176 +270,168 @@ void DataLoader::load_reads(const std::filesystem::path& path, return; } - auto iterate_directory = [&](const auto& iterator) { - switch (traversal_order) { - case ReadOrder::BY_CHANNEL: - // If traversal in channel order is required, the following algorithm - // is used - - // 1. iterate through all the read metadata to collect channel information - // across all pod5 files - // 2. store the read list sorted by channel number - spdlog::info("> Reading read channel info"); - load_read_channels(path, recursive_file_loading); - spdlog::info("> Processed read channel info"); - // 3. for each channel, iterate through all files and in each iteration - // only load the reads that correspond to that channel. - for (int channel = 0; channel <= m_max_channel; channel++) { - if (m_reads_by_channel.find(channel) != m_reads_by_channel.end()) { - // Sort the read ids within a channel by its mux - // and start time. - spdlog::debug("Sort channel {}", channel); - auto& reads = m_reads_by_channel.at(channel); - std::sort(reads.begin(), reads.end(), [](ReadSortInfo& a, ReadSortInfo& b) { - if (a.mux != b.mux) { - return a.mux < b.mux; - } else { - return a.read_number < b.read_number; - } - }); - // Once sorted, create a hash table from read id - // to index in the sorted list to quickly fetch the - // read location and its neighbors. - for (size_t i = 0; i < reads.size(); i++) { - m_read_id_to_index[reads[i].read_id] = i; - } - spdlog::debug("Sorted channel {}", channel); - } - for (const auto& entry : iterator) { - if (m_loaded_read_count == m_max_reads) { - break; - } - auto entry_path = std::filesystem::path(entry); - std::string ext = entry_path.extension().string(); - std::transform(ext.begin(), ext.end(), ext.begin(), - [](unsigned char c) { return std::tolower(c); }); - if (ext == ".fast5") { - throw std::runtime_error( - "Traversing reads by channel is only available for POD5. " - "Encountered FAST5 at " + - entry_path.string()); - } else if (ext == ".pod5") { - auto& channel_to_read_ids = - m_file_channel_read_order_map.at(entry_path.string()); - auto& read_ids = channel_to_read_ids[channel]; - if (!read_ids.empty()) { - load_pod5_reads_from_file_by_read_ids(entry_path.string(), read_ids); - } + const auto filtered_entries = filter_fast5_for_mixed_datasets( + utils::fetch_directory_entries(path, recursive_file_loading)); + + switch (traversal_order) { + case ReadOrder::BY_CHANNEL: + // If traversal in channel order is required, the following algorithm + // is used - + // 1. iterate through all the read metadata to collect channel information + // across all pod5 files + // 2. store the read list sorted by channel number + spdlog::info("> Reading read channel info"); + load_read_channels(path, recursive_file_loading); + spdlog::info("> Processed read channel info"); + // 3. for each channel, iterate through all files and in each iteration + // only load the reads that correspond to that channel. + for (int channel = 0; channel <= m_max_channel; channel++) { + if (m_reads_by_channel.find(channel) != m_reads_by_channel.end()) { + // Sort the read ids within a channel by its mux + // and start time. + spdlog::debug("Sort channel {}", channel); + auto& reads = m_reads_by_channel.at(channel); + std::sort(reads.begin(), reads.end(), [](ReadSortInfo& a, ReadSortInfo& b) { + if (a.mux != b.mux) { + return a.mux < b.mux; + } else { + return a.read_number < b.read_number; } + }); + // Once sorted, create a hash table from read id + // to index in the sorted list to quickly fetch the + // read location and its neighbors. + for (size_t i = 0; i < reads.size(); i++) { + m_read_id_to_index[reads[i].read_id] = i; } - // Erase sorted list as it's not needed anymore. - m_reads_by_channel.erase(channel); + spdlog::debug("Sorted channel {}", channel); } - break; - case ReadOrder::UNRESTRICTED: - for (const auto& entry : iterator) { + for (const auto& entry : filtered_entries) { if (m_loaded_read_count == m_max_reads) { break; } - spdlog::debug("Load reads from file {}", entry.path().string()); - std::string ext = std::filesystem::path(entry).extension().string(); + auto entry_path = std::filesystem::path(entry); + std::string ext = entry_path.extension().string(); std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c) { return std::tolower(c); }); if (ext == ".fast5") { - load_fast5_reads_from_file(entry.path().string()); + throw std::runtime_error( + "Traversing reads by channel is only available for POD5. " + "Encountered FAST5 at " + + entry_path.string()); } else if (ext == ".pod5") { - load_pod5_reads_from_file(entry.path().string()); + auto& channel_to_read_ids = + m_file_channel_read_order_map.at(entry_path.string()); + auto& read_ids = channel_to_read_ids[channel]; + if (!read_ids.empty()) { + load_pod5_reads_from_file_by_read_ids(entry_path.string(), read_ids); + } } } - break; - default: - throw std::runtime_error("Unsupported traversal order detected: " + - dorado::to_string(traversal_order)); + // Erase sorted list as it's not needed anymore. + m_reads_by_channel.erase(channel); } - }; - - auto filtered_entries = filter_fast5_for_mixed_datasets( - utils::fetch_directory_entries(path, recursive_file_loading)); - iterate_directory(filtered_entries); + break; + case ReadOrder::UNRESTRICTED: + for (const auto& entry : filtered_entries) { + if (m_loaded_read_count == m_max_reads) { + break; + } + spdlog::debug("Load reads from file {}", entry.path().string()); + std::string ext = std::filesystem::path(entry).extension().string(); + std::transform(ext.begin(), ext.end(), ext.begin(), + [](unsigned char c) { return std::tolower(c); }); + if (ext == ".fast5") { + load_fast5_reads_from_file(entry.path().string()); + } else if (ext == ".pod5") { + load_pod5_reads_from_file(entry.path().string()); + } + } + break; + default: + throw std::runtime_error("Unsupported traversal order detected: " + + dorado::to_string(traversal_order)); + } } void DataLoader::load_read_channels(const std::filesystem::path& data_path, bool recursive_file_loading) { - auto iterate_directory = [&](const auto& iterator) { - for (const auto& entry : iterator) { - auto file_path = std::filesystem::path(entry); - std::string ext = file_path.extension().string(); - std::transform(ext.begin(), ext.end(), ext.begin(), - [](unsigned char c) { return std::tolower(c); }); - if (ext != ".pod5") { - continue; - } - pod5_init(); + const auto entries = utils::fetch_directory_entries(data_path, recursive_file_loading); + for (const auto& entry : entries) { + auto file_path = std::filesystem::path(entry); + std::string ext = file_path.extension().string(); + std::transform(ext.begin(), ext.end(), ext.begin(), + [](unsigned char c) { return std::tolower(c); }); + if (ext != ".pod5") { + continue; + } + pod5_init(); + + // Use a std::map to store by sorted channel order. + m_file_channel_read_order_map.emplace(file_path.string(), channel_to_read_id_t()); + auto& channel_to_read_id = m_file_channel_read_order_map[file_path.string()]; - // Use a std::map to store by sorted channel order. - m_file_channel_read_order_map.emplace(file_path.string(), channel_to_read_id_t()); - auto& channel_to_read_id = m_file_channel_read_order_map[file_path.string()]; + // Open the file ready for walking: + Pod5FileReader_t* file = pod5_open_file(file_path.string().c_str()); - // Open the file ready for walking: - Pod5FileReader_t* file = pod5_open_file(file_path.string().c_str()); + if (!file) { + spdlog::error("Failed to open file {}: {}", file_path.string().c_str(), + pod5_get_error_string()); + continue; + } + std::size_t batch_count = 0; + if (pod5_get_read_batch_count(&batch_count, file) != POD5_OK) { + spdlog::error("Failed to query batch count: {}", pod5_get_error_string()); + } - if (!file) { - spdlog::error("Failed to open file {}: {}", file_path.string().c_str(), - pod5_get_error_string()); + for (std::size_t batch_index = 0; batch_index < batch_count; ++batch_index) { + Pod5ReadRecordBatch_t* batch = nullptr; + if (pod5_get_read_batch(&batch, file, batch_index) != POD5_OK) { + spdlog::error("Failed to get batch: {}", pod5_get_error_string()); continue; } - std::size_t batch_count = 0; - if (pod5_get_read_batch_count(&batch_count, file) != POD5_OK) { - spdlog::error("Failed to query batch count: {}", pod5_get_error_string()); - } - for (std::size_t batch_index = 0; batch_index < batch_count; ++batch_index) { - Pod5ReadRecordBatch_t* batch = nullptr; - if (pod5_get_read_batch(&batch, file, batch_index) != POD5_OK) { - spdlog::error("Failed to get batch: {}", pod5_get_error_string()); - continue; - } + std::size_t batch_row_count = 0; + if (pod5_get_read_batch_row_count(&batch_row_count, batch) != POD5_OK) { + spdlog::error("Failed to get batch row count"); + continue; + } - std::size_t batch_row_count = 0; - if (pod5_get_read_batch_row_count(&batch_row_count, batch) != POD5_OK) { - spdlog::error("Failed to get batch row count"); + for (std::size_t row = 0; row < batch_row_count; ++row) { + uint16_t read_table_version = 0; + ReadBatchRowInfo_t read_data; + if (pod5_get_read_batch_row_info_data(batch, row, READ_BATCH_ROW_INFO_VERSION, + &read_data, &read_table_version) != POD5_OK) { + spdlog::error("Failed to get read {}", row); continue; } - for (std::size_t row = 0; row < batch_row_count; ++row) { - uint16_t read_table_version = 0; - ReadBatchRowInfo_t read_data; - if (pod5_get_read_batch_row_info_data(batch, row, READ_BATCH_ROW_INFO_VERSION, - &read_data, - &read_table_version) != POD5_OK) { - spdlog::error("Failed to get read {}", row); - continue; - } - - int channel = read_data.channel; - - // Update maximum number of channels encountered. - m_max_channel = std::max(m_max_channel, channel); + int channel = read_data.channel; - // Store the read_id in the channel's list. - ReadID read_id; - std::memcpy(read_id.data(), read_data.read_id, POD5_READ_ID_SIZE); - channel_to_read_id[channel].push_back(read_id); + // Update maximum number of channels encountered. + m_max_channel = std::max(m_max_channel, channel); - char read_id_tmp[POD5_READ_ID_LEN]; - if (pod5_format_read_id(read_data.read_id, read_id_tmp) != POD5_OK) { - spdlog::error("Failed to format read id"); - } - std::string rid(read_id_tmp); - m_reads_by_channel[channel].push_back( - {rid, read_data.well, read_data.read_number}); - } + // Store the read_id in the channel's list. + ReadID read_id; + std::memcpy(read_id.data(), read_data.read_id, POD5_READ_ID_SIZE); + channel_to_read_id[channel].push_back(read_id); - if (pod5_free_read_batch(batch) != POD5_OK) { - spdlog::error("Failed to release batch"); + char read_id_tmp[POD5_READ_ID_LEN]; + if (pod5_format_read_id(read_data.read_id, read_id_tmp) != POD5_OK) { + spdlog::error("Failed to format read id"); } + std::string rid(read_id_tmp); + m_reads_by_channel[channel].push_back({rid, read_data.well, read_data.read_number}); } - if (pod5_close_and_free_reader(file) != POD5_OK) { - spdlog::error("Failed to close and free POD5 reader"); + + if (pod5_free_read_batch(batch) != POD5_OK) { + spdlog::error("Failed to release batch"); } } - }; - - iterate_directory(utils::fetch_directory_entries(data_path, recursive_file_loading)); + if (pod5_close_and_free_reader(file) != POD5_OK) { + spdlog::error("Failed to close and free POD5 reader"); + } + } } void DataLoader::load_pod5_reads_from_file_by_read_ids(const std::string& path, @@ -658,7 +649,7 @@ void DataLoader::load_fast5_reads_from_file(const std::string& path) { start_time_attr.read(start_time); string_reader(read_id_attr, read_id); - std::string fast5_filename = std::filesystem::path(path).filename().string(); + std::string filename = std::filesystem::path(path).filename().string(); HighFive::Group tracking_id_group = read.getGroup("tracking_id"); std::string exp_start_time = get_string_attribute(tracking_id_group, "exp_start_time"); @@ -685,7 +676,7 @@ void DataLoader::load_fast5_reads_from_file(const std::string& path) { new_read->read_common.attributes.read_number = read_number; new_read->read_common.attributes.channel_number = channel_number; new_read->read_common.attributes.start_time = start_time_str; - new_read->read_common.attributes.fast5_filename = fast5_filename; + new_read->read_common.attributes.filename = filename; new_read->read_common.flowcell_id = flow_cell_id; new_read->read_common.flow_cell_product_code = flow_cell_product_code; new_read->read_common.position_id = device_id; diff --git a/dorado/read_pipeline/HtsReader.cpp b/dorado/read_pipeline/HtsReader.cpp index 58c39246d..84f9868da 100644 --- a/dorado/read_pipeline/HtsReader.cpp +++ b/dorado/read_pipeline/HtsReader.cpp @@ -160,15 +160,27 @@ HtsReader::HtsReader(const std::string& filename, } template -bool HtsReader::try_initialise_generator(const std::string& filename) { - auto generator = std::make_shared(filename); // shared to allow copy assignment +bool HtsReader::try_initialise_generator(const std::string& filepath) { + auto generator = std::make_shared(filepath); // shared to allow copy assignment if (!generator->is_valid()) { return false; } m_header = generator->header(); m_format = generator->format(); - m_bam_record_generator = [generator_ = std::move(generator)](bam1_t& bam_record) { - return generator_->try_get_next_record(bam_record); + m_bam_record_generator = [generator_ = std::move(generator), + filename = std::filesystem::path(filepath).filename().string(), + this](bam1_t& bam_record) { + if (!generator_->try_get_next_record(bam_record)) { + return false; + } + + // If the record doesn't have a filename set then say that it came from the currently processing file. + if (m_add_filename_tag && !bam_aux_get(&bam_record, "fn")) { + bam_aux_append(&bam_record, "fn", 'Z', static_cast(filename.size() + 1), + reinterpret_cast(filename.c_str())); + } + + return true; }; return true; } diff --git a/dorado/read_pipeline/HtsReader.h b/dorado/read_pipeline/HtsReader.h index a3a5a4e5c..2101841b2 100644 --- a/dorado/read_pipeline/HtsReader.h +++ b/dorado/read_pipeline/HtsReader.h @@ -25,6 +25,11 @@ class HtsReader { public: HtsReader(const std::string& filename, std::optional> read_list); + + // By default we'll add a filename tag to each record to match the current file + // if one isn't included in the data, but that can be disabled with this method. + void set_add_filename_tag(bool should) { m_add_filename_tag = should; } + bool read(); // If reading directly into a pipeline need to set the client info on the messages @@ -36,7 +41,7 @@ class HtsReader { void set_record_mutator(std::function mutator); bool is_aligned{false}; - BamPtr record{nullptr}; + BamPtr record; sam_hdr_t* header(); const sam_hdr_t* header() const; @@ -44,13 +49,14 @@ class HtsReader { private: sam_hdr_t* m_header{nullptr}; // non-owning - std::string m_format{}; + std::string m_format; std::shared_ptr m_client_info; - std::function m_record_mutator{}; + std::function m_record_mutator; std::optional> m_read_list; - std::function m_bam_record_generator{}; + std::function m_bam_record_generator; + bool m_add_filename_tag{true}; template bool try_initialise_generator(const std::string& filename); @@ -69,7 +75,8 @@ T HtsReader::get_tag(const char* tagname) { } else if constexpr (std::is_floating_point_v) { tag_value = static_cast(bam_aux2f(tag)); } else { - tag_value = static_cast(bam_aux2Z(tag)); + const char* val = bam_aux2Z(tag); + tag_value = val ? val : T{}; } return tag_value; diff --git a/dorado/read_pipeline/messages.cpp b/dorado/read_pipeline/messages.cpp index 4390d9a2d..39c92d199 100644 --- a/dorado/read_pipeline/messages.cpp +++ b/dorado/read_pipeline/messages.cpp @@ -62,8 +62,8 @@ void ReadCommon::generate_read_tags(bam1_t *aln, bool emit_moves, bool is_duplex int rn = attributes.read_number; bam_aux_append(aln, "rn", 'i', sizeof(rn), (uint8_t *)&rn); - bam_aux_append(aln, "fn", 'Z', int(attributes.fast5_filename.length() + 1), - (uint8_t *)attributes.fast5_filename.c_str()); + bam_aux_append(aln, "fn", 'Z', int(attributes.filename.length() + 1), + (uint8_t *)attributes.filename.c_str()); float sm = shift; bam_aux_append(aln, "sm", 'f', sizeof(sm), (uint8_t *)&sm); diff --git a/dorado/read_pipeline/messages.h b/dorado/read_pipeline/messages.h index 1bd5818fb..09d35bf5c 100644 --- a/dorado/read_pipeline/messages.h +++ b/dorado/read_pipeline/messages.h @@ -25,10 +25,12 @@ struct Attributes { int32_t read_number{-1}; // Per-channel number of each read as it was acquired by minknow int32_t channel_number{-1}; //Channel ID std::string start_time{}; //Read acquisition start time - std::string fast5_filename{}; - uint64_t num_samples; + std::string filename{}; // Indicates if this read had end reason `mux_change` or `unblock_mux_change` bool is_end_reason_mux_change{false}; + + // Only used by tests, and only valid for POD5 data. + uint64_t num_samples{}; }; } // namespace details diff --git a/dorado/summary/summary.cpp b/dorado/summary/summary.cpp index 66f7a732e..5e0167da8 100644 --- a/dorado/summary/summary.cpp +++ b/dorado/summary/summary.cpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace { @@ -37,27 +38,44 @@ volatile sig_atomic_t SigIntHandler::interrupt{}; namespace dorado { -std::vector SummaryData::s_required_fields = {"filename", "read_id"}; +namespace { + +using namespace std::string_view_literals; + +const std::array s_required_fields = { + "filename"sv, + "read_id"sv, +}; + +const std::array s_general_fields = { + "run_id"sv, + "channel"sv, + "mux"sv, + "start_time"sv, + "duration"sv, + "template_start"sv, + "template_duration"sv, + "sequence_length_template"sv, + "mean_qscore_template"sv, +}; -std::vector SummaryData::s_general_fields = {"run_id", - "channel", - "mux", - "start_time", - "duration", - "template_start", - "template_duration", - "sequence_length_template", - "mean_qscore_template"}; +const std::array s_barcoding_fields = { + "barcode"sv, +}; -std::vector SummaryData::s_barcoding_fields = {"barcode"}; +const std::array s_alignment_fields = { + "alignment_genome"sv, "alignment_genome_start"sv, + "alignment_genome_end"sv, "alignment_strand_start"sv, + "alignment_strand_end"sv, "alignment_direction"sv, + "alignment_length"sv, "alignment_num_aligned"sv, + "alignment_num_correct"sv, "alignment_num_insertions"sv, + "alignment_num_deletions"sv, "alignment_num_substitutions"sv, + "alignment_mapq"sv, "alignment_strand_coverage"sv, + "alignment_identity"sv, "alignment_accuracy"sv, + "alignment_bed_hits"sv, +}; -std::vector SummaryData::s_alignment_fields = { - "alignment_genome", "alignment_genome_start", "alignment_genome_end", - "alignment_strand_start", "alignment_strand_end", "alignment_direction", - "alignment_length", "alignment_num_aligned", "alignment_num_correct", - "alignment_num_insertions", "alignment_num_deletions", "alignment_num_substitutions", - "alignment_mapq", "alignment_strand_coverage", "alignment_identity", - "alignment_accuracy", "alignment_bed_hits"}; +} // namespace SummaryData::SummaryData() = default; @@ -73,7 +91,7 @@ void SummaryData::set_fields(FieldFlags flags) { m_field_flags = flags; } -bool SummaryData::process_file(const std::string& filename, std::ostream& writer) { +void SummaryData::process_file(const std::string& filename, std::ostream& writer) { SigIntHandler sig_handler; HtsReader reader(filename, std::nullopt); m_field_flags = GENERAL_FIELDS | BARCODING_FIELDS; @@ -82,7 +100,7 @@ bool SummaryData::process_file(const std::string& filename, std::ostream& writer } auto read_group_exp_start_time = utils::get_read_group_info(reader.header(), "DT"); write_header(writer); - return write_rows_from_reader(reader, writer, read_group_exp_start_time); + write_rows_from_reader(reader, writer, read_group_exp_start_time); } bool SummaryData::process_tree(const std::string& folder, std::ostream& writer) { @@ -104,10 +122,7 @@ bool SummaryData::process_tree(const std::string& folder, std::ostream& writer) for (const auto& read_file : files) { HtsReader reader(read_file, std::nullopt); auto read_group_exp_start_time = utils::get_read_group_info(reader.header(), "DT"); - bool ok = write_rows_from_reader(reader, writer, read_group_exp_start_time); - if (!ok) { - spdlog::error("File {} could not be processed. Skipping file.", read_file); - } + write_rows_from_reader(reader, writer, read_group_exp_start_time); } return true; } @@ -137,7 +152,7 @@ void SummaryData::write_header(std::ostream& writer) { writer << '\n'; } -bool SummaryData::write_rows_from_reader( +void SummaryData::write_rows_from_reader( HtsReader& reader, std::ostream& writer, const std::map& read_group_exp_start_time) { @@ -266,7 +281,6 @@ bool SummaryData::write_rows_from_reader( } writer << '\n'; } - return true; } } // namespace dorado diff --git a/dorado/summary/summary.h b/dorado/summary/summary.h index bec105c2c..fab40f854 100644 --- a/dorado/summary/summary.h +++ b/dorado/summary/summary.h @@ -23,22 +23,17 @@ class SummaryData { void set_fields(FieldFlags flags); /// This will automatically set the fields based on the contents of the file. - bool process_file(const std::string& filename, std::ostream& writer); + void process_file(const std::string& filename, std::ostream& writer); /// For this method the fields must already be set. bool process_tree(const std::string& folder, std::ostream& writer); private: - static std::vector s_required_fields; - static std::vector s_general_fields; - static std::vector s_barcoding_fields; - static std::vector s_alignment_fields; - char m_separator{'\t'}; FieldFlags m_field_flags{}; void write_header(std::ostream& writer); - bool write_rows_from_reader(HtsReader& reader, + void write_rows_from_reader(HtsReader& reader, std::ostream& writer, const std::map& rgst); }; diff --git a/tests/AlignerTest.cpp b/tests/AlignerTest.cpp index f991350c2..abe351e6e 100644 --- a/tests/AlignerTest.cpp +++ b/tests/AlignerTest.cpp @@ -99,8 +99,8 @@ class AlignerNodeTestFixture { MessageTypePtr RunPipelineForRead( const std::shared_ptr& loaded_align_info, const std::shared_ptr& client_align_info, - std::string read_id, - std::string sequence) { + const std::string& read_id, + const std::string& sequence) { auto index_file_access = std::make_shared(); auto bed_file_access = std::make_shared(); CHECK(index_file_access->load_index(loaded_align_info->reference_file, @@ -110,16 +110,14 @@ class AlignerNodeTestFixture { create_pipeline(index_file_access, bed_file_access, thread_pool, dorado::utils::concurrency::TaskPriority::normal); - dorado::ReadCommon read_common{}; auto client_info = std::make_shared(); client_info->contexts().register_context( client_align_info); - read_common.client_info = client_info; - read_common.read_id = std::move(read_id); - read_common.seq = std::move(sequence); auto read = std::make_unique(); - read->read_common = std::move(read_common); + read->read_common.client_info = std::move(client_info); + read->read_common.read_id = read_id; + read->read_common.seq = sequence; pipeline->push_message(std::move(read)); pipeline->terminate({}); @@ -544,6 +542,7 @@ TEST_CASE_METHOD(AlignerNodeTestFixture, // Get the sam line from BAM pipeline dorado::HtsReader bam_reader(query, std::nullopt); + bam_reader.set_add_filename_tag(false); auto bam_records = RunPipelineWithBamMessages(bam_reader, ref, "", options, 2); CHECK(bam_records.size() == 1); auto sam_line_from_bam_ptr = get_sam_line_from_bam(std::move(bam_records[0])); @@ -553,23 +552,21 @@ TEST_CASE_METHOD(AlignerNodeTestFixture, auto align_info = std::make_shared(); align_info->minimap_options = options; align_info->reference_file = ref; - auto simplex_read = RunPipelineForRead( - align_info, align_info, std::move(read_id), std::move(sequence)); + auto simplex_read = + RunPipelineForRead(align_info, align_info, read_id, sequence); auto sam_line_from_read_common = std::move(simplex_read->read_common.alignment_results[0].sam_string); // Do the comparison checks - CHECK_FALSE(sam_line_from_read_common.empty()); - - if (sam_line_from_read_common.at(sam_line_from_read_common.size() - 1) == '\n') { - sam_line_from_read_common = - sam_line_from_read_common.substr(0, sam_line_from_read_common.size() - 1); + REQUIRE_FALSE(sam_line_from_read_common.empty()); + if (sam_line_from_read_common.back() == '\n') { + sam_line_from_read_common.resize(sam_line_from_read_common.size() - 1); } const auto bam_fields = dorado::utils::split(sam_line_from_bam_ptr, '\t'); const auto read_common_fields = dorado::utils::split(sam_line_from_read_common, '\t'); - CHECK(bam_fields.size() == read_common_fields.size()); - CHECK(bam_fields.size() >= 11); + REQUIRE(bam_fields.size() == read_common_fields.size()); + REQUIRE(bam_fields.size() >= 11); // first 11 mandatory fields should be identical for (std::size_t field_index{0}; field_index < 11; ++field_index) { CAPTURE(field_index); diff --git a/tests/BamReaderTest.cpp b/tests/BamReaderTest.cpp index db193182a..f16e9c1e1 100644 --- a/tests/BamReaderTest.cpp +++ b/tests/BamReaderTest.cpp @@ -84,10 +84,17 @@ TEST_CASE("HtsReaderTest: get_tag", TEST_GROUP) { dorado::HtsReader reader(sam.string(), std::nullopt); while (reader.read()) { - // All records in small.sam have this set to 0. - CHECK(reader.get_tag("rl") == 0); + // All records in small.sam have these set. + CHECK(reader.get_tag("XA") == 42); + CHECK(reader.get_tag("XB") == "test"); // Intentionally bad tag to test that missing tags don't return garbage. + CHECK(reader.get_tag("##") == 0); CHECK(reader.get_tag("##") == 0); + CHECK(reader.get_tag("##") == ""); + // Type mismatch doesn't crash. + CHECK(reader.get_tag("XB") == 0); + CHECK(reader.get_tag("XB") == 0); + CHECK(reader.get_tag("XA") == ""); } } @@ -129,4 +136,16 @@ TEST_CASE( "read=1728 ch=332 start_time=2017-06-16T15:31:55Z"); } -} // namespace dorado::hts_reader::test \ No newline at end of file +TEST_CASE("HtsReaderTest: filename tag added if missing", TEST_GROUP) { + fs::path aligner_test_dir = fs::path(get_data_dir("bam_reader")); + auto filename = GENERATE("input.fa", "fastq_with_tags.fq"); + auto fasta = aligner_test_dir / filename; + + dorado::HtsReader reader(fasta.string(), std::nullopt); + while (reader.read()) { + // All should be given the name of input file. + CHECK(reader.get_tag("fn") == filename); + } +} + +} // namespace dorado::hts_reader::test diff --git a/tests/BamUtilsTest.cpp b/tests/BamUtilsTest.cpp index ca710eabb..586398640 100644 --- a/tests/BamUtilsTest.cpp +++ b/tests/BamUtilsTest.cpp @@ -294,6 +294,7 @@ TEST_CASE("BamUtilsTest: Remove all alignment tags", TEST_GROUP) { auto sam = bam_utils_test_dir / "aligned_record.bam"; HtsReader reader(sam.string(), std::nullopt); + reader.set_add_filename_tag(false); REQUIRE(reader.read()); // Parse first and only record. auto record = reader.record.get(); diff --git a/tests/FastxRandomReaderTest.cpp b/tests/FastxRandomReaderTest.cpp index 05f951050..45f7c7a6d 100644 --- a/tests/FastxRandomReaderTest.cpp +++ b/tests/FastxRandomReaderTest.cpp @@ -23,7 +23,6 @@ BamPtr generate_bam_entry(const std::string& read_id, TEST_CASE("Check if read can be loaded correctly.", "FastxRandomReader") { auto temp_dir = tests::make_temp_dir("fastx_random_reader_test"); auto temp_input_file = temp_dir.m_path / "input.fq"; - spdlog::info("{}", temp_dir.m_path.string()); const std::string seq = "ACTGATCG"; const std::vector qscore = {20, 20, 30, 30, 20, 20, 40, 40}; diff --git a/tests/NodeSmokeTest.cpp b/tests/NodeSmokeTest.cpp index c7acb1565..f6bbf36e6 100644 --- a/tests/NodeSmokeTest.cpp +++ b/tests/NodeSmokeTest.cpp @@ -79,7 +79,7 @@ class NodeSmokeTestBase { read->read_common.attributes.read_number = 12345; read->read_common.attributes.channel_number = 5; read->read_common.attributes.start_time = "2017-04-29T09:10:04Z"; - read->read_common.attributes.fast5_filename = "test.fast5"; + read->read_common.attributes.filename = "test.fast5"; read->read_common.client_info = client_info; return read; } diff --git a/tests/ReadFilterNodeTest.cpp b/tests/ReadFilterNodeTest.cpp index f7959b629..a6e7aef8a 100644 --- a/tests/ReadFilterNodeTest.cpp +++ b/tests/ReadFilterNodeTest.cpp @@ -38,7 +38,7 @@ TEST_CASE("ReadFilterNode: Filter read based on qscore", TEST_GROUP) { read_1->read_common.attributes.read_number = 18501; read_1->read_common.attributes.channel_number = 5; read_1->read_common.attributes.start_time = "2017-04-29T09:10:04Z"; - read_1->read_common.attributes.fast5_filename = "batch_0.fast5"; + read_1->read_common.attributes.filename = "batch_0.fast5"; auto read_2 = std::make_unique(); read_2->read_common.raw_data = at::empty(100); @@ -53,7 +53,7 @@ TEST_CASE("ReadFilterNode: Filter read based on qscore", TEST_GROUP) { read_2->read_common.attributes.read_number = 18501; read_2->read_common.attributes.channel_number = 5; read_2->read_common.attributes.start_time = "2017-04-29T09:10:04Z"; - read_2->read_common.attributes.fast5_filename = "batch_0.fast5"; + read_2->read_common.attributes.filename = "batch_0.fast5"; pipeline->push_message(std::move(read_1)); pipeline->push_message(std::move(read_2)); @@ -82,7 +82,7 @@ TEST_CASE("ReadFilterNode: Filter read based on read name", TEST_GROUP) { read_1->read_common.attributes.read_number = 18501; read_1->read_common.attributes.channel_number = 5; read_1->read_common.attributes.start_time = "2017-04-29T09:10:04Z"; - read_1->read_common.attributes.fast5_filename = "batch_0.fast5"; + read_1->read_common.attributes.filename = "batch_0.fast5"; auto read_2 = std::make_unique(); read_2->read_common.raw_data = at::empty(100); @@ -97,7 +97,7 @@ TEST_CASE("ReadFilterNode: Filter read based on read name", TEST_GROUP) { read_2->read_common.attributes.read_number = 18501; read_2->read_common.attributes.channel_number = 5; read_2->read_common.attributes.start_time = "2017-04-29T09:10:04Z"; - read_2->read_common.attributes.fast5_filename = "batch_0.fast5"; + read_2->read_common.attributes.filename = "batch_0.fast5"; pipeline->push_message(std::move(read_1)); pipeline->push_message(std::move(read_2)); @@ -126,7 +126,7 @@ TEST_CASE("ReadFilterNode: Filter read based on read length", TEST_GROUP) { read_1->read_common.attributes.read_number = 18501; read_1->read_common.attributes.channel_number = 5; read_1->read_common.attributes.start_time = "2017-04-29T09:10:04Z"; - read_1->read_common.attributes.fast5_filename = "batch_0.fast5"; + read_1->read_common.attributes.filename = "batch_0.fast5"; auto read_2 = std::make_unique(); read_2->read_common.raw_data = at::empty(100); @@ -141,7 +141,7 @@ TEST_CASE("ReadFilterNode: Filter read based on read length", TEST_GROUP) { read_2->read_common.attributes.read_number = 18501; read_2->read_common.attributes.channel_number = 5; read_2->read_common.attributes.start_time = "2017-04-29T09:10:04Z"; - read_2->read_common.attributes.fast5_filename = "batch_0.fast5"; + read_2->read_common.attributes.filename = "batch_0.fast5"; pipeline->push_message(std::move(read_1)); pipeline->push_message(std::move(read_2)); diff --git a/tests/ReadTest.cpp b/tests/ReadTest.cpp index 8c050b2ef..3b5108605 100644 --- a/tests/ReadTest.cpp +++ b/tests/ReadTest.cpp @@ -24,7 +24,7 @@ TEST_CASE(TEST_GROUP ": Test tag generation", TEST_GROUP) { read_common.attributes.read_number = 18501; read_common.attributes.channel_number = 5; read_common.attributes.start_time = "2017-04-29T09:10:04Z"; - read_common.attributes.fast5_filename = "batch_0.fast5"; + read_common.attributes.filename = "batch_0.fast5"; read_common.run_id = "xyz"; read_common.model_name = "test_model"; read_common.is_duplex = false; @@ -178,7 +178,7 @@ TEST_CASE(TEST_GROUP ": Test sam record generation", TEST_GROUP) { test_read.read_common.attributes.read_number = 18501; test_read.read_common.attributes.channel_number = 5; test_read.read_common.attributes.start_time = "2017-04-29T09:10:04Z"; - test_read.read_common.attributes.fast5_filename = "batch_0.fast5"; + test_read.read_common.attributes.filename = "batch_0.fast5"; auto lines = test_read.read_common.extract_sam_lines(false, 0, false); REQUIRE(!lines.empty()); @@ -350,7 +350,7 @@ TEST_CASE(TEST_GROUP ": Test mean q-score generation", TEST_GROUP) { read_common.attributes.read_number = 18501; read_common.attributes.channel_number = 5; read_common.attributes.start_time = "2017-04-29T09:10:04Z"; - read_common.attributes.fast5_filename = "batch_0.fast5"; + read_common.attributes.filename = "batch_0.fast5"; read_common.run_id = "xyz"; read_common.model_name = "test_model"; read_common.is_duplex = false; diff --git a/tests/data/bam_reader/small.sam b/tests/data/bam_reader/small.sam index 8a5ab5e96..2e79b5f4c 100644 --- a/tests/data/bam_reader/small.sam +++ b/tests/data/bam_reader/small.sam @@ -2,14 +2,14 @@ @SQ SN:NZ_CP062160.1 LN:5498557 @SQ SN:NZ_CP062161.1 LN:104844 @PG ID:minimap2 PN:minimap2 VN:2.24-r1155-dirty CL:minimap2 -k 19 -w 19 -H -a --MD -o test2.sam /Users/joyjit.daw/references/ecoli-GCF_014825845.1_ASM1482584v1_genomic.fn /Users/joyjit.daw/Downloads/temp.fasta -f25635d3-3b81-4ff2-8510-f28330590801 4 * 0 0 * * 0 0 TGCTCAGATGGGGGAATGAAAACAATACTCGACAGGAACTACAACGCCACAGCGGCGATTGTATCAGAAAGTTGACACTGGAAGATAAACTGGTGGAGAAACCTGTCACGGGGAAGAGCAGAGCTTGACGCGCTTCGCTTGTCCCGGCCTCACGACCCGCGAAAGAGTTTCGCGAACCGTTCACCGTTTTTCAATATGATTCTGTCTGCTTTGGTCATTAAAGCAGACGATAGTGTCCAACTTCCCCGTATATAACGGTACGTGCTCATCGGTAAGAGCTGGCGGTATTGTGTTTATCAGGGGAAGTTTTCAGATACTCACGAAAATGCCTGGCAGTACCGCCGTCTACAATGCCATCGATAATATGTAGGTGTTTTCACCTAATTTTGCCATGATGATAATCCTTATATTACGGTTTGTGAATAGGTCTGCTTGTCAGGGTTTGTGTAACCTTTTAACCTTAGCAATGCGTAACGCGTGTACTTTGCGTTCAATGGTTACGTATTGCTGGGTTAGAGAGGACAAAGGTTTCAACTGCCCCCAGCACCTCTGGTTAATTGGATAGCGACACTTCCTGTATCATTCGGTGGCGGAAAATCAGTGGAATCGCCGAAATGGCTGGCGCGATTAACCATTTTGCGCCTGGCACCTGCTCTATTTTACCGATTTATCAACTGGCCTCGCCTGCGGCGAATTGTTAGTGGAATGATAGCTGAAGAGCTGCAAAGTTCAGG * rl:i:0 pi:Z:aaaaaaaaaa -95e6aa45-efe0-4ed9-b7b9-2f35164f9836 0 NZ_CP062160.1 1012347 60 1S43M1D8M1D11M1D5M1I10M1D2M5D2M1I37M1I102M2D14M2D25M3D90M2D7M1I35M3D8M1D73M1I28M1D9M1D2M1D9M3D5M3D28M1D21M4D62M1D6M1D5M1D52M1D2M1D39M1D45M1I30M2D119M1D2M2D20M2I76M1D12M1D4M1I176M1D22M780S * 0 0 TGTAATCACGCCGTCTGCCAGTAGGATCACTTTGTTGGCGGTTGAGCGGGATAATGCCAATCGTTCTGCTCAATGCTGCCCGGCATCTCAGCAAAGGCCGCGACACGGGTGCTGAGCTGCCACGACATCCGAGGTGGAGTAGTCTGTTTCAATAGCGATATAAGGAATGTTGTGCTGCTGGCGCACATGACGTTTAATCGCCAGCGATTCCACCGCGTAGGTATCGCGCCTGCAAAATCATCAACTACGCCATCGACCTGATATTCAACCATCTGGCTGAGCATTTGCGGGCGCTGATCGTTCGGCGAAACACAGGAGCAGCCAATCGCCAGATATTTATCCGCCAGCGCGTCATACATCACCCCGTTTCCGCCACGCATTGCTCGGTGGCTTTCGCGGTGCAGTTTCATAACCGACAACCCAGCCGCCATTCTCTTCAATCGCGCGCACCACTTTCTCTGCTGCGCCGCCAATTGGAGCAGCCGGTGATTAAAATACGTGGGCGCGGTCCTGTCATGCCTTCTTCCTGCTGAACGCGGGCGGTCATTGCATCCAGTTCGTGATCAACGCCTCTTTATCGGGCAGGTTGCGCCGTAGACTACTTTCAGAATGTCGCTGCCGCTAAGCGCTGGAGGATTTAACGCACAAATGATAAAGTTGGCCAGTGCGCGACGTTCGCGGTTTTTCAGCGCAATGGCATCGCGCGATATCTTCGCTAATCTCGTGCCCAAAACGTTCTTCTACTGTTTTTGCAAGCGCAGCATCTCGGCTTTCCATAATGCACGCGAGGCGCTCGTCCTTAACGCTGTTTGGCAACTGCATCATGAACGGGCTTAAATTCCGCCATGTATTCATACATTTTCTTTTTGCCGTCACAGGTGGTTTCACCGACCACCAGATCAGAAAAACAGAAGTAGGGGCATTTATCGGTTTTGCCGAAGCGGCTGCTTTTAATCAGCGGGCAGAGAGGTTGCGCGGCAGATCTTTCTCGGCTTCTTCAATGGTTTCATCAGAGGTGGAACATAGCGAAACCACAACCACCCGGCTGCCATCGGATACTCTTGCGGCATAAAGGTGCAGTAGGTGCCAACCAGCGGAATGCCGCGCTCCTTGAGATCCATGACGGTGAGAAAGCCTTTCTGGCGAGCTTCAGAGAACTGATCGAAAATGGCGGGTAGCTCGGTAACAAGTGACATGATTTTCCTTCCCCGTACCACGGGAGATAATGAAAAGAGCCACATTATTACCATTCTTTTGTTGTACTTCGTTCGATTGCATTACTGGGTGGCAGCGGCGTTCGAACGGCACTGATATCAACTGAAACAGACCATGGCGATGAGCTGTTGGGCGAGCAGGAGGAGCTGGTGGGCAACAAGGTCGCGAAAAAGCTGACCACTGACGCTTCTGAATGGAAGTCTTGCTGGATGGCGAAAAGGTAAAGTGAAATGGTCGCTGGTGACGAGCATAATATGCGTAGCTGATGGCGATTGCGGCGGCTCGCCATGTTGGTGTAGCGGCGGATACTCACCAGGTTCGTTTTAACGCTCGTCGCCGTCTGGAGTTGCACCATCCGCCAGCACGGATCTGCTCCAGCTACCTGGCTGTGGTTCTGAATAATGGTGAGCATGTTTTCCCGGCGTTAACCGTAATGGTCATCGGCTCGCGCCATCTGTCGCGGTGACTGACTTGCGGAATATCGGCGTTAGCCATAAAAGCAGAGAACAGCGCAGCCGCATAACTGCAATACGACAGTTAATGGTCTTGACGAGACTATCCGTGTATGTTGTAACTAAACAGAGTTTATCGTATTTGCCCACGGGCGGGACGCAGTCGCTAAGTGGCAGAACAAATGCCACCAGCGCCGGGATAGATAAATAAACCAGACGGCGCGCTTCGCTGACGCTCGGTGCTTCCTGATAGCCAAATCCCTTCAGTGTGCCAAACAGCGAAATTGTGAGTTGAGCACCGCGCTCATATCGAAGGCGATTTCCTGAAGATGGTTCCACAATCCGGCTTCATGAAATGCGCGAATGGCA * NM:i:124 ms:i:1853 AS:i:1832 nn:i:0 tp:A:P cm:i:24 s1:i:396 s2:i:0 de:f:0.0802 MD:Z:17G4A4C5G7A1^A1C1A1A2^C11^A0A1A5C4A1^T2^TTACA8A0T18A11A56A17T25^GG1A12^CA25^TTC3C19T1A64^GT1G5G8T17C7^CCC8^T70C20G2C2A3^G5A3^G0C1^G9^CCA5^GAC3T12C2G8^A0T20^AACC2A13A2C29C8G3^T2C3^G5^A3A48^A2^G0C12T20C2C1^C6T26C11A5T16G6^CA16C35G26C5G0T32^C2^TA3A41C32G15G1^A12^G28A24A70A5G49^G22 rl:i:0 pi:Z:aaaaaaaaaa -13d1b2d7-b6a8-4c58-a709-49f0f341c26f 4 * 0 0 * * 0 0 CTACCCAGATTTTTGTGCAAAACCGTGACGGTACTGATCGGCATGGGGAAATTCTTCGCTATCCTGGTCACCGGTATCGACCTTTCGGTTAGCGCGATTCTGGCGCTTTCCGGTATGGTGACCGCTAAACTGATGTTGGCAGGTATTGATCCGTTTCTCGCGGCGCTAATTGGCGGTGTACTGGTTGGCGGCGCACTGGGGCGATCAACGGCTGCCTGGTCAACTGGACGGGACTACACCCGTTCATTATCACCTTAGCACCAACGCTATTTTCCGTGGGATCACGCTGGTGATCTCCGATGCCAACTCGGTAGCCGACTTCTCATTTGACCGTGAACTTCTTTTGCCGCCAGCGTAATTGGGATACCTGTCCCCGTTATCTTCTCGCTAATTGCTTCGCGCTCATCCTTTGGTTTCTGACAACGCGTATGCGCTCGGGCGCAACATCTACGCACTGGGCGGCAGCAAAAACTCGGCGTTATTCCGGGATTGACGTGAAATAGGTGCTGAAGCGTTGAGTATCCTCTTGGCGCCTTAGCAGTACGTAGCCTTGTGTTGCCGCGTTTCAATTTACGTATTTGCTAAAAGGTTAAACAGGCGACTACAAACAGAATCGACGCTTTACCCGCAAACAATGCCCGTTGAAATTTATCCGCCGTTCATAGCCTCACCTCCGCAAATAACGGATGGCGTAGTTTTACACTGAGAAATGAAAGGATTTGAAAAAACCGCAAAGCGGGCGAAACGATATATACAGAAAGAAAGGAAAGCACTCTATCCAACAAACACACCCACAGTTGATCGGAATAAAAAGCAGA * rl:i:0 -489f7224-7d18-41d4-9129-d28c3e6aa780 16 NZ_CP062160.1 1629814 60 2S9M1I95M1D3M1D53M1I17M1D75M1I46M1I6M1I23M1I22M168S * 0 0 CTTATCATGTCTTTTTGGGTGAGTATTCATCCATAATGCGTCCTCTTCTTAGCGGTTGAACTAACGGACACCTTTCGGGATGGAAAAAACTTACTGACCTGGACTTGCCTTCGTTTGTTAGCTTAACTATAAGCCACTCTTTGCAGGTTTTCATCGCATTATTAATGAAAAATTGCAATTAGAAGGAAGCGGGACAGGACTCACTTTTTCAGCCTAGCACCAACCCGCAGCAGGTAAAAGCAGTTTGCCCGAACTGTTTCATAACATTTCAGAATTATCGCCAGAAGAAATTGGTGACACTAAGTACTGGTGATTGCTCTGAATATGACGTAATAAATCGAGGAATGAATAAAGAATATGTAACGTCCGGCTTCATCCGGATTTTCTGAGGCTACCGTGTTCACCACAACCGTGGTGCTGTTACGTTTTGTTTTGAGCTGAATGTGCAGTTTCAGTCGGTTTCCTGCACCGTCTTTCAGCACGCCTGAAATCTGTACTGCCATATTCACTCCACAAATAAAAAAG * NM:i:22 ms:i:566 AS:i:566 nn:i:0 tp:A:P cm:i:6 s1:i:121 s2:i:0 de:f:0.0615 MD:Z:33C6C63^C3^C54C8A6^G17G12T0A35T22G12T20C0A26C19 rl:i:0 -d7500028-dfcc-4404-b636-13edae804c55 16 NZ_CP062160.1 3164467 1 174M2D20M329S * 0 0 CGCCGCCGGTGCGGCAATCCGGAACGATACCGATGCCGGATCGCCCCGCTGCCCCCACGCATTTACTGCCCGGACTGTCAGCGTGTACCGCCCCAGCGCCAGTTGCGTGAAGCGGTATGTGGTTTCCGTCGTCCGGGCCGTGCTGACCAGCCGCTCACTGCCGTCATCCGCTGCCGGTCAGGCGAAGCATAAAGGTGCTGTCGATTCCGTTTGTAGTCGTCTGTTTAACCTTAGCAATACGTAACTGAACGAAGTACAACACATTTTTTTTTTTTTTTTTTCAGTTACGTATTGCCAAGGTTAAGAGAGGTCAGCGTTTCAACGCTCAGCACCTATCGCTCCGGAAGTGGACGTTGACGACGAGCCAGAAGAAGAATAATTTCATCGCTTTCATGCCAGGGAAAAGGGAGCCATCTCCTCTTTGAATTGAAAAGTCCAGGCTGTAAAGTCTGGGCTTTTGTCGTATTAGGCGCGGTGTTTGGCTGTGCCTCGTAAAAAATGGCTGGCTATACACAAGGAATAG * NM:i:12 ms:i:321 AS:i:320 nn:i:0 tp:A:P cm:i:4 s1:i:70 s2:i:77 de:f:0.0564 SA:Z:NZ_CP062160.1,4112332,+,2S187M334S,19,20; MD:Z:3T29C12T19C15C4G0T0T4G76G2^CA20 rl:i:0 -d7500028-dfcc-4404-b636-13edae804c55 256 NZ_CP062160.1 2003415 0 334S14M2D175M * 0 0 * * NM:i:12 ms:i:311 AS:i:310 nn:i:0 tp:A:S cm:i:3 s1:i:51 de:f:0.0579 MD:Z:14^GT72G12A0A0C4G15G14A4A12G29A3 rl:i:0 -d7500028-dfcc-4404-b636-13edae804c55 256 NZ_CP062160.1 2912586 0 334S14M2D175M * 0 0 * * NM:i:14 ms:i:299 AS:i:298 nn:i:0 tp:A:S cm:i:4 s1:i:77 de:f:0.0684 MD:Z:14^GT40A7A31C4A0A0C4G1T13G14A4A42A3 rl:i:0 -d7500028-dfcc-4404-b636-13edae804c55 272 NZ_CP062160.1 3313878 0 174M2D15M334S * 0 0 * * NM:i:15 ms:i:293 AS:i:292 nn:i:0 tp:A:S cm:i:3 s1:i:58 de:f:0.0737 MD:Z:3T42T4T14C8A6C4G0T0T44T6A20A2G8^CA15 rl:i:0 -d7500028-dfcc-4404-b636-13edae804c55 2048 NZ_CP062160.1 4112332 19 2H31M1I13M3D6M1D52M2D16M4I9M3I2M2D50M334H * 0 0 ATTCCTTGTGTATAGCCAGCCATTTTTTACGAGGCACAGCCAAACACCGCGCCTAATACGACAAAAGCCCAGACTTTACAGCCTGGACTTTTCAATTCAAAGAGGAGATGGCTCCCTTTTCCCTGGCATGAAAGCGATGAAATTATTCTTCTTCTGGCTCGTCGTCAACGTCCACTTCCGGAGCGAT * NM:i:20 ms:i:283 AS:i:274 nn:i:0 tp:A:P cm:i:3 s1:i:64 s2:i:0 de:f:0.0591 SA:Z:NZ_CP062160.1,3164467,-,194M2D329S,1,12; MD:Z:32A11^TTT6^C26G23A1^AG27^AG1A48 rl:i:0 -60588a89-f191-414e-b444-ad0815b7d9c9 4 * 0 0 * * 0 0 CTGCATTTCATCGACCACCGGCTTGGTCTGCCAGGCGATAAGCTCCAGGCAAATTCAGCCAGTGAGTCGGTGGTACGGCATCACCTGCAATAATCGGATACAGTTCCTCTTCCAGGGGAAGCGGAGCATTTCACCGGGTGGAGGAGGGCGTTGGCAGTACCTTGCTGTATTTTTGAGTTTCGAACGTGTCCATAATGAGGTTGATAAAACAACGGTAAAGCGGGTATTTTCATCAGGTATTTACCCACAGGACCGTTGTCGTTATTTTTATTTATTGTCGTTGCCCAGGGCGGCGAGACTAATACTGGTAGCATTTCTGAGTTTGCAAAATCTTCATTGCGGCTATTCCAGTAATTGCTGGCATTGTTATAAGTCCGCGGCGTGTGGCGATAACCTCATGACTAATTTTTCGGCCAGCAGGGTTGCATTGGTAATGAAACCAGCAATGTATGTTTGCGTTTATCTCTTTTTACCCCAATGCTTCGGCTAATGCTAACGCTATTGTGAAGGAAAATTTTCAGCACTTTTTATTAGTCGTTCATAACAGCGTTTACAGTATTATCGGTGGCTTCTATTAGGGCTGAAACAGCGCATAGGGATGGTTGACCATGTAAGTACATCACCAACTGTCATGTTGAGCATAGGGAACCAGGCGGTAGTGCCGTTGCCTTGTTCATAATACTGTAGCTGACCAACTGGTACCATAGATTAAACATCAATGTGTTGATAGTTTTCAAGTATTTCAAGCGTGACAGGATCTGTCGCGCTACCAGTTTTAACATTTCCAGTTGTGGAATATCGGCGAGCTGGTGCAATCGTTGGTATCTCATGGAGCGCTTCAGCGTTGGCATTAAAATAGCAATTAACGGTTGGCGATAGACTGATATATCTGGTAACACAACGAGTAATTTTGCAATACAGTTATTCAGACTTCCTTTTCAGCCAGCGATAAATGTTAG * rl:i:0 -dbd32a76-43d2-40d6-9ccc-610af70a4051 16 NZ_CP062160.1 2846079 60 130S32M3I13M1D9M1I13M4I85M3I16M3I24M1I16M1D86M2I14M3D113M3I1M1I25M5I1M2I191M3I57M2I4M1D30M2I1M2I31M3D59M1D10M2D35M1D31M341S * 0 0 TAAATAAAGTTTGTTTAATGGCTTCGTTTATTCGCCAGGCTGATTAATCACATGCAGGCTATCTGACCGCCAGCACCAAGAATCAGTACATTTTTCATGAAACTTTCCCAGGATTACGCGGCAAGCACATGCCATATCACTATTACGGATTTGAATATGTTTCCCCATTACAGAGTTTCTCAAAGAATCAGAGCTAATACTTCTCGAAATTACTTCAGCCACATAGACTTTGCCATCATAAAGATCCATATGGTTTGCGCCTTCAACAATGTGATAGCGTTTATCCTCGCGCTTTGATGCTCGATCGTACAGCAGGCCGTCACTCATCCATTTGCTCCCCAGCCAGGCTGCTCACATAATCTGTATCGGCTGAGTCAGGTACACTTCCGCCATATGGTAAGCATCATAGGTAATAATCTGGTTAAGGCTGCGCAAAGTAGCGCGTAACCCGGTGCTGGATACTGCGCGCGAGGGGTGTGGTAATATTCCCAGGCCTGACGCAGTTCTTCGTTCGGCGCATCGGACTCCTTCATTGGTGCCAGTGGCATAGTGGCGCATTCTCCGCTGCGTTTCAATATCGCTGGTTCGGGCGTTTGAAATTTTCTGGGGCGTCAACGTATGGTAGGGCATCAACAGATTTCACATTGTTTTCCCAACCATTACGGAACATCGAACCAATATTGACCGCACTAACAGTACCGATGGCCTTGATGCGCGGATCCCGAATTGCAGCATTGGCTGTATATCCTGCACCGGCACAAATTCCCATCGCACCAATTCGGGTATTGTCGACATAACCTGAAAGCGTTGTCAGGTAATCAATCACGGCACTGACGTCTTCAGTACGAATGTATGGGGCGTTTTAACTGACGCGGCTCGCCGCCGCTTTCTTCGTCTTTGATAAGATGCGTCATAAGCAATAGTGACAACTTTTTCCGCCAGTTTTTCGGCATAGGTTCCGGCCGTTTGCTCTTTAACGCCCCCACCTGGTGAGATACCAATTGCCGGATACTGACGGGTTTCATCAAATTTTGAGGGAAATAGATCACTGCAGACAAAGAGATAGGTGCTGAAGCGTTGAAACCTTTGTCCTCTCTTAACCTTAGCAATACGTAACCAAACGTGAAGTGCCGAAAGATAAGCAGCAACAGACGTGACTACGGAATCGACAGCACCCATAGCGTACCGAGCGCCACCAGAAAGATTTTTGCGGATTCATCATCGCGGTACCACAAAAATCACCAATAGAAGCGGATGCCAGGATGCGGCACGTTGCGCAACATCTGAATTGAGGTATCCAGGCAGTCGCTTCACTCCCAGCGACAGCCCGCCAATCAGCCCTTACAGTAGAATTCAATCCCCAGCGATCTCGCCAATTGAAAAGCCAACGAACGCCCGCCAGGAG * NM:i:87 ms:i:1407 AS:i:1380 nn:i:0 tp:A:P cm:i:19 s1:i:360 s2:i:0 de:f:0.0652 MD:Z:14G30^C10G17G14G7G52G9C12T22T3T6C1^G0C2C6C0G88^TGT30C63A5T27T11C0C2T11C10T60G20G0C5T131T3^C21A5A0C33^ATC1C26C11T18^G10^CA13T21^G4A26 rl:i:0 +f25635d3-3b81-4ff2-8510-f28330590801 4 * 0 0 * * 0 0 TGCTCAGATGGGGGAATGAAAACAATACTCGACAGGAACTACAACGCCACAGCGGCGATTGTATCAGAAAGTTGACACTGGAAGATAAACTGGTGGAGAAACCTGTCACGGGGAAGAGCAGAGCTTGACGCGCTTCGCTTGTCCCGGCCTCACGACCCGCGAAAGAGTTTCGCGAACCGTTCACCGTTTTTCAATATGATTCTGTCTGCTTTGGTCATTAAAGCAGACGATAGTGTCCAACTTCCCCGTATATAACGGTACGTGCTCATCGGTAAGAGCTGGCGGTATTGTGTTTATCAGGGGAAGTTTTCAGATACTCACGAAAATGCCTGGCAGTACCGCCGTCTACAATGCCATCGATAATATGTAGGTGTTTTCACCTAATTTTGCCATGATGATAATCCTTATATTACGGTTTGTGAATAGGTCTGCTTGTCAGGGTTTGTGTAACCTTTTAACCTTAGCAATGCGTAACGCGTGTACTTTGCGTTCAATGGTTACGTATTGCTGGGTTAGAGAGGACAAAGGTTTCAACTGCCCCCAGCACCTCTGGTTAATTGGATAGCGACACTTCCTGTATCATTCGGTGGCGGAAAATCAGTGGAATCGCCGAAATGGCTGGCGCGATTAACCATTTTGCGCCTGGCACCTGCTCTATTTTACCGATTTATCAACTGGCCTCGCCTGCGGCGAATTGTTAGTGGAATGATAGCTGAAGAGCTGCAAAGTTCAGG * rl:i:0 pi:Z:aaaaaaaaaa XA:i:42 XB:Z:test +95e6aa45-efe0-4ed9-b7b9-2f35164f9836 0 NZ_CP062160.1 1012347 60 1S43M1D8M1D11M1D5M1I10M1D2M5D2M1I37M1I102M2D14M2D25M3D90M2D7M1I35M3D8M1D73M1I28M1D9M1D2M1D9M3D5M3D28M1D21M4D62M1D6M1D5M1D52M1D2M1D39M1D45M1I30M2D119M1D2M2D20M2I76M1D12M1D4M1I176M1D22M780S * 0 0 TGTAATCACGCCGTCTGCCAGTAGGATCACTTTGTTGGCGGTTGAGCGGGATAATGCCAATCGTTCTGCTCAATGCTGCCCGGCATCTCAGCAAAGGCCGCGACACGGGTGCTGAGCTGCCACGACATCCGAGGTGGAGTAGTCTGTTTCAATAGCGATATAAGGAATGTTGTGCTGCTGGCGCACATGACGTTTAATCGCCAGCGATTCCACCGCGTAGGTATCGCGCCTGCAAAATCATCAACTACGCCATCGACCTGATATTCAACCATCTGGCTGAGCATTTGCGGGCGCTGATCGTTCGGCGAAACACAGGAGCAGCCAATCGCCAGATATTTATCCGCCAGCGCGTCATACATCACCCCGTTTCCGCCACGCATTGCTCGGTGGCTTTCGCGGTGCAGTTTCATAACCGACAACCCAGCCGCCATTCTCTTCAATCGCGCGCACCACTTTCTCTGCTGCGCCGCCAATTGGAGCAGCCGGTGATTAAAATACGTGGGCGCGGTCCTGTCATGCCTTCTTCCTGCTGAACGCGGGCGGTCATTGCATCCAGTTCGTGATCAACGCCTCTTTATCGGGCAGGTTGCGCCGTAGACTACTTTCAGAATGTCGCTGCCGCTAAGCGCTGGAGGATTTAACGCACAAATGATAAAGTTGGCCAGTGCGCGACGTTCGCGGTTTTTCAGCGCAATGGCATCGCGCGATATCTTCGCTAATCTCGTGCCCAAAACGTTCTTCTACTGTTTTTGCAAGCGCAGCATCTCGGCTTTCCATAATGCACGCGAGGCGCTCGTCCTTAACGCTGTTTGGCAACTGCATCATGAACGGGCTTAAATTCCGCCATGTATTCATACATTTTCTTTTTGCCGTCACAGGTGGTTTCACCGACCACCAGATCAGAAAAACAGAAGTAGGGGCATTTATCGGTTTTGCCGAAGCGGCTGCTTTTAATCAGCGGGCAGAGAGGTTGCGCGGCAGATCTTTCTCGGCTTCTTCAATGGTTTCATCAGAGGTGGAACATAGCGAAACCACAACCACCCGGCTGCCATCGGATACTCTTGCGGCATAAAGGTGCAGTAGGTGCCAACCAGCGGAATGCCGCGCTCCTTGAGATCCATGACGGTGAGAAAGCCTTTCTGGCGAGCTTCAGAGAACTGATCGAAAATGGCGGGTAGCTCGGTAACAAGTGACATGATTTTCCTTCCCCGTACCACGGGAGATAATGAAAAGAGCCACATTATTACCATTCTTTTGTTGTACTTCGTTCGATTGCATTACTGGGTGGCAGCGGCGTTCGAACGGCACTGATATCAACTGAAACAGACCATGGCGATGAGCTGTTGGGCGAGCAGGAGGAGCTGGTGGGCAACAAGGTCGCGAAAAAGCTGACCACTGACGCTTCTGAATGGAAGTCTTGCTGGATGGCGAAAAGGTAAAGTGAAATGGTCGCTGGTGACGAGCATAATATGCGTAGCTGATGGCGATTGCGGCGGCTCGCCATGTTGGTGTAGCGGCGGATACTCACCAGGTTCGTTTTAACGCTCGTCGCCGTCTGGAGTTGCACCATCCGCCAGCACGGATCTGCTCCAGCTACCTGGCTGTGGTTCTGAATAATGGTGAGCATGTTTTCCCGGCGTTAACCGTAATGGTCATCGGCTCGCGCCATCTGTCGCGGTGACTGACTTGCGGAATATCGGCGTTAGCCATAAAAGCAGAGAACAGCGCAGCCGCATAACTGCAATACGACAGTTAATGGTCTTGACGAGACTATCCGTGTATGTTGTAACTAAACAGAGTTTATCGTATTTGCCCACGGGCGGGACGCAGTCGCTAAGTGGCAGAACAAATGCCACCAGCGCCGGGATAGATAAATAAACCAGACGGCGCGCTTCGCTGACGCTCGGTGCTTCCTGATAGCCAAATCCCTTCAGTGTGCCAAACAGCGAAATTGTGAGTTGAGCACCGCGCTCATATCGAAGGCGATTTCCTGAAGATGGTTCCACAATCCGGCTTCATGAAATGCGCGAATGGCA * NM:i:124 ms:i:1853 AS:i:1832 nn:i:0 tp:A:P cm:i:24 s1:i:396 s2:i:0 de:f:0.0802 MD:Z:17G4A4C5G7A1^A1C1A1A2^C11^A0A1A5C4A1^T2^TTACA8A0T18A11A56A17T25^GG1A12^CA25^TTC3C19T1A64^GT1G5G8T17C7^CCC8^T70C20G2C2A3^G5A3^G0C1^G9^CCA5^GAC3T12C2G8^A0T20^AACC2A13A2C29C8G3^T2C3^G5^A3A48^A2^G0C12T20C2C1^C6T26C11A5T16G6^CA16C35G26C5G0T32^C2^TA3A41C32G15G1^A12^G28A24A70A5G49^G22 rl:i:0 pi:Z:aaaaaaaaaa XA:i:42 XB:Z:test +13d1b2d7-b6a8-4c58-a709-49f0f341c26f 4 * 0 0 * * 0 0 CTACCCAGATTTTTGTGCAAAACCGTGACGGTACTGATCGGCATGGGGAAATTCTTCGCTATCCTGGTCACCGGTATCGACCTTTCGGTTAGCGCGATTCTGGCGCTTTCCGGTATGGTGACCGCTAAACTGATGTTGGCAGGTATTGATCCGTTTCTCGCGGCGCTAATTGGCGGTGTACTGGTTGGCGGCGCACTGGGGCGATCAACGGCTGCCTGGTCAACTGGACGGGACTACACCCGTTCATTATCACCTTAGCACCAACGCTATTTTCCGTGGGATCACGCTGGTGATCTCCGATGCCAACTCGGTAGCCGACTTCTCATTTGACCGTGAACTTCTTTTGCCGCCAGCGTAATTGGGATACCTGTCCCCGTTATCTTCTCGCTAATTGCTTCGCGCTCATCCTTTGGTTTCTGACAACGCGTATGCGCTCGGGCGCAACATCTACGCACTGGGCGGCAGCAAAAACTCGGCGTTATTCCGGGATTGACGTGAAATAGGTGCTGAAGCGTTGAGTATCCTCTTGGCGCCTTAGCAGTACGTAGCCTTGTGTTGCCGCGTTTCAATTTACGTATTTGCTAAAAGGTTAAACAGGCGACTACAAACAGAATCGACGCTTTACCCGCAAACAATGCCCGTTGAAATTTATCCGCCGTTCATAGCCTCACCTCCGCAAATAACGGATGGCGTAGTTTTACACTGAGAAATGAAAGGATTTGAAAAAACCGCAAAGCGGGCGAAACGATATATACAGAAAGAAAGGAAAGCACTCTATCCAACAAACACACCCACAGTTGATCGGAATAAAAAGCAGA * rl:i:0 XA:i:42 XB:Z:test +489f7224-7d18-41d4-9129-d28c3e6aa780 16 NZ_CP062160.1 1629814 60 2S9M1I95M1D3M1D53M1I17M1D75M1I46M1I6M1I23M1I22M168S * 0 0 CTTATCATGTCTTTTTGGGTGAGTATTCATCCATAATGCGTCCTCTTCTTAGCGGTTGAACTAACGGACACCTTTCGGGATGGAAAAAACTTACTGACCTGGACTTGCCTTCGTTTGTTAGCTTAACTATAAGCCACTCTTTGCAGGTTTTCATCGCATTATTAATGAAAAATTGCAATTAGAAGGAAGCGGGACAGGACTCACTTTTTCAGCCTAGCACCAACCCGCAGCAGGTAAAAGCAGTTTGCCCGAACTGTTTCATAACATTTCAGAATTATCGCCAGAAGAAATTGGTGACACTAAGTACTGGTGATTGCTCTGAATATGACGTAATAAATCGAGGAATGAATAAAGAATATGTAACGTCCGGCTTCATCCGGATTTTCTGAGGCTACCGTGTTCACCACAACCGTGGTGCTGTTACGTTTTGTTTTGAGCTGAATGTGCAGTTTCAGTCGGTTTCCTGCACCGTCTTTCAGCACGCCTGAAATCTGTACTGCCATATTCACTCCACAAATAAAAAAG * NM:i:22 ms:i:566 AS:i:566 nn:i:0 tp:A:P cm:i:6 s1:i:121 s2:i:0 de:f:0.0615 MD:Z:33C6C63^C3^C54C8A6^G17G12T0A35T22G12T20C0A26C19 rl:i:0 XA:i:42 XB:Z:test +d7500028-dfcc-4404-b636-13edae804c55 16 NZ_CP062160.1 3164467 1 174M2D20M329S * 0 0 CGCCGCCGGTGCGGCAATCCGGAACGATACCGATGCCGGATCGCCCCGCTGCCCCCACGCATTTACTGCCCGGACTGTCAGCGTGTACCGCCCCAGCGCCAGTTGCGTGAAGCGGTATGTGGTTTCCGTCGTCCGGGCCGTGCTGACCAGCCGCTCACTGCCGTCATCCGCTGCCGGTCAGGCGAAGCATAAAGGTGCTGTCGATTCCGTTTGTAGTCGTCTGTTTAACCTTAGCAATACGTAACTGAACGAAGTACAACACATTTTTTTTTTTTTTTTTTCAGTTACGTATTGCCAAGGTTAAGAGAGGTCAGCGTTTCAACGCTCAGCACCTATCGCTCCGGAAGTGGACGTTGACGACGAGCCAGAAGAAGAATAATTTCATCGCTTTCATGCCAGGGAAAAGGGAGCCATCTCCTCTTTGAATTGAAAAGTCCAGGCTGTAAAGTCTGGGCTTTTGTCGTATTAGGCGCGGTGTTTGGCTGTGCCTCGTAAAAAATGGCTGGCTATACACAAGGAATAG * NM:i:12 ms:i:321 AS:i:320 nn:i:0 tp:A:P cm:i:4 s1:i:70 s2:i:77 de:f:0.0564 SA:Z:NZ_CP062160.1,4112332,+,2S187M334S,19,20; MD:Z:3T29C12T19C15C4G0T0T4G76G2^CA20 rl:i:0 XA:i:42 XB:Z:test +d7500028-dfcc-4404-b636-13edae804c55 256 NZ_CP062160.1 2003415 0 334S14M2D175M * 0 0 * * NM:i:12 ms:i:311 AS:i:310 nn:i:0 tp:A:S cm:i:3 s1:i:51 de:f:0.0579 MD:Z:14^GT72G12A0A0C4G15G14A4A12G29A3 rl:i:0 XA:i:42 XB:Z:test +d7500028-dfcc-4404-b636-13edae804c55 256 NZ_CP062160.1 2912586 0 334S14M2D175M * 0 0 * * NM:i:14 ms:i:299 AS:i:298 nn:i:0 tp:A:S cm:i:4 s1:i:77 de:f:0.0684 MD:Z:14^GT40A7A31C4A0A0C4G1T13G14A4A42A3 rl:i:0 XA:i:42 XB:Z:test +d7500028-dfcc-4404-b636-13edae804c55 272 NZ_CP062160.1 3313878 0 174M2D15M334S * 0 0 * * NM:i:15 ms:i:293 AS:i:292 nn:i:0 tp:A:S cm:i:3 s1:i:58 de:f:0.0737 MD:Z:3T42T4T14C8A6C4G0T0T44T6A20A2G8^CA15 rl:i:0 XA:i:42 XB:Z:test +d7500028-dfcc-4404-b636-13edae804c55 2048 NZ_CP062160.1 4112332 19 2H31M1I13M3D6M1D52M2D16M4I9M3I2M2D50M334H * 0 0 ATTCCTTGTGTATAGCCAGCCATTTTTTACGAGGCACAGCCAAACACCGCGCCTAATACGACAAAAGCCCAGACTTTACAGCCTGGACTTTTCAATTCAAAGAGGAGATGGCTCCCTTTTCCCTGGCATGAAAGCGATGAAATTATTCTTCTTCTGGCTCGTCGTCAACGTCCACTTCCGGAGCGAT * NM:i:20 ms:i:283 AS:i:274 nn:i:0 tp:A:P cm:i:3 s1:i:64 s2:i:0 de:f:0.0591 SA:Z:NZ_CP062160.1,3164467,-,194M2D329S,1,12; MD:Z:32A11^TTT6^C26G23A1^AG27^AG1A48 rl:i:0 XA:i:42 XB:Z:test +60588a89-f191-414e-b444-ad0815b7d9c9 4 * 0 0 * * 0 0 CTGCATTTCATCGACCACCGGCTTGGTCTGCCAGGCGATAAGCTCCAGGCAAATTCAGCCAGTGAGTCGGTGGTACGGCATCACCTGCAATAATCGGATACAGTTCCTCTTCCAGGGGAAGCGGAGCATTTCACCGGGTGGAGGAGGGCGTTGGCAGTACCTTGCTGTATTTTTGAGTTTCGAACGTGTCCATAATGAGGTTGATAAAACAACGGTAAAGCGGGTATTTTCATCAGGTATTTACCCACAGGACCGTTGTCGTTATTTTTATTTATTGTCGTTGCCCAGGGCGGCGAGACTAATACTGGTAGCATTTCTGAGTTTGCAAAATCTTCATTGCGGCTATTCCAGTAATTGCTGGCATTGTTATAAGTCCGCGGCGTGTGGCGATAACCTCATGACTAATTTTTCGGCCAGCAGGGTTGCATTGGTAATGAAACCAGCAATGTATGTTTGCGTTTATCTCTTTTTACCCCAATGCTTCGGCTAATGCTAACGCTATTGTGAAGGAAAATTTTCAGCACTTTTTATTAGTCGTTCATAACAGCGTTTACAGTATTATCGGTGGCTTCTATTAGGGCTGAAACAGCGCATAGGGATGGTTGACCATGTAAGTACATCACCAACTGTCATGTTGAGCATAGGGAACCAGGCGGTAGTGCCGTTGCCTTGTTCATAATACTGTAGCTGACCAACTGGTACCATAGATTAAACATCAATGTGTTGATAGTTTTCAAGTATTTCAAGCGTGACAGGATCTGTCGCGCTACCAGTTTTAACATTTCCAGTTGTGGAATATCGGCGAGCTGGTGCAATCGTTGGTATCTCATGGAGCGCTTCAGCGTTGGCATTAAAATAGCAATTAACGGTTGGCGATAGACTGATATATCTGGTAACACAACGAGTAATTTTGCAATACAGTTATTCAGACTTCCTTTTCAGCCAGCGATAAATGTTAG * rl:i:0 XA:i:42 XB:Z:test +dbd32a76-43d2-40d6-9ccc-610af70a4051 16 NZ_CP062160.1 2846079 60 130S32M3I13M1D9M1I13M4I85M3I16M3I24M1I16M1D86M2I14M3D113M3I1M1I25M5I1M2I191M3I57M2I4M1D30M2I1M2I31M3D59M1D10M2D35M1D31M341S * 0 0 TAAATAAAGTTTGTTTAATGGCTTCGTTTATTCGCCAGGCTGATTAATCACATGCAGGCTATCTGACCGCCAGCACCAAGAATCAGTACATTTTTCATGAAACTTTCCCAGGATTACGCGGCAAGCACATGCCATATCACTATTACGGATTTGAATATGTTTCCCCATTACAGAGTTTCTCAAAGAATCAGAGCTAATACTTCTCGAAATTACTTCAGCCACATAGACTTTGCCATCATAAAGATCCATATGGTTTGCGCCTTCAACAATGTGATAGCGTTTATCCTCGCGCTTTGATGCTCGATCGTACAGCAGGCCGTCACTCATCCATTTGCTCCCCAGCCAGGCTGCTCACATAATCTGTATCGGCTGAGTCAGGTACACTTCCGCCATATGGTAAGCATCATAGGTAATAATCTGGTTAAGGCTGCGCAAAGTAGCGCGTAACCCGGTGCTGGATACTGCGCGCGAGGGGTGTGGTAATATTCCCAGGCCTGACGCAGTTCTTCGTTCGGCGCATCGGACTCCTTCATTGGTGCCAGTGGCATAGTGGCGCATTCTCCGCTGCGTTTCAATATCGCTGGTTCGGGCGTTTGAAATTTTCTGGGGCGTCAACGTATGGTAGGGCATCAACAGATTTCACATTGTTTTCCCAACCATTACGGAACATCGAACCAATATTGACCGCACTAACAGTACCGATGGCCTTGATGCGCGGATCCCGAATTGCAGCATTGGCTGTATATCCTGCACCGGCACAAATTCCCATCGCACCAATTCGGGTATTGTCGACATAACCTGAAAGCGTTGTCAGGTAATCAATCACGGCACTGACGTCTTCAGTACGAATGTATGGGGCGTTTTAACTGACGCGGCTCGCCGCCGCTTTCTTCGTCTTTGATAAGATGCGTCATAAGCAATAGTGACAACTTTTTCCGCCAGTTTTTCGGCATAGGTTCCGGCCGTTTGCTCTTTAACGCCCCCACCTGGTGAGATACCAATTGCCGGATACTGACGGGTTTCATCAAATTTTGAGGGAAATAGATCACTGCAGACAAAGAGATAGGTGCTGAAGCGTTGAAACCTTTGTCCTCTCTTAACCTTAGCAATACGTAACCAAACGTGAAGTGCCGAAAGATAAGCAGCAACAGACGTGACTACGGAATCGACAGCACCCATAGCGTACCGAGCGCCACCAGAAAGATTTTTGCGGATTCATCATCGCGGTACCACAAAAATCACCAATAGAAGCGGATGCCAGGATGCGGCACGTTGCGCAACATCTGAATTGAGGTATCCAGGCAGTCGCTTCACTCCCAGCGACAGCCCGCCAATCAGCCCTTACAGTAGAATTCAATCCCCAGCGATCTCGCCAATTGAAAAGCCAACGAACGCCCGCCAGGAG * NM:i:87 ms:i:1407 AS:i:1380 nn:i:0 tp:A:P cm:i:19 s1:i:360 s2:i:0 de:f:0.0652 MD:Z:14G30^C10G17G14G7G52G9C12T22T3T6C1^G0C2C6C0G88^TGT30C63A5T27T11C0C2T11C10T60G20G0C5T131T3^C21A5A0C33^ATC1C26C11T18^G10^CA13T21^G4A26 rl:i:0 XA:i:42 XB:Z:test diff --git a/tests/data/read_correction/expected.alignment_summary.txt b/tests/data/read_correction/expected.alignment_summary.txt index d57e5d49b..38d6a4c6a 100644 --- a/tests/data/read_correction/expected.alignment_summary.txt +++ b/tests/data/read_correction/expected.alignment_summary.txt @@ -1,7 +1,7 @@ filename read_id alignment_genome alignment_genome_start alignment_genome_end alignment_strand_start alignment_strand_end alignment_direction alignment_length alignment_num_aligned alignment_num_correct alignment_num_insertions alignment_num_deletions alignment_num_substitutions alignment_mapq alignment_strand_coverage alignment_identity alignment_accuracy alignment_bed_hits - 3c88104d-7964-43dc-ac47-c22b12cdc994 chr6_PATERNAL:115004800-115077200 164 59219 35 59080 + 59060 59040 59039 5 15 1 60 0.999272 0.999983 0.999644 0 - e3066d3e-2bdf-4803-89b9-0f077ac7ff7f chr6_PATERNAL:115004800-115077200 1917 70731 0 68796 + 68832 68778 68769 18 36 9 60 0.999797 0.999869 0.999085 0 - 73d5eb75-700e-42a0-9a5d-f9952bd7d829 chr6_PATERNAL:115004800-115077200 2873 68539 0 65645 - 65678 65633 65624 12 33 9 60 1 0.999863 0.999178 0 - df814002-1961-4262-aaf5-e8f2760aa77a chr6_PATERNAL:115004800-115077200 9503 72207 12 62700 - 62722 62670 62660 18 34 10 60 0.999315 0.99984 0.999012 0 - b93514e5-c61b-48d8-b730-f6c97d169ff7 chr6_PATERNAL:115004800-115077200 11003 60600 38 49627 + 49601 49585 49585 4 12 0 60 0.999234 1 0.999677 0 - 3855985e-bb9b-4df4-9825-cc08f373342b chr6_PATERNAL:115004800-115077200 12624 68476 10 55847 - 55864 55825 55816 12 27 9 60 0.999821 0.999839 0.999141 0 +corrected_reads.fasta 3c88104d-7964-43dc-ac47-c22b12cdc994 chr6_PATERNAL:115004800-115077200 164 59219 35 59080 + 59060 59040 59039 5 15 1 60 0.999272 0.999983 0.999644 0 +corrected_reads.fasta e3066d3e-2bdf-4803-89b9-0f077ac7ff7f chr6_PATERNAL:115004800-115077200 1917 70731 0 68796 + 68832 68778 68769 18 36 9 60 0.999797 0.999869 0.999085 0 +corrected_reads.fasta 73d5eb75-700e-42a0-9a5d-f9952bd7d829 chr6_PATERNAL:115004800-115077200 2873 68539 0 65645 - 65678 65633 65624 12 33 9 60 1 0.999863 0.999178 0 +corrected_reads.fasta df814002-1961-4262-aaf5-e8f2760aa77a chr6_PATERNAL:115004800-115077200 9503 72207 12 62700 - 62722 62670 62660 18 34 10 60 0.999315 0.99984 0.999012 0 +corrected_reads.fasta b93514e5-c61b-48d8-b730-f6c97d169ff7 chr6_PATERNAL:115004800-115077200 11003 60600 38 49627 + 49601 49585 49585 4 12 0 60 0.999234 1 0.999677 0 +corrected_reads.fasta 3855985e-bb9b-4df4-9825-cc08f373342b chr6_PATERNAL:115004800-115077200 12624 68476 10 55847 - 55864 55825 55816 12 27 9 60 0.999821 0.999839 0.999141 0