Skip to content

Commit

Permalink
Merge pull request #11 from ncbi/gpipe_compat
Browse files Browse the repository at this point in the history
Version 1.0.31 fixes inconsistencies in --amrfinder output
  • Loading branch information
evolarjun authored Jan 22, 2025
2 parents 0363956 + 198873c commit 48d8ae1
Show file tree
Hide file tree
Showing 9 changed files with 49 additions and 44 deletions.
3 changes: 2 additions & 1 deletion amrfinder_columns.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,5 @@ constexpr const char* hierarchyNode_colName = "Hierarchy node";

// PD-5155
constexpr const char* fusion_infix = "::"; // was: "/"


// Placeholder written to report columns that have no value
constexpr const char* na ("NA");
36 changes: 20 additions & 16 deletions stxtyper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@
* Dependencies: NCBI BLAST, gunzip (optional)
*
* Release changes:
* 1.0.31 01/14/2025 PD-5215 "re-enable reporting Name of closest sequence"
* 1.0.30 01/14/2025 PD-5215 "Closest reference accession" field should have two accessions separated by "," for two-subunit operons
* 1.0.29 01/10/2025 PD-5215 "Name of closest sequence": na for two-subunit operons
* 1.0.28 01/10/2025 PD-5215 blank fields -> na; "Closest reference accession" is na for two-subunit operons
* branch "gpipe_compat"
* 1.0.27 10/23/2024 PD-5155 "Hierarchy node" with mixed types is <stx1>::<stx2>
* 1.0.26 10/22/2024 PD-5085 Change column "Element length" to "Target length"
* 1.0.25 08/16/2024 PD-5085 AMRFinderPlus column names to match MicroBIGG-E
Expand Down Expand Up @@ -117,7 +122,6 @@ constexpr size_t intergenic_max {36}; // Max. intergenic region in the referenc
constexpr size_t slack = 30;

const string stxS ("stx");
const string na ("NA");



Expand Down Expand Up @@ -331,8 +335,8 @@ struct BlastAlignment
<< subclass //12 "Subclass"
<< operon //13 "Method"
<< targetEnd - targetStart /*targetAlign*/ //14 "Target length"
<< noString /*refLen*/ //15 "Reference sequence length"
<< noString /*refCoverage*/ //16 "% Coverage of reference sequence"
<< na /*refLen*/ //15 "Reference sequence length"
<< na /*refCoverage*/ //16 "% Coverage of reference sequence"
<< refIdentity //17 "% Identity to reference sequence"
<< length //18 "Alignment length"
<< refAccession //19 "Accession of closest sequence"
Expand All @@ -348,24 +352,24 @@ struct BlastAlignment
td << targetName
<< stxType_reported
<< operon
<< noString
<< na
<< targetStart + 1
<< targetEnd
<< strand;
if (subunit == 'B')
td << noString
<< noString
<< noString
<< noString;
td << na
<< na
<< na
<< na;
td << refAccession
<< subClass
<< refIdentity
<< refCoverage;
if (subunit == 'A')
td << noString
<< noString
<< noString
<< noString;
td << na
<< na
<< na
<< na;
}
td. newLn ();
}
Expand Down Expand Up @@ -519,7 +523,7 @@ struct Operon
al2->qc ();
QC_ASSERT (al1->targetName == al2->targetName);
QC_ASSERT (al1->targetStrand == al2->targetStrand);
QC_ASSERT (al1->targetEnd < al2->targetStart);
QC_ASSERT (al1->targetEnd <= al2->targetStart);
QC_ASSERT (al1->subunit != al2->subunit);
QC_ASSERT (al2->reported);
}
Expand Down Expand Up @@ -581,7 +585,7 @@ struct Operon
//const size_t refLen = al1->refLen + al2->refLen;
//const double refCoverage = double (al1->getAbsCoverage () + al2->getAbsCoverage ()) / double (refLen) * 100.0;
const size_t alignmentLen = al1->length + al2->length;
const string refAccessions (al1->refAccession + ", " + al2->refAccession);
const string refAccessions (al1->refAccession + "," + al2->refAccession);
const string fam (al1->getGenesymbol () + fusion_infix + al2->getGenesymbol ());
td << na // 1 "Protein identifier"
<< targetName // 2 "Contig id"
Expand All @@ -597,8 +601,8 @@ struct Operon
<< subclass //12 "Subclass"
<< operonType //13 "Method"
<< targetAlign //14 "Target length"
<< noString /*refLen*/ //15 "Reference sequence length"
<< noString /*refCoverage*/ //16 "% Coverage of reference sequence"
<< na /*refLen*/ //15 "Reference sequence length"
<< na /*refCoverage*/ //16 "% Coverage of reference sequence"
<< refIdentity //17 "% Identity to reference sequence"
<< alignmentLen //18 "Alignment length"
<< refAccessions //19 "Accession of closest sequence"
Expand Down
14 changes: 7 additions & 7 deletions test/amrfinder_integration.expected
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#Protein id Contig id Start Stop Strand Element symbol Element name Scope Type Subtype Class Subclass Method Target length Reference sequence length % Coverage of reference % Identity to reference Alignment length Closest reference accession Closest reference name HMM accession HMM description
NA partial 27 1048 + stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL 1022 99.41 337 AAA16362.1, AAS07607.1 Shiga toxin stx2 NA NA
NA partial_contig_end 3 661 - stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL_CONTIG_END 659 100.00 216 AAM70046.1, AAA16362.1 Shiga toxin stx2 NA NA
NA stx1a 218 1444 + stx1a_operon stx1a operon plus VIRULENCE STX_TYPE STX1 STX1A COMPLETE 1227 100.00 406 AAA98347.1, AAA71894.1 Shiga toxin stx1a NA NA
NA stx2_fs 2165 3232 + stx2_operon stx2 operon with frameshift plus VIRULENCE STX_TYPE STX2 STX2 FRAMESHIFT 1068 99.15 355 AAG01033.1, AAA16363.1 Shiga toxin stx2c NA NA
NA stx2_novel 216 1456 + stx2_operon Novel stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 COMPLETE_NOVEL 1241 99.76 410 AAA19623.1, AAA16363.1 Shiga toxin stx2c NA NA
NA stx2_stop 694 1653 + stx2_operon stx2 operon with internal stop plus VIRULENCE STX_TYPE STX2 STX2 INTERNAL_STOP 960 91.25 320 AUM09788.1 Shiga toxin stx2h subunit A NA NA
NA stx2c 1298 2538 - stx2c_operon stx2c operon plus VIRULENCE STX_TYPE STX2 STX2C COMPLETE 1241 100.00 410 AAA16363.1, AAS07596.1 Shiga toxin stx2c NA NA
NA partial 27 1048 + stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL 1022 NA NA 99.41 337 AAA16362.1,AAS07607.1 Shiga toxin stx2 NA NA
NA partial_contig_end 3 661 - stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL_CONTIG_END 659 NA NA 100.00 216 AAM70046.1,AAA16362.1 Shiga toxin stx2 NA NA
NA stx1a 218 1444 + stx1a_operon stx1a operon plus VIRULENCE STX_TYPE STX1 STX1A COMPLETE 1227 NA NA 100.00 406 AAA98347.1,AAA71894.1 Shiga toxin stx1a NA NA
NA stx2_fs 2165 3232 + stx2_operon stx2 operon with frameshift plus VIRULENCE STX_TYPE STX2 STX2 FRAMESHIFT 1068 NA NA 99.15 355 AAG01033.1,AAA16363.1 Shiga toxin stx2c NA NA
NA stx2_novel 216 1456 + stx2_operon Novel stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 COMPLETE_NOVEL 1241 NA NA 99.76 410 AAA19623.1,AAA16363.1 Shiga toxin stx2c NA NA
NA stx2_stop 694 1653 + stx2_operon stx2 operon with internal stop plus VIRULENCE STX_TYPE STX2 STX2 INTERNAL_STOP 960 NA NA 91.25 320 AUM09788.1 Shiga toxin stx2h subunit A NA NA
NA stx2c 1298 2538 - stx2c_operon stx2c operon plus VIRULENCE STX_TYPE STX2 STX2C COMPLETE 1241 NA NA 100.00 410 AAA16363.1,AAS07596.1 Shiga toxin stx2c NA NA
14 changes: 7 additions & 7 deletions test/amrfinder_integration2.expected
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#Protein id Contig id Start Stop Strand Element symbol Element name Scope Type Subtype Class Subclass Method Target length Reference sequence length % Coverage of reference % Identity to reference Alignment length Closest reference accession Closest reference name HMM accession HMM description Hierarchy node
NA partial 27 1048 + stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL 1022 99.41 337 AAA16362.1, AAS07607.1 Shiga toxin stx2 NA NA stxA2c::stxB2a
NA partial_contig_end 3 661 - stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL_CONTIG_END 659 100.00 216 AAM70046.1, AAA16362.1 Shiga toxin stx2 NA NA stxB2a::stxA2c
NA stx1a 218 1444 + stx1a_operon stx1a operon plus VIRULENCE STX_TYPE STX1 STX1A COMPLETE 1227 100.00 406 AAA98347.1, AAA71894.1 Shiga toxin stx1a NA NA stxA1a::stxB1a
NA stx2_fs 2165 3232 + stx2_operon stx2 operon with frameshift plus VIRULENCE STX_TYPE STX2 STX2 FRAMESHIFT 1068 99.15 355 AAG01033.1, AAA16363.1 Shiga toxin stx2c NA NA stxA2c::stxB2c
NA stx2_novel 216 1456 + stx2_operon Novel stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 COMPLETE_NOVEL 1241 99.76 410 AAA19623.1, AAA16363.1 Shiga toxin stx2c NA NA stxA2c::stxB2c
NA stx2_stop 694 1653 + stx2_operon stx2 operon with internal stop plus VIRULENCE STX_TYPE STX2 STX2 INTERNAL_STOP 960 91.25 320 AUM09788.1 Shiga toxin stx2h subunit A NA NA stxA2h
NA stx2c 1298 2538 - stx2c_operon stx2c operon plus VIRULENCE STX_TYPE STX2 STX2C COMPLETE 1241 100.00 410 AAA16363.1, AAS07596.1 Shiga toxin stx2c NA NA stxB2c::stxA2a
NA partial 27 1048 + stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL 1022 NA NA 99.41 337 AAA16362.1,AAS07607.1 Shiga toxin stx2 NA NA stxA2c::stxB2a
NA partial_contig_end 3 661 - stx2_operon Partial stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 PARTIAL_CONTIG_END 659 NA NA 100.00 216 AAM70046.1,AAA16362.1 Shiga toxin stx2 NA NA stxB2a::stxA2c
NA stx1a 218 1444 + stx1a_operon stx1a operon plus VIRULENCE STX_TYPE STX1 STX1A COMPLETE 1227 NA NA 100.00 406 AAA98347.1,AAA71894.1 Shiga toxin stx1a NA NA stxA1a::stxB1a
NA stx2_fs 2165 3232 + stx2_operon stx2 operon with frameshift plus VIRULENCE STX_TYPE STX2 STX2 FRAMESHIFT 1068 NA NA 99.15 355 AAG01033.1,AAA16363.1 Shiga toxin stx2c NA NA stxA2c::stxB2c
NA stx2_novel 216 1456 + stx2_operon Novel stx2 operon plus VIRULENCE STX_TYPE STX2 STX2 COMPLETE_NOVEL 1241 NA NA 99.76 410 AAA19623.1,AAA16363.1 Shiga toxin stx2c NA NA stxA2c::stxB2c
NA stx2_stop 694 1653 + stx2_operon stx2 operon with internal stop plus VIRULENCE STX_TYPE STX2 STX2 INTERNAL_STOP 960 NA NA 91.25 320 AUM09788.1 Shiga toxin stx2h subunit A NA NA stxA2h
NA stx2c 1298 2538 - stx2c_operon stx2c operon plus VIRULENCE STX_TYPE STX2 STX2C COMPLETE 1241 NA NA 100.00 410 AAA16363.1,AAS07596.1 Shiga toxin stx2c NA NA stxB2c::stxA2a
2 changes: 1 addition & 1 deletion test/basic.expected
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ partial_contig_end stx2 PARTIAL_CONTIG_END 100.00 3 661 - AAA16362.1 stxA2c 100.
stx1a stx1a COMPLETE 100.00 218 1444 + AAA98347.1 stxA1a 100.00 100.00 AAA71894.1 stxB1a 100.00 100.00
stx2_fs stx2 FRAMESHIFT 99.15 2165 3232 + AAG01033.1 stxA2c 98.87 82.19 AAA16363.1 stxB2c 100.00 100.00
stx2_novel stx2 COMPLETE_NOVEL 99.76 216 1456 + AAA19623.1 stxA2 99.69 100.00 AAA16363.1 stxB2c 100.00 100.00
stx2_stop stx2 INTERNAL_STOP 694 1653 + AUM09788.1 stxA2h 91.25 100.00
stx2_stop stx2 INTERNAL_STOP NA 694 1653 + AUM09788.1 stxA2h 91.25 100.00 NA NA NA NA
stx2c stx2c COMPLETE 100.00 1298 2538 - AAS07596.1 stxA2 100.00 100.00 AAA16363.1 stxB2c 100.00 100.00
2 changes: 1 addition & 1 deletion test/cases.expected
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#target_contig stx_type operon identity target_start target_stop target_strand A_reference A_reference_subtype A_identity A_coverage B_reference B_reference_subtype B_identity B_coverage
A2l_a2e_equidistant stx2l COMPLETE 99.02 14780 16020 + CAP17609.1 stxA2l 98.75 100.00 CAP17610.1 stxB2 100.00 100.00
PD-4797_multirow stx1 PARTIAL 100.00 1625 2852 - AAA98347.1 stxA1a 100.00 100.00 AAA71894.1 stxB1a 100.00 86.67
PD-4897_multirow_contig_end stx2 PARTIAL_CONTIG_END 11 274 + AAA16361.1 stxB2b 100.00 100.00
PD-4897_multirow_contig_end stx2 PARTIAL_CONTIG_END NA 11 274 + NA NA NA NA AAA16361.1 stxB2b 100.00 100.00
PD-4898_A2a_B2l stx2a COMPLETE 100.00 718 1958 + QZL10984.1 stxA2a 100.00 100.00 QZL10985.1 stxB2 100.00 100.00
PD-5064_ambiguous_bases stx2 AMBIGUOUS 99.27 333 1573 - AAA19623.1 stxA2 99.38 100.00 AAM90978.1 stxB2a 98.89 100.00
stx2d_better_stxB2k stx2d COMPLETE 100.00 3 1243 + AAM22256.1 stxA2 100.00 100.00 MCW3229578.1 stxB2d 100.00 100.00
10 changes: 5 additions & 5 deletions test/virulence_ecoli.expected
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ stx1a-O157-GPU96MM:7:AB035142 stx1a COMPLETE 100.00 1 1227 + AAA98347.1 stxA1a 1
stx1a-O165-HI-2:14:AB048232 stx1a COMPLETE 100.00 1 1227 + AAA98347.1 stxA1a 100.00 100.00 AAA71894.1 stxB1a 100.00 100.00
stx1a-O48-94C:11:Z36899 stx1a COMPLETE 100.00 1 1227 + CAA85366.1 stxA1a 100.00 100.00 AAA71894.1 stxB1a 100.00 100.00
stx1a-ONT-HI-A:15:AB071620 stx1a COMPLETE 99.51 1 1227 + UIU88878.1 stxA1a 99.37 100.00 BAC10988.1 stxB1a 100.00 100.00
stx1c-O174-DG131-3:20:Z36901 stx1 PARTIAL_CONTIG_END 1 948 + CAA85370.1 stxA1c 100.00 100.00
stx1c-ONT-BCN26:19:DQ449666 stx1 PARTIAL_CONTIG_END 1 948 + CAA85370.1 stxA1c 100.00 100.00
stx1c-ONT-HI-B:17:AB071622 stx1 PARTIAL_CONTIG_END 1 948 + CAA85370.1 stxA1c 98.73 100.00
stx1c-ONT-HI-C:18:AB071624 stx1 PARTIAL_CONTIG_END 1 948 + CAA85370.1 stxA1c 100.00 100.00
stx1c-O174-DG131-3:20:Z36901 stx1 PARTIAL_CONTIG_END NA 1 948 + CAA85370.1 stxA1c 100.00 100.00 NA NA NA NA
stx1c-ONT-BCN26:19:DQ449666 stx1 PARTIAL_CONTIG_END NA 1 948 + CAA85370.1 stxA1c 100.00 100.00 NA NA NA NA
stx1c-ONT-HI-B:17:AB071622 stx1 PARTIAL_CONTIG_END NA 1 948 + CAA85370.1 stxA1c 98.73 100.00 NA NA NA NA
stx1c-ONT-HI-C:18:AB071624 stx1 PARTIAL_CONTIG_END NA 1 948 + CAA85370.1 stxA1c 100.00 100.00 NA NA NA NA
stx1c-Out-HI-N:16:AB048237 stx1c COMPLETE 100.00 1 1228 + BAB83022.1 stxA1c 100.00 100.00 BAB83023.1 stxB1c 100.00 100.00
stx1d-ONT-92-1251:21:AB050958 stx1 PARTIAL_CONTIG_END 1 948 + AAO19475.1 stxA1d 100.00 100.00
stx1d-ONT-92-1251:21:AB050958 stx1 PARTIAL_CONTIG_END NA 1 948 + AAO19475.1 stxA1d 100.00 100.00 NA NA NA NA
stx1d-ONT-92-1252:22:AB050959 stx1d COMPLETE 100.00 1 1227 + AAO19475.1 stxA1d 100.00 100.00 AAO19476.1 stxB1d 100.00 100.00
stx1d-ONT-AB8SF:23:AY986980 stx1d COMPLETE 100.00 1 1227 + AAO19475.1 stxA1d 100.00 100.00 AAO19476.1 stxB1d 100.00 100.00
stx1d-ONT-MHI813:24:AY170851 stx1d COMPLETE 100.00 1 1227 + AAO19475.1 stxA1d 100.00 100.00 AAO19476.1 stxB1d 100.00 100.00
Expand Down
10 changes: 5 additions & 5 deletions test_stxtyper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@ FAILURES=0
TESTS=0
TEST_TEXT=''

echo "TERM=$TERM"
# echo "TERM=$TERM"

# some color macros
if [ "$TERM" == "" ] || [ "$TERM" == "dumb" ]
if [ "$TERM" == "" ] || [ "$TERM" == "dumb" ] || [ ! -t 1 ]
then
green='' # no colors
red=''
Expand All @@ -62,7 +62,7 @@ function test_input_file {
then
echo "${red}not ok: $STXTYPER returned a non-zero exit value indicating a failure of the software${reset}"
echo "# $STXTYPER $options -n test/$test_base.fa > test/$test_base.got"
TEST_TEXT="$TEST_TEXT"$'\n'"Failed $test_base"
TEST_TEXT="$TEST_TEXT"$'\n'"${red}Failed $test_base${reset}"
return 1
else
if ! diff -q "test/$test_base.expected" "test/$test_base.got"
Expand All @@ -73,7 +73,7 @@ function test_input_file {
diff "test/$test_base.expected" "test/$test_base.got"
echo "# To approve run:"
echo "# mv test/$test_base.got test/$test_base.expected "
TEST_TEXT="$TEST_TEXT"$'\n'"Failed $test_base"
TEST_TEXT="$TEST_TEXT"$'\n'"${red}Failed $test_base${reset}"
return 1
else
echo "${green}ok:${reset} test/$test_base.fa"
Expand All @@ -95,7 +95,7 @@ then
diff "test/basic.nuc_out.expected" "test/basic.nuc_out.got"
echo "# To approve run:"
echo "# mv test/basic.nuc_out.got test/basic.nuc_out.expected "
TEST_TEXT="$TEST_TEXT"$'\n'"Failed basic --nucleotide_output test"
TEST_TEXT="$TEST_TEXT"$'\n'"${red}Failed basic --nucleotide_output test${reset}"
FAILURES=$(( $FAILURES + 1 ))
else
echo "${green}ok:${reset} --nucleotide_output test/basic.nuc_out.got options worked"
Expand Down
2 changes: 1 addition & 1 deletion version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.0.27
1.0.31

0 comments on commit 48d8ae1

Please sign in to comment.