Skip to content

Commit

Permalink
Merge branch 'DOR-926-polish-model-names' into 'master'
Browse files: browse the repository at this point in the history
Dorado Polish: fix the model naming convention

See merge request machine-learning/dorado!1314
  • Loading branch information
svc-jstone committed Dec 16, 2024
2 parents 492ef84 + f507d2f commit 8e0e478
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 18 deletions.
6 changes: 0 additions & 6 deletions dorado/cli/polish.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -588,9 +588,6 @@ const polisher::ModelConfig resolve_model(const polisher::BamInfo& bam_info,
// Example: dna_r10.4.1_e8.2_400bps_hac@v5.0.0_polish_rl_mv
std::string model_name = basecaller_model + polish_model_suffix;

- // Example: dna_r10.4.1_e8.2_400bps_hac_v5.0.0_polish_rl_mv
- std::replace(std::begin(model_name), std::end(model_name), '@', '_');

spdlog::info("Downloading model: '{}'", model_name);
model_dir = download_model(model_name);

Expand All @@ -613,9 +610,6 @@ const polisher::ModelConfig resolve_model(const polisher::BamInfo& bam_info,
// Example: dna_r10.4.1_e8.2_400bps_hac@v5.0.0_polish_rl_mv
std::string model_name = basecaller_model + polish_model_suffix;

- // Example: dna_r10.4.1_e8.2_400bps_hac_v5.0.0_polish_rl_mv
- std::replace(std::begin(model_name), std::end(model_name), '@', '_');

spdlog::info("Downloading model: '{}'", model_name);
model_dir = download_model(model_name);

Expand Down
16 changes: 8 additions & 8 deletions dorado/models/models.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1121,29 +1121,29 @@ namespace polisher {

const std::vector<ModelInfo> models = {
ModelInfo{
- "dna_r10.4.1_e8.2_400bps_hac_v5.0.0_polish_rl",
- "fdb6d18fe7e93f141c8e15f1b014e338e7cdd2b3f390abd0ff85cede33b50cc7",
+ "dna_r10.4.1_e8.2_400bps_hac@v5.0.0_polish_rl",
+ "d343b4394b904d219257ad188c82ece63b935f15d78f09f551e591b2275da4b9",
CC::UNKNOWN,
ModelVariantPair{},
ModsVariantPair{},
},
ModelInfo{
- "dna_r10.4.1_e8.2_400bps_hac_v5.0.0_polish_rl_mv",
- "719389cf723a4ddbd020690c20708885ed6b5000c51a2144a5af5b8064a055d2",
+ "dna_r10.4.1_e8.2_400bps_hac@v5.0.0_polish_rl_mv",
+ "928d9bcf3d68162eff479ada5839c5df3faa0ad393658729511aedffe65f089c",
CC::UNKNOWN,
ModelVariantPair{},
ModsVariantPair{},
},
ModelInfo{
- "dna_r10.4.1_e8.2_400bps_sup_v5.0.0_polish_rl",
- "3c0c7fbf2f204390ca6dfafaefc25a3f999870368e4a7b5d238eb600aa7e781b",
+ "dna_r10.4.1_e8.2_400bps_sup@v5.0.0_polish_rl",
+ "6d8c5a8ce45311c25f824453d0af997fbe2f63a5f734fdb4d884d285ddafec33",
CC::UNKNOWN,
ModelVariantPair{},
ModsVariantPair{},
},
ModelInfo{
- "dna_r10.4.1_e8.2_400bps_sup_v5.0.0_polish_rl_mv",
- "eb9b4691bda3133e9b991aa1a2666cc632bbd5c2f803dff704ef1265f4315a90",
+ "dna_r10.4.1_e8.2_400bps_sup@v5.0.0_polish_rl_mv",
+ "0e0cb175aa41636de835d2abb5330b91fed14a00f811804edf983bc086cf477a",
CC::UNKNOWN,
ModelVariantPair{},
ModsVariantPair{},
Expand Down
6 changes: 3 additions & 3 deletions tests/cram/polish/cram-polish-01-model-download.t
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ Negative test: BAM has a model which is not available for download in auto mode.
> echo "Exit code: $?"
> grep "\[error\]" out/out.fasta.stderr | sed -E 's/.*\[error\] //g'
Exit code: 1
- Selected model doesn't exist: dna_r10.4.1_e8.2_400bps_hac_v1.0.0_polish_rl_mv
- Could not download model: dna_r10.4.1_e8.2_400bps_hac_v1.0.0_polish_rl_mv
+ Selected model doesn't exist: dna_r10.4.1_e8.2_400bps_hac@v1.0.0_polish_rl_mv
+ Could not download model: dna_r10.4.1_e8.2_400bps_hac@v1.0.0_polish_rl_mv

Negative test: BAM has no models listed (no RG tags).
$ rm -rf out; mkdir -p out
Expand Down Expand Up @@ -101,7 +101,7 @@ Load a model from path.
Download an explicit polishing model by name with `--skip-model-compatibility-check`.
$ rm -rf out; mkdir -p out
> in_dir=${TEST_DATA_DIR}/polish/test-01-supertiny
- > model="dna_r10.4.1_e8.2_400bps_hac_v5.0.0_polish_rl_mv"
+ > model="dna_r10.4.1_e8.2_400bps_hac@v5.0.0_polish_rl_mv"
> ${DORADO_BIN} polish --skip-model-compatibility-check --model "${model}" --device cpu ${in_dir}/calls_to_draft.bam ${in_dir}/draft.fasta.gz -t 4 --infer-threads 1 -vv > out/out.fasta 2> out/out.fasta.stderr
> echo "Exit code: $?"
> ${DORADO_BIN} aligner ${in_dir}/ref.fasta.gz out/out.fasta 1> out/out.sam 2> out/out.sam.stderr
Expand Down
2 changes: 1 addition & 1 deletion tests/test_dorado_polish.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ CRAM=$(pwd)/cram-0.6/cram.py
popd

# Download the model once.
- MODEL_NAME="dna_r10.4.1_e8.2_400bps_hac_v5.0.0_polish_rl_mv"
+ MODEL_NAME="dna_r10.4.1_e8.2_400bps_hac@v5.0.0_polish_rl_mv"
MODEL_DIR=${output_dir}/${MODEL_NAME}
if [[ ! -d "${MODEL_DIR}" ]]; then
${DORADO_BIN} download --model "${MODEL_NAME}" --models-directory ${output_dir}
Expand Down

0 comments on commit 8e0e478

Please sign in to comment.