# vim:noet:ts=3:nowrap
#
# @file Makefile.params
# @brief Master parameter file where all user specific parameters should be set.
#
# @author Samuel Larkin, Darlene Stewart and Eric Joanis
#
# Traitement multilingue de textes / Multilingual Text Processing
# Centre de recherche en technologies numériques / Digital Technologies Research Centre
# Conseil national de recherches Canada / National Research Council Canada
# Copyright 2008, 2012, 2015, 2016, 2018, Sa Majeste la Reine du Chef du Canada
# Copyright 2008, 2012, 2015, 2016, 2018, Her Majesty in Right of Canada
# Print the PortageII Copyright no matter where we start from -- but only once.
ifeq (${MAKELEVEL},0)
$(shell portage_info)
# subprograms launched by the framework don't need to bleat all the time either...
export PORTAGE_INTERNAL_CALL=1
endif
################################################################################
# User definable variables
# Source/from language (must be two lowercase letters)
SRC_LANG ?= en
# Target/to language (must be two lowercase letters)
TGT_LANG ?= fr
# Source locale country code (two uppercase letters).
SRC_LOCALE_COUNTRY ?= CA
# Target locale country code (two uppercase letters).
TGT_LOCALE_COUNTRY ?= CA
########################################
# Corpora specification
# Here we specify the stems of the corpora files.
# Files should look like this: <PREFIX>_<LANGUAGE>.al
# e.g. test1_fr.al
# Warning: TRAIN_TC, TRAIN_SPARSE, TUNE_DECODE, TUNE_RESCORE, TUNE_CE may not contain
# more than one <PREFIX>.
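# Illustrative example: with the defaults in this file (en-fr, TRAIN_TM = tm-train,
# TUNE_DECODE = dev1), the corpora directory is expected to contain files such as
# tm-train_en.al, tm-train_fr.al, dev1_en.al and dev1_fr.al.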
# Set PRIMARY_LM to your in-domain target-language LM training corpus.
# If there is a generic LM for your target language, it will automatically be
# used in a MixLM with your main LM.
# If you specify two or more LMs here, they will be grouped in a MixLM (with
# the generic LM if it exists).
# If you want to manually set your LM or MixLM parameters differently, comment
# this variable out and define the LM-related variables below.
# Warning: if you use PRIMARY_LM, leave TRAIN_LM and MIXLM undefined.
PRIMARY_LM ?= lm-train
# TRAIN_LM is used to train regular (non mix) language models.
# NOTE: Do not include stems for any files listed in LM_PRETRAINED_TGT_LMS below.
# NOTE: Do not define TRAIN_LM if you defined PRIMARY_LM above.
#TRAIN_LM ?= lm-train
# LM_PRETRAINED_TGT_LMS specifies file paths to additional pre-trained target
# language LMs to use with the lm module, for example a generic model LM.
# These file names should end with the extension .tplm and must include
# the string _${TGT_LANG}, where ${TGT_LANG} is the target language, e.g. _fr .
# Paths may be absolute or relative to the location of this Makefile.params file.
#LM_PRETRAINED_TGT_LMS ?= ${PORTAGE_GENERIC_MODEL}/generic-2.1/lm/generic-2.1_${TGT_LANG}.tplm
# MIXLM is used to create a mixture language model which itself is composed of
# several other language models.
# NOTE: Do not include stems for any files listed in MIXLM_PRETRAINED_TGT_LMS below.
# NOTE: Do not define MIXLM if you defined PRIMARY_LM above.
#MIXLM ?= sublm1 sublm2 sublm3
# MIXLM_PRETRAINED_TGT_LMS specifies file paths to additional pre-trained target
# language LMs to use with the mixlm module, for example a generic model LM.
# These file names should end with the extension .tplm and must include
# the string _${TGT_LANG}, where ${TGT_LANG} is the target language, e.g. _fr .
# Paths may be absolute or relative to the location of this Makefile.params file.
# Note: The corresponding source language LMs are also required, should have
# the same name except for the language code, and should be located in the same
# directory.
# NOTE: if you used PRIMARY_LM above and the Generic LM exists for your target
# language, it will automatically be used if no other MIXLM_PRETRAINED_TGT_LMS
# are defined.
#MIXLM_PRETRAINED_TGT_LMS ?= ${PORTAGE_GENERIC_MODEL}/generic-2.1/lm/generic-2.1_${TGT_LANG}.tplm
# TRAIN_COARSELM is used to train coarse language models.
# By default, we train coarse LMs for the corpora listed in PRIMARY_LM, or
# TRAIN_LM (if no PRIMARY_LM), or MIXLM (if no PRIMARY_LM or TRAIN_LM).
# If you want to use different corpora, uncomment and define TRAIN_COARSELM.
# => TRAIN_COARSELM = PRIMARY_LM or TRAIN_LM or MIXLM
#TRAIN_COARSELM ?= coarselm-train
# TRAIN_BILM is used to train BiLM models.
# By default, we train BiLMs for the corpora listed in TRAIN_TM; if you
# want to use different corpora, uncomment and define TRAIN_BILM.
# => TRAIN_BILM = TRAIN_TM
#TRAIN_BILM ?= bilm-train
# TRAIN_TC is used to train a truecasing model.
# By default, we train the TC model using the first corpus listed in PRIMARY_LM,
# or TRAIN_LM (if no PRIMARY_LM), or MIXLM (if no PRIMARY_LM or TRAIN_LM).
# If you want to use a different corpus, uncomment and define TRAIN_TC
# explicitly.
# => TRAIN_TC = first word of PRIMARY_LM or TRAIN_LM or MIXLM
#TRAIN_TC ?= tc-train
# TRAIN_TM is used to create the translation tables.
# NOTE: Do not include stems for any files listed in TM_PRETRAINED_TMS below.
TRAIN_TM ?= tm-train
# TM_PRETRAINED_TMS specifies file paths to additional pre-trained translation
# tables to use with the tm module, for example a generic model TM.
# These file names start with cpt. and end in .${SRC_LANG}2${TGT_LANG}.gz,
# where ${SRC_LANG} is the source language and ${TGT_LANG} is the target
# language, e.g. .en2fr.gz .
# Paths may be absolute or relative to the location of this Makefile.params file.
#TM_PRETRAINED_TMS ?= ${PORTAGE_GENERIC_MODEL}/generic-2.1/tm/cpt.generic-2.1.${SRC_LANG}2${TGT_LANG}.gz
# MIXTM is used to create a mixture translation model from several other
# translation models.
# Specify the in-domain corpus first (its word alignment models are needed),
# or specify an alternate training corpus in MIXTM_TRAIN_MIX below.
# NOTE: Do not include stems for any files listed in MIXTM_PRETRAINED_TMS below.
#MIXTM ?= subtm1 subtm2
# MIXTM_PRETRAINED_TMS specifies file paths to additional pre-trained translation
# tables to use in creating a mixture translation model, for example a generic
# model TM.
# These file names should start with cpt.
# Paths may be absolute or relative to the location of this Makefile.params file.
#MIXTM_PRETRAINED_TMS ?= ${PORTAGE_GENERIC_MODEL}/generic-2.1/tm/cpt.generic-2.1.${SRC_LANG}2${TGT_LANG}.gz
# MIXTM_TRAIN_MIX is an in-domain corpus used to train the weights for mixing
# the component translation models (its word alignment models are needed)
# By default, we use the first name listed in MIXTM; if you want to use a
# different corpus, uncomment and define MIXTM_TRAIN_MIX.
# => MIXTM_TRAIN_MIX = first word of MIXTM
#MIXTM_TRAIN_MIX ?= subtm1
### NNJM training
# NNJM training can be done in three modes:
# 1: 1-pass training on your own in-domain data only:
# - define NNJM_TRAIN_CORPUS to point to your in-domain data, e.g.,
# ${TRAIN_TM}
# - leave NNJM_FINE_TUNING_TRAIN_CORPUS and NNJM_PRETRAINED_NNJM empty
# 2: 2-pass training, first on your own generic data, with fine-tuning on your
# own in-domain data:
# - define NNJM_TRAIN_CORPUS to point to your generic data
# - define NNJM_FINE_TUNING_TRAIN_CORPUS to point to your in-domain data
# - leave NNJM_PRETRAINED_NNJM empty
# 3: 2-pass training, starting from a pre-trained NNJM (e.g., NRC's generic
# one), with fine-tuning on your own in-domain data:
# - define NNJM_FINE_TUNING_TRAIN_CORPUS to point to your in-domain data
# - define NNJM_PRETRAINED_NNJM to point to the pre-trained model
# - leave NNJM_TRAIN_CORPUS empty.
# Recommendation: 2-pass training is best. Use the NRC's generic model if your
# language pair is en-fr (mode 3), or use the largest generic corpus you have
# access to (mode 2). See the illustrative sketch below.
# NNJM main model:
# Define NNJM_TRAIN_CORPUS to train the main NNJM from scratch on this corpus,
# typically large generic data.
# NOTE: Either use NNJM_TRAIN_CORPUS or NNJM_PRETRAINED_NNJM but not both.
#NNJM_TRAIN_CORPUS ?=
NNJM_DEV_CORPUS ?= ${TUNE_DECODE} # Validation corpus, for early stopping
NNJM_TEST_CORPUS ?= # Test corpus, optional
# NNJM fine tuning.
# Define NNJM_FINE_TUNING_TRAIN_CORPUS to fine tune your NNJM model.
# This triggers a second-pass training on an existing model with in-domain data.
NNJM_FINE_TUNING_TRAIN_CORPUS ?=
NNJM_FINE_TUNING_DEV_CORPUS ?= ${NNJM_DEV_CORPUS} # Validation corpus, for early stopping
NNJM_FINE_TUNING_TEST_CORPUS ?= ${NNJM_TEST_CORPUS} # Test corpus, optional
# NNJM_PRETRAINED_NNJM specifies the file path to a pre-trained Neural
# Network Joint Model (NNJM) to use in your canoe.ini. The NNJM's filename must be
# 'model' and it should use memory mapped class files.
# Path may be absolute or relative to the location of this Makefile.params file.
#NNJM_PRETRAINED_NNJM ?= ${PORTAGE_GENERIC_MODEL}/generic-2.1/nnjm/nnjm.generic-2.1.${SRC_LANG}2${TGT_LANG}/model
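# Illustrative sketch only (mode 3, assuming an en-fr system with the NRC
# generic model installed under ${PORTAGE_GENERIC_MODEL}); these are
# commented-out examples, not defaults:
#NNJM_FINE_TUNING_TRAIN_CORPUS ?= ${TRAIN_TM}
#NNJM_PRETRAINED_NNJM ?= ${PORTAGE_GENERIC_MODEL}/generic-2.1/nnjm/nnjm.generic-2.1.${SRC_LANG}2${TGT_LANG}/model
# (NNJM_TRAIN_CORPUS stays empty in this mode.)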
# TRAIN_LDM is used to train a Lexicalized Distortion Model (LDM)
# (the word alignment models are needed).
# By default, we use the corpora listed in TRAIN_TM and MIXTM; if you want to
# use different corpora, uncomment and define TRAIN_LDM.
# => TRAIN_LDM = TRAIN_TM MIXTM
#TRAIN_LDM ?= ldm-train
# TRAIN_HLDM is used to train a Hierarchical Lexicalized Distortion Model (HLDM)
# (the word alignment models are needed).
# By default, we use the corpora listed in TRAIN_LDM if it is defined, or
# TRAIN_TM and MIXTM if TRAIN_LDM is not defined; if you want to use different
# corpora, uncomment and define TRAIN_HLDM.
# => TRAIN_HLDM = TRAIN_LDM or TRAIN_TM MIXTM
#TRAIN_HLDM ?= hldm-train
# TRAIN_SPARSE is used to train sparse features
# It should point to the concatenation of all your TRAIN_TM and MIXTM corpora in use.
# Warning: you must create that concatenated corpus manually or outside the framework.
# By default we use the first corpus listed in TRAIN_TM;
# if you want to use a different corpus, uncomment and define TRAIN_SPARSE.
# => TRAIN_SPARSE = first word of TRAIN_TM
#TRAIN_SPARSE ?= sparse-train
# TRAIN_WCL is used to train word classes for use in coarse models.
# It should point to the concatenation of all your training corpora.
# By default, we use the TRAIN_LM, MIXLM, TRAIN_TM, MIXTM corpora; if you
# want to use different corpora, uncomment and define TRAIN_WCL.
# => TRAIN_WCL = TRAIN_LM MIXLM TRAIN_TM MIXTM
#TRAIN_WCL ?= wcl-train
# TUNE_DECODE is used to tune the decoding weights (using tune.py).
TUNE_DECODE ?= dev1
# TUNE_DECODE_VARIANTS is used to tune and test using multiple tuning sets.
# Typically, each variant is a 90% sample subset of the TUNE_DECODE set.
# Ex. If TUNE_DECODE is dev1, include "a b" in TUNE_DECODE_VARIANTS to tune
# with dev1a and dev1b in addition to dev1.
# If variants are specified, 90% sample variants of the tuning set are created
# automatically in the corpora directory if the variant files do not already exist.
#TUNE_DECODE_VARIANTS ?= a b c d
# PLIVE_DECODE_VARIANT is used to select which tuning run (which one of
# TUNE_DECODE_VARIANTS) to use for PortageLive.
# Leave PLIVE_DECODE_VARIANT undefined or blank to use weights from the main
# tuning run based on TUNE_DECODE, or specify one of the variants from
# TUNE_DECODE_VARIANTS to use weights from the tuning run corresponding to
# that variant.
# Recommended usage: after tuning, set this to the variant giving the best
# BLEU score on the test sets.
#PLIVE_DECODE_VARIANT ?=
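# For example (illustrative only): if TUNE_DECODE_VARIANTS were "a b c d" and
# variant "b" gave the best BLEU score on the test sets, one might set:
#PLIVE_DECODE_VARIANT ?= b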
# TUNE_RESCORE is used to tune the rescoring weights (using rescore.py).
#TUNE_RESCORE ?= dev2
# TUNE_CE is used to train confidence estimation.
# Note: it's OK for TUNE_DECODE and TUNE_RESCORE to be the same file, but
# TUNE_CE must be completely distinct, not only from all training data, but
# also from all other tuning data.
#TUNE_CE ?= dev3
# TEST_SET files are used to estimate the translation quality of the system.
TEST_SET ?= test1 test2
# Uncomment if you have source text to translate that doesn't have a reference.
#TRANSLATE_SET ?=
# Uncomment if you have multiple references for your heldout sets.
# Note that all dev and test sets must have the same number of references.
# <stem>_<tgt_language>#.al
# Where # is some reference identifier.
# e.g. dev_fr1.al, dev_fr2.al, dev_fr3.al & dev_fr4.al
#REFERENCE_INDICES ?= 1 2 3 4
########################################
# Features that can be enabled.
# Normally, the mixlm weights computed for TUNE_DECODE are also used when
# translating the test sets because computing the weights dynamically is
# not supported in deployed PortageLive systems.
# USE_DYNAMIC_MIXLM applies only when a MixLM is used.
# Comment out to use static (TUNE_DECODE) mixlm weights for test sets;
# uncomment to enable dynamic calculation of mixlm weights for test sets.
#USE_DYNAMIC_MIXLM ?= 1
# Train and apply rescoring if this variable is defined.
# Expensive! Use only if the last small BLEU increment is important to you.
# Comment out to disable rescoring; uncomment to enable.
#DO_RESCORING = 1
# Tune and apply confidence estimation if this variable is defined.
# Comment out to disable confidence estimation; uncomment to enable.
#DO_CE = 1
# Train and use a Lexicalized Distortion Model (LDM).
# Comment out to disable using an LDM; uncomment to enable.
#USE_LDM = 1
# Train and use a Hierarchical Lexicalized Distortion Model (HLDM).
# HLDM seems to be quite effective in most scenarios, so we enable it by default.
# Comment out to disable using an HLDM; uncomment to enable.
USE_HLDM = 1
# Train and use a sparse model
# Comment out to disable using a sparse model; uncomment to enable.
USE_SPARSE = 1
# Train and use coarse LMs.
# If enabled, by default this will add 200-class and 800-class coarse LMs.
# Comment out to disable using coarse LMs; uncomment to enable.
USE_COARSELM = 1
# Train and use (coarse) BiLMs.
# If enabled, by default this will train a 400bi-400s-400t coarse BiLM
# (400 bitoken classes with 400 src word classes and 400 tgt word classes).
# Comment out to disable using BiLMs (default); uncomment to enable.
#USE_BILM = 1
# Train and apply truecasing if this variable is defined.
# Comment out to disable truecasing; uncomment to enable.
DO_TRUECASING = 1
# If USE_SIGPRUNING is set, phrase tables will be filtered using significance
# pruning before they are used. Significance pruning removes phrase pairs that
# are not statistically well attested in the training corpus. Sig-pruning results
# in much smaller phrase tables, usually without loss in BLEU, sometimes even
# with a gain in BLEU.
#USE_SIGPRUNING = 1
########################################
# Tokenization and other preprocessing
# If you have your own tokenizer or detokenizer and you want to use it, then
# set the following variables to the command line that runs it. Note that the
# variable names contain the source or target language two-letter identifier.
# For example, for Spanish source you would define TOKENIZER_es; for French
# target you would define DETOKENIZER_fr. More generally,
# {DE|}TOKENIZER_{${SRC_LANG}|${TGT_LANG}}.
# Examples of defining what tokenizers we want to use:
# Call opennlp instead of the Portage tokenizer:
#TOKENIZER_en := opennlp TokenizerME /modeldir/en-model.bin
# Call the Portage tokenizer:
#TOKENIZER_fr := utokenize.pl -noss -lang=fr
# Call the Portage tokenizer, using fix-slashes.pl to separate pairs of words
# joined by / (this is now the default for fr, en, es, da):
#TOKENIZER_en := { set -o pipefail; fix-slashes.pl | utokenize.pl -noss -lang=en; }
#TOKENIZER_fr := { set -o pipefail; fix-slashes.pl | utokenize.pl -noss -lang=fr; }
# Do Chinese segmentation using ICTCLAS (assuming you have it):
#TOKENIZER_ch := { set -o pipefail; iconv -c -f UTF-8 -t CN-GB | ictclas_preprocessing.pl | ictclas | ictclas_postprocessing.pl | iconv -c -f CN-GB -t UTF-8; }
# Or do Chinese segmentation using the LDC-based segmenter we distribute:
#TOKENIZER_ch := chinese_segmenter.pl
# Do Arabic tokenization in Portage (using precomputed MADA map; requires MADA):
#TOKENIZER_ar ?= tokenize_plugin ar
# Defining what detokenizers we want to use:
#DETOKENIZER_en ?= opennlp DetokenizerME /modeldir/en-model.bin
#DETOKENIZER_fr ?= udetokenize.pl -lang=fr
# If you have ictclas installed and want to use it to tokenize Chinese,
# uncomment the following:
# USE_ICTCLAS ?= 1
# Language-specific commands to mark up source dev/test files.
#MARK_RULE_en ?= canoe-escapes.pl -add
#MARK_RULE_fr ?= canoe-escapes.pl -add
#MARK_RULE_ch ?= { chinese_rule_markup.pl | chinese_rule_create.pl; }
########################################
# LM Toolkit
# Change LM_TOOLKIT's value depending on the LM toolkit you have. If you use
# SRILM or MITLM, their executable scripts and programs must be on your PATH.
# LM_TOOLKIT={SRI,IRST,MIT}
# where SRI => SRILM toolkit
# IRST => IRSTLM toolkit
# MIT => MITLM toolkit
LM_TOOLKIT = MIT
# The prefix_root where we can find IRSTLM/bin, which must also be on your
# PATH. (Only needed if you are using IRSTLM - see next variable.)
IRSTLM ?= $(PORTAGE)/pkgs/irstlm
########################################
# Parallelism levels and cluster control.
# How many CPUs should each PortageLive request use?
# You can increase this parameter if your PortageLive server has multiple
# cores. Monitor use to make sure your server does not get saturated. You can
# adjust this parameter later for a running system by changing the '-n <N>'
# parameter in soap-translate.sh for each installed PortageLive context. To
# retroactively add parallelism to previously trained PortageLive contexts, add
# "-w=3 -n=<n>" to their soap-translate.sh.
PARALLELISM_LEVEL_PORTAGELIVE ?= 1
# If you are on a cluster that is run-parallel.sh friendly, define the
# following to force cluster mode. You normally don't need to do so, though,
# since clusters are detected automatically below.
#USING_CLUSTER ?= 1
# If you are on a cluster but you want to force single computer mode,
# uncomment the following line:
#NOCLUSTER ?= 1
# Automatically detects if we are on a cluster.
ifeq ($(strip $(shell on-cluster.sh && echo "true")),true)
USING_CLUSTER ?= 1
endif
ifdef NOCLUSTER
USING_CLUSTER =
endif
OSTYPE ?= $(shell uname -s)
ifdef USING_CLUSTER
PARALLELISM_LEVEL_CORPORA ?= 10
PARALLELISM_LEVEL_LM ?= 5
PARALLELISM_LEVEL_LDM ?= 30
PARALLELISM_LEVEL_TM ?= 5
PARALLELISM_LEVEL_SPARSE ?= 10
PARALLELISM_LEVEL_TUNE_DECODE ?= 10
PARALLELISM_LEVEL_TUNE_RESCORE ?= 10
PARALLELISM_LEVEL_TUNE_CONFIDENCE ?= 10
# Be careful not to over-parallelize for translation if models take long to load, especially if translating many test files.
# One can run canoe-timing-stats.pl on the resulting logs to help assess.
PARALLELISM_LEVEL_TRANSLATE ?= 1
ifeq (${MAKELEVEL},0)
$(info Running in cluster mode.)
endif
else
# Make sure we run in serial mode.
.NOTPARALLEL:
# Autodetect the number of available cpus on this machine.
ifneq (${OSTYPE},Darwin)
NCPUS := $(shell test -n "$$OMP_NUM_THREADS" && echo $$OMP_NUM_THREADS || grep processor /proc/cpuinfo | wc -l)
else
NCPUS := $(shell test -n "$$OMP_NUM_THREADS" && echo $$OMP_NUM_THREADS || sysctl -n hw.ncpu)
endif
# Uncomment the following line to fix the parallelism level in single-host mode.
# You would normally do this to set NCPUS to a smaller number than you really
# have, e.g., if you are sharing the machine with other users.
# Should be <= the real number of CPUs on your machine.
#NCPUS := 24
PARALLELISM_LEVEL_CORPORA ?= ${NCPUS}
PARALLELISM_LEVEL_LM ?= ${NCPUS}
PARALLELISM_LEVEL_LDM ?= ${NCPUS}
PARALLELISM_LEVEL_TM ?= ${NCPUS}
PARALLELISM_LEVEL_SPARSE ?= ${NCPUS}
PARALLELISM_LEVEL_TUNE_DECODE ?= ${NCPUS}
PARALLELISM_LEVEL_TUNE_RESCORE ?= ${NCPUS}
PARALLELISM_LEVEL_TUNE_CONFIDENCE ?= ${NCPUS}
# Be careful not to over-parallelize for translation if models take long to load, especially if translating many test files.
# One can run canoe-timing-stats.pl on the resulting logs to help assess.
PARALLELISM_LEVEL_TRANSLATE ?= 1
# Disable CLUSTER mode in all of PortageII's software.
export PORTAGE_NOCLUSTER=1
ifeq (${MAKELEVEL},0)
$(info Running in local mode.)
endif
endif
########################################
# Sanitize user inputs.
# Remove accidental user spaces that would otherwise confuse make.
SRC_LANG := $(strip ${SRC_LANG})
TGT_LANG := $(strip ${TGT_LANG})
PRIMARY_LM := $(strip ${PRIMARY_LM})
TRAIN_LM := $(strip ${TRAIN_LM})
MIXLM := $(strip ${MIXLM})
TRAIN_TM := $(strip ${TRAIN_TM})
MIXTM := $(strip ${MIXTM})
TUNE_DECODE := $(strip ${TUNE_DECODE})
TUNE_DECODE_VARIANTS := $(strip ${TUNE_DECODE_VARIANTS})
TUNE_RESCORE := $(strip ${TUNE_RESCORE})
TUNE_CE := $(strip ${TUNE_CE})
################################################################################
# Advanced configuration variables
# Compress extension.
GZ ?= .gz
# Raw file's extension.
ALIGNX ?= .al
# Extension for rule files, which must be source-language files.
RULEX ?= _${SRC_LANG}.rule
# Language extension for corpora, including phrase table corpora.
LANGX ?= .lc
# Extension for compressed corpora.
LANGXZ ?= ${LANGX}${GZ}
# Language pair for this system.
LANGS ?= ${SRC_LANG} ${TGT_LANG}
# Auto-detect whether PortageII was compiled with ICU.
ifeq (${MAKELEVEL},0)
PORTAGE_VERSION:=$(shell portage_info -version)
ifeq (${PORTAGE_VERSION},)
$(warning Cannot find portage_info; make sure a version of PortageII is installed.)
endif
endif
ifeq ($(strip $(shell portage_info -with-icu > /dev/null && echo "true")),true)
ICU = 1
ifeq (${MAKELEVEL},0)
$(info ${PORTAGE_VERSION} was compiled with ICU)
endif
else
ifeq (${MAKELEVEL},0)
$(info ${PORTAGE_VERSION} was compiled without ICU)
endif
endif
ifeq (${MAKELEVEL},0)
$(info ) # Leave a blank line
endif
# Apply the default logic triggered by using PRIMARY_LM instead of TRAIN_LM or MIXLM
ifdef PRIMARY_LM
ifneq ($(or ${MIXLM},${TRAIN_LM}),)
$(error When you define PRIMARY_LM, leave TRAIN_LM and MIXLM undefined)
endif
ifdef MIXLM_PRETRAINED_TGT_LMS
ifeq (${MAKELEVEL},0)
$(info PRIMARY_LM and MIXLM_PRETRAINED_TGT_LMS specified: combining them in a MixLM)
endif
MIXLM=${PRIMARY_LM}
else
MIXLM_PRETRAINED_TGT_LMS_GLOB=$(wildcard ${PORTAGE_GENERIC_MODEL}/generic-2.1/lm/generic-2.1_${TGT_LANG}.tplm)
#$(info mixlm test ${MIXLM_PRETRAINED_TGT_LMS_GLOB})
ifneq (${MIXLM_PRETRAINED_TGT_LMS_GLOB},)
ifeq (${MAKELEVEL},0)
$(info Found pre-trained generic LM and PRIMARY_LM: combining them in a MixLM)
endif
MIXLM=${PRIMARY_LM}
MIXLM_PRETRAINED_TGT_LMS=${MIXLM_PRETRAINED_TGT_LMS_GLOB}
else
ifneq ($(filter ${TGT_LANG},en fr),)
LM_ERROR_MSG := Generic LM for ${TGT_LANG} not found; please install PortageII Generic Model 2.1, or set \
PORTAGE_GENERIC_MODEL, or set MIXLM_PRETRAINED_TGT_LMS, or use MIXLM or TRAIN_LM instead of PRIMARY_LM
ifeq ($(filter clean clean.% doc help,${MAKECMDGOALS}),)
$(error ${LM_ERROR_MSG})
else
$(warning ${LM_ERROR_MSG})
endif
endif
ifneq ($(words ${PRIMARY_LM}),1)
ifeq (${MAKELEVEL},0)
$(info Multiple LMs in PRIMARY_LM: combining them in a MixLM)
endif
MIXLM=${PRIMARY_LM}
else
ifeq (${MAKELEVEL},0)
$(info No pre-trained generic LM found: using a regular in-domain LM)
endif
TRAIN_LM=${PRIMARY_LM}
endif
endif
endif
else
ifdef MIXLM
ifdef MIXLM_PRETRAINED_TGT_LMS
ifeq (${MAKELEVEL},0)
$(info MIXLM and MIXLM_PRETRAINED_TGT_LMS specified: combining them in a MixLM)
endif
else ifneq ($(words ${MIXLM}),1)
ifeq (${MAKELEVEL},0)
$(info Multiple LMs in MIXLM specified: combining them in a MixLM)
endif
endif
else ifdef MIXLM_PRETRAINED_TGT_LMS
ifneq ($(words ${MIXLM_PRETRAINED_TGT_LMS}),1)
ifeq (${MAKELEVEL},0)
$(info Multiple LMs in MIXLM_PRETRAINED_TGT_LMS specified: combining them in a MixLM)
endif
endif
endif
endif
# With the MIT LM toolkit, a dev set can be used to tune the LM KN discounting
# parameters. It is recommended that a dev set other than the TUNE_DECODE set
# be used for such tuning; it's okay to use TUNE_RESCORE or TUNE_CE if different
# from TUNE_DECODE. To tune the LM parameters, define TUNE_LM below.
TUNE_LM ?= ${TUNE_DECODE}
TUNE_LM := $(strip ${TUNE_LM})
# Make sure USE_DYNAMIC_MIXLM is not defined if not using MixLMs.
ifdef USE_DYNAMIC_MIXLM
ifeq ($(strip ${MIXLM} ${MIXLM_PRETRAINED_TGT_LMS}),)
USE_DYNAMIC_MIXLM =
endif
endif
USE_MIXTM := $(if $(strip ${MIXTM} ${MIXTM_PRETRAINED_TMS}),1)
ifdef USE_MIXTM
# By default we assume that the user wants to use the first MIXTM corpus (its
# word alignment models are needed) to train the weights for mixing the
# component translation models.
MIXTM_TRAIN_MIX ?= $(firstword ${MIXTM})
MIXTM_TRAIN_MIX := $(strip ${MIXTM_TRAIN_MIX})
# We need a dev set for tuning the mixture weights in a mixtm.
TUNE_MIXTM ?= ${TUNE_DECODE}
TUNE_MIXTM := $(strip ${TUNE_MIXTM})
# It may be beneficial to use a global Word Alignment Model for training a
# MIXTM. To do so, define MIXTM_USE_GLOBAL_WORD_ALIGNMENT_MODEL (uncomment):
#MIXTM_USE_GLOBAL_WORD_ALIGNMENT_MODEL ?= 1
ifdef REFERENCE_INDICES
TUNE_MIXTM_MULTIPLE_REFERENCES ?= $(strip ${TUNE_MIXTM}).multiple.references
endif
endif
# In the case where you want a merged_cpt, you will need to define the following:
# MERGED_CPT_ZN_MODEL & MERGED_CPT_JPT_TYPES
# What word alignment model to use for Zens-Ney's smoother when building a merged_cpt?
# MERGED_CPT_ZN_MODEL can be one of: IBM4, HMM3, HMM2, HMM1, IBM2 or IBM1
MERGED_CPT_ZN_MODEL ?= HMM3
# What type of JPTs should be used to create the final merged_cpt?
# MERGED_CPT_JPT_TYPES can be one or several of: FAST_ALIGN, IBM4, HMM3, HMM2, HMM1, IBM2 or IBM1
# Using IBM4 models in addition to IBM2 and HMM3 gives a small boost in BLEU
# for most scenarios, and increases reliability, so we now enable it by default.
MERGED_CPT_JPT_TYPES ?= IBM2 HMM3 IBM4
# Are we using alignment indicator features?
MERGED_CPT_USE_ALIGNMENT_INDICATORS ?= 0
# Specify the alignment symmetrization strategy:
# The former default, IBMOchAligner 3 (GDF), gives denser alignments and smaller
# phrase tables. It works best for very large corpora. This is equivalent to "diag"
# in Koehn, Och and Marcu (2003), and is often referred to as "grow-diag-final".
#ALIGNMENT_SYMMETRIZATION_OPTIONS = -a GDF
# IBMOchAligner 4 (GDFA) gives sparser, higher confidence alignments, and larger
# phrase tables. It works best for small to large corpora: with up to hundreds
# of thousands of sentence pairs, maybe more, this is known to be the better option.
# This is equivalent to "diag-and" in Koehn, Och and Marcu (2003), and is often
# referred to as "grow-diag-final-and".
ALIGNMENT_SYMMETRIZATION_OPTIONS = -a GDFA
# Define what type of phrase table we want to generate.
# Can be one or more: ibm2_cpt, hmm1_cpt, hmm2_cpt, hmm3_cpt, ibm4_cpt,
# merged_cpt, or indicator_cpt.
# Typically, merged_cpt or indicator_cpt is used alone.
# WARNING: changes here must be manually reflected in
# models/rescore/rescore-model.template and models/confidence/ce-notm.template:
# look for upper case tokens like HMM3FWD and similar ones nearby.
# PT_TYPES ?= ibm2_cpt hmm3_cpt
ifeq (${MERGED_CPT_USE_ALIGNMENT_INDICATORS},1)
PT_TYPES ?= $(strip $(if ${TRAIN_TM}, indicator_cpt) \
$(if ${USE_MIXTM}, mix_cpt))
else
PT_TYPES ?= $(strip $(if $(strip ${TRAIN_TM}), merged_cpt) \
$(if ${USE_MIXTM}, mix_cpt))
endif
# Determine whether TMs will include the alignment (a=) field or not.
# This field is used for truecasing and tags transfer.
TMS_WITH_ALIGNMENT_FIELD ?= 1
# Define the significance threshold for significance pruning.
# Usually the default of a+e is the ideal value.
# SIG_THRESHOLD must be a positive real number or a pre-defined constant:
# 'a+e' (alpha + epsilon) is the significance threshold such that <1,1,1>
# phrase pairs are filtered out.
# 'a-e' (alpha - epsilon) is the significance threshold such that <1,1,1>
# phrase pairs are kept.
# Note: a (alpha) is the significance level of <1,1,1> phrase pairs, as
# discussed in Johnson et al, EMNLP 2007.
ifdef USE_SIGPRUNING
SIG_THRESHOLD ?= a+e
endif
SIG_THRESHOLD := $(strip ${SIG_THRESHOLD})
# On some clusters or highly parallel file systems, memory-mapped class files
# can be inefficient - this is set by default to use the regular ones instead;
# comment out to use memory-mapped class files.
SPARSE_MODEL_NO_MMCLS ?= 1
# By default, we assume that the user wants to use the corpora listed in
# TRAIN_TM and MIXTM to build the Lexicalized Distortion Model (LDM).
ifdef USE_LDM
TRAIN_LDM ?= $(sort ${TRAIN_TM} ${MIXTM})
endif
TRAIN_LDM := $(strip ${TRAIN_LDM})
# By default, we assume that the user wants to use the corpora listed in
# TRAIN_LDM to build the Hierarchical Lexicalized Distortion Model (HLDM)
# if TRAIN_LDM is defined, or the corpora listed in TRAIN_TM and MIXTM if
# TRAIN_LDM is not defined.
ifdef USE_HLDM
TRAIN_HLDM ?= $(or ${TRAIN_LDM}, $(sort ${TRAIN_TM} ${MIXTM}))
endif
TRAIN_HLDM := $(strip ${TRAIN_HLDM})
# TRAIN_SPARSE should be the concatenation of all TRAIN_TM and MIXTM, but
# creation of that corpus hasn't been added yet. For now, we add a hack to
# default TRAIN_SPARSE to the first word of TRAIN_TM.
ifdef USE_SPARSE
TRAIN_SPARSE ?= $(firstword ${TRAIN_TM})
endif
TRAIN_SPARSE := $(strip ${TRAIN_SPARSE})
# Word class granularities for coarse LMs.
# Defining COARSELM_NCLS_LIST triggers word class training too.
# If defined, COARSELM_NCLS_LIST must contain integer values.
# We recommend using a 200-class coarse LM and an 800-class coarse LM.
ifdef USE_COARSELM
COARSELM_NCLS_LIST ?= 200 800
endif
COARSELM_NCLS_LIST := $(strip ${COARSELM_NCLS_LIST})
# By default, we assume that the user wants to use PRIMARY_LM for the coarse LM,
# or TRAIN_LM (if no PRIMARY_LM), or MIXLM (if no PRIMARY_LM or TRAIN_LM).
ifdef USE_COARSELM
TRAIN_COARSELM ?= $(or ${PRIMARY_LM},${TRAIN_LM},${MIXLM})
endif
TRAIN_COARSELM := $(strip ${TRAIN_COARSELM})
# With the MIT LM toolkit, a dev set can be used to tune the LM KN discounting
# parameters. It is recommended that a dev set other than the TUNE_DECODE set
# be used for such tuning; it's okay to use TUNE_RESCORE or TUNE_CE if different
# from TUNE_DECODE. To tune the discount parameters for coarse LMs, define
# TUNE_COARSELM below. By default, we assume the user wants to use the same
# dev set to tune the coarse LMs as was used to tune the normal word LM.
ifdef USE_COARSELM
TUNE_COARSELM ?= ${TUNE_LM}
endif
TUNE_COARSELM := $(strip ${TUNE_COARSELM})
# Specifications for BiLMs.
# "word" indicates a word-based BiLM.
# Coarse BiLM specifications take the form: NNNbi-NNNs-NNNt where NNN is the
# word class granularity. Examples: 400bi-400s-400t, 400s-400t, 400bi
# Multiple BiLM specifications are permitted. Each will be applied to each
# corpus listed in TRAIN_BILM.
# If you enable BiLMs, we suggest a 400bi-400s-400t coarse BiLM
# (400 bitoken classes with 400 src word classes and 400 tgt word classes).
ifdef USE_BILM
BILM_SPEC ?= 400bi-400s-400t
endif
BILM_SPEC := $(strip ${BILM_SPEC})
# By default, we assume that the user wants to use the TRAIN_TM corpora
# for BiLMs too.
ifdef USE_BILM
TRAIN_BILM ?= ${TRAIN_TM}
endif
TRAIN_BILM := $(strip ${TRAIN_BILM})
BILM_NCLS_LIST := $(patsubst %s,%, $(patsubst %t,%, $(filter %s %t, $(subst -, ,${BILM_SPEC}))))
# With the MIT LM toolkit, a dev set can be used to tune the LM KN discounting
# parameters. It is recommended that a dev set other than the TUNE_DECODE set
# be used for such tuning; it's okay to use TUNE_RESCORE or TUNE_CE if different
# from TUNE_DECODE. To tune the discount parameters for BiLMs, define
# TUNE_BILM below. By default, we assume the user wants to use the same
# dev set to tune the BiLMs as was used to tune the normal word LM.
ifdef USE_BILM
TUNE_BILM ?= ${TUNE_LM}
endif
TUNE_BILM := $(strip ${TUNE_BILM})
# Word class granularities for NNJMs.
# Defining NNJM_NCLS triggers word class training too.
# If defined, NNJM_NCLS must contain integer values.
# We recommend using 400 classes for the NNJM.
ifneq ("${NNJM_TRAIN_CORPUS}","")
NNJM_NCLS ?= 400
endif
# NNJM specific options that should stay empty.
NNJM_TRAIN_NNJM_OPTS ?=
NNJM_GENEX_OPTS ?=
# Word class granularities to train.
# By default, we train word classes (wcl) for the granularities identified for
# use in coarse models (coarse LMs, coarse BiLMs, coarse TMs).
# => WCL_NCLS_LIST = COARSELM_NCLS_LIST BILM_NCLS_LIST
# Defining WCL_NCLS_LIST triggers word class training.
# If defined, WCL_NCLS_LIST must contain integer values.
WCL_NCLS_LIST ?= $(sort ${COARSELM_NCLS_LIST} ${BILM_NCLS_LIST} ${NNJM_NCLS})
WCL_NCLS_LIST := $(strip ${WCL_NCLS_LIST})
# By default, we assume that the user wants to use the corpora listed in
# TRAIN_LM, MIXLM, TRAIN_TM, MIXTM, and the coarse LM, BiLM and NNJM training
# corpora to build the word classes.
ifdef WCL_NCLS_LIST
TRAIN_WCL ?= $(sort ${TRAIN_LM} ${MIXLM} ${TRAIN_TM} ${MIXTM} ${TRAIN_COARSELM} ${TRAIN_BILM} \
${NNJM_TRAIN_CORPUS} ${NNJM_FINE_TUNING_TRAIN_CORPUS})
endif
TRAIN_WCL := $(strip ${TRAIN_WCL})
# Define what type of language model we want to generate.
LM_TYPES ?= binlm
# Parameters for models/decode/Makefile
#TEMPLATE_DIR ?= ${ROOT_DIR}/models/decode
PREFIX_DEV_COW ?= ${TUNE_DECODE}
PREFIX_DEV_RAT ?= ${TUNE_RESCORE}
# By default, we assume that the user wants to use the first corpus in PRIMARY_LM
# to build the truecasing models, or in TRAIN_LM (if no PRIMARY_LM), or in MIXLM
# (if no PRIMARY_LM or TRAIN_LM).
ifdef DO_TRUECASING
TRAIN_TC ?= $(firstword $(or ${PRIMARY_LM},${TRAIN_LM},${MIXLM}))
endif
TRAIN_TC := $(strip ${TRAIN_TC})
# Define the truecasing model filenames.
TRUECASING_MAP ?= ${TRAIN_TC}_${TGT_LANG}.map
TRUECASING_LM ?= ${TRAIN_TC}_${TGT_LANG}-kn-3g.binlm${GZ}
# Should we also use source language information in truecasing?
# NOTE: use of source language models in truecasing is not compatible with rescoring.
# We cannot use source language models when Arabic or Chinese is the source
# language because those languages are caseless.
ifeq ($(filter ${SRC_LANG}, ar ch),)
# Comment out to disable use of source language info; uncomment to enable.
TC_USE_SRC_MODELS ?= 1
endif
ifdef TC_USE_SRC_MODELS
TRUECASING_NC1_SRC_LM ?= ${TRAIN_TC}_${SRC_LANG}.nc1.binlm${GZ}
# Source language locale used during truecasing.
# If the default of ${SRC_LANG}_${SRC_LOCALE_COUNTRY}.UTF-8 is not correct,
# uncomment the following and declare the correct string here.
# For example:
#SRC_LOCALE ?= da_DK.utf8
#SRC_LOCALE ?= ${SRC_LANG}_${SRC_LOCALE_COUNTRY}.UTF-8
# Target language locale used during truecasing.
# If the default of ${TGT_LANG}_${TGT_LOCALE_COUNTRY}.UTF-8 is not correct,
# uncomment the following and declare the correct string here.
#TGT_LOCALE ?= ${TGT_LANG}_${TGT_LOCALE_COUNTRY}.UTF-8
# Make sure the source and target language locales are set.
SRC_LOCALE ?= ${SRC_LANG}_${SRC_LOCALE_COUNTRY}.UTF-8
SRC_LOCALE := $(strip ${SRC_LOCALE})
TGT_LOCALE ?= ${TGT_LANG}_${TGT_LOCALE_COUNTRY}.UTF-8
TGT_LOCALE := $(strip ${TGT_LOCALE})
endif # TC_USE_SRC_MODELS
# When working with TMX files, we assume the language code in the TMX is the
# upper case of $SRC_LANG/$TGT_LANG followed by '-' (hyphen) followed by
# $SRC_LOCALE_COUNTRY/$TGT_LOCALE_COUNTRY. When that's not true,
# uncomment the following and declare the correct strings here.
#TMX_SRC = EN-CA
#TMX_TGT = FR-CA
# If we are lucky enough to have a cluster, we'll change the shell for certain
# commands and allow them to run on nodes.
ifdef USING_CLUSTER
FRAMEWORK_SHELL = run-parallel.sh
else
FRAMEWORK_SHELL = /bin/bash
endif
# Some commands shouldn't be run with the cluster shell; they will use this one
# instead.
LOCAL_SHELL = /bin/bash
########################################
# LANGUAGE SPECIFICS
ifeq (${SRC_LANG},ar)
DONT_LOWERCASE_SRC = 1
endif
# We include src_lang specific configuration just before we validate the configuration.
-include $(dir $(lastword ${MAKEFILE_LIST}))Makefile.params.${SRC_LANG}
########################################
# This sets a default value for PORTAGE_GENERIC_MODEL if it was not defined in
# the user's environment.
PORTAGE_GENERIC_MODEL ?= ${PORTAGE}/generic-model
########################################
# VALIDATION
ifeq (${SRC_LANG},)
$(error You must provide a SRC_LANG!)
endif
ifeq (${TGT_LANG},)
$(error You must provide a TGT_LANG!)
endif
ifeq (${SRC_LANG},${TGT_LANG})
$(error SRC_LANG=${SRC_LANG} cannot be the same as TGT_LANG=${TGT_LANG}!)
endif
ifeq (${SRC_LOCALE_COUNTRY},)
$(error You must provide a SRC_LOCALE_COUNTRY!)
endif
ifeq (${TGT_LOCALE_COUNTRY},)
$(error You must provide a TGT_LOCALE_COUNTRY!)
endif
ifeq ($(strip ${TRAIN_LM} ${LM_PRETRAINED_TGT_LMS} ${MIXLM} ${MIXLM_PRETRAINED_TGT_LMS}),)
$(error You must always define a training corpus and/or pretrained models for language models)
endif
ifneq ($(strip ${MIXLM} ${MIXLM_PRETRAINED_TGT_LMS}),)
ifeq ($(words ${MIXLM} ${MIXLM_PRETRAINED_TGT_LMS}),1)
$(error You must specify multiple LMs in MIXLM and/or MIXLM_PRETRAINED_TGT_LMS to train a MixLM)
endif
endif
ifeq ($(strip ${TRAIN_TM} ${TM_PRETRAINED_TMS} ${MIXTM} ${MIXTM_PRETRAINED_TMS}),)
$(error You must always define a training corpus and/or pretrained models for translation models)
endif
ifdef USE_MIXTM
ifeq (${MIXTM_TRAIN_MIX},)
$(error With a MixTM, you must define a training corpus (MIXTM_TRAIN_MIX) for training the mixture weights)
endif
ifneq ($(words ${MIXTM_TRAIN_MIX}),1)
$(error You must not provide more than one corpus in MIXTM_TRAIN_MIX)
endif
ifeq (${TUNE_MIXTM},)
$(error With a MixTM, you must define a tuning corpus (TUNE_MIXTM) for tuning the mixture weights)
endif
ifneq ($(filter FAST_ALIGN,${MERGED_CPT_ZN_MODEL} ${MERGED_CPT_JPT_TYPES}),)
ifeq (${UNITTESTING},)
$(error MixTM with fast_align is not implemented.)
# There is no force-align mode with fast_align. To create TUNE_MIXTM's word
# alignment file, we would have to train a fast_align model on the training data,
# then train a second model on the concatenation of the training data and
# TUNE_MIXTM, from which we would keep only the word alignment file for the
# TUNE_MIXTM subset.
endif
endif
endif # USE_MIXTM
ifeq (${TUNE_DECODE},)
$(error You must always define a tuning corpus to train the decoder)
endif
ifdef DO_CE
ifeq (${TUNE_CE},)
$(error When asking for confidence estimation, you must also define a TUNE_CE)
endif
ifneq (${REFERENCE_INDICES},)
$(error Multiple references is not supported with confidence estimation)
endif
endif
ifdef DO_RESCORING
ifeq (${TUNE_RESCORE},)
$(error When asking for rescoring, you must also define a TUNE_RESCORE)
endif
ifdef TC_USE_SRC_MODELS
$(error When asking for rescoring, you must not define TC_USE_SRC_MODELS)
endif
endif
ifdef DO_TRUECASING
ifeq (${TRAIN_TC},)
$(error With truecasing, you must define a training corpus (TRAIN_TC) for training the truecasing model)
endif
# Check the SRC_LOCALE and TGT_LOCALE if needed for truecasing, but only once.
ifdef TC_USE_SRC_MODELS
ifeq (${MAKELEVEL},0)
CHECK_LOCALE = $(shell perl -e "use POSIX qw(locale_h); exit 1 unless defined setlocale(LC_CTYPE,q($1));"; echo $$?)
ifeq (${SRC_LOCALE},)
$(error With truecasing with TC_USE_SRC_MODELS, you must define the SRC_LOCALE!)
else
LOCALE_RC := $(call CHECK_LOCALE,${SRC_LOCALE})
ifneq (${LOCALE_RC},0)
$(info SRC_LANG: ${SRC_LANG})
$(info SRC_LOCALE_COUNTRY: ${SRC_LOCALE_COUNTRY})
$(info SRC_LOCALE: ${SRC_LOCALE})
$(error Error: Invalid locale ${SRC_LOCALE}; check values of SRC_LOCALE, SRC_LANG, SRC_LOCALE_COUNTRY; \
if correct, locale ${SRC_LOCALE} needs to be installed)
endif
endif # SRC_LOCALE
ifeq (${TGT_LOCALE},)
$(error With truecasing with TC_USE_SRC_MODELS, you must define the TGT_LOCALE!)
else
LOCALE_RC := $(call CHECK_LOCALE,${TGT_LOCALE})
ifneq (${LOCALE_RC},0)
$(info TGT_LANG: ${TGT_LANG})
$(info TGT_LOCALE_COUNTRY: ${TGT_LOCALE_COUNTRY})
$(info TGT_LOCALE: ${TGT_LOCALE})
$(error Error: Invalid locale ${TGT_LOCALE}; check values of TGT_LOCALE, TGT_LANG, TGT_LOCALE_COUNTRY; \
if correct, locale ${TGT_LOCALE} needs to be installed)
endif
endif # TGT_LOCALE
endif # MAKELEVEL 0
endif # TC_USE_SRC_MODELS
endif # DO_TRUECASING
ifdef USE_LDM
ifeq (${TRAIN_LDM},)
$(error With USE_LDM, you must define a training corpus (TRAIN_LDM) for training the distortion model)
endif
endif
ifdef USE_HLDM
ifeq (${TRAIN_HLDM},)
$(error With USE_HLDM, you must define a training corpus (TRAIN_HLDM) for training the distortion model)
endif
endif
ifdef USE_SPARSE
ifeq (${TRAIN_SPARSE},)
$(error With USE_SPARSE, you must define a training corpus (TRAIN_SPARSE) for training the sparse features)
endif
endif
ifdef USE_COARSELM
ifeq (${TRAIN_COARSELM},)
$(error With USE_COARSELM, you must define a training corpus (TRAIN_COARSELM) for training the coarse LM(s))
endif
ifeq (${COARSELM_NCLS_LIST},)
$(error With USE_COARSELM, you must define word class granularities (COARSELM_NCLS_LIST) for training the coarse LM(s))