-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathsetup.sh
120 lines (107 loc) · 3.15 KB
/
setup.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/bin/bash
# Installs the third-party tools and prepares the corpora used by this project.
# All paths are derived from the directory that contains this script:
#   $root_dir/software - tools that are cloned/downloaded below
#   $root_dir/data     - corpora and the pre-processing scripts sourced below

# Resolve the script's own directory so the script can be started from any cwd.
# CDPATH is cleared for the `cd` so it cannot echo a directory name into the
# captured output; `--` protects against paths that start with a dash.
root_dir="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" || {
  echo "Cannot determine the directory containing ${BASH_SOURCE[0]}" >&2
  exit 1
}
data_dir=$root_dir/data
software_dir=$root_dir/software
# Quoted so that a checkout path containing spaces is not split into several directories.
mkdir -p -- "$software_dir" || exit 1
# === Installing Sockeye (forked from v1.18.82)
# Note: Sockeye requires python3
# Skipped when $software_dir/sockeye already exists.
sockeye_path=$software_dir/sockeye
if [ ! -d "$sockeye_path" ]; then
  # The steps are chained with && so that a failed clone or `cd` stops the
  # sequence; otherwise `git checkout` and `pip install .` would run in
  # whatever directory the shell happens to be in.
  # See the detailed instruction on https://github.com/xingniu/sockeye/blob/style-mt/docs/setup.md
  if ! {
    cd "$software_dir" &&
      git clone https://github.com/xingniu/sockeye.git &&
      cd "$sockeye_path" &&
      git checkout style-mt &&
      pip install . --no-deps -r requirements/requirements.gpu-cu90.txt
  }; then
    echo "Sockeye installation failed; delete $sockeye_path (if it was created) before re-running" >&2
    exit 1
  fi
fi
# === Installing Moses scripts (commit: 06f519d)
# Only the scripts/ directory of mosesdecoder is kept, as $software_dir/moses-scripts.
# Skipped when that directory already exists.
moses_scripts_path=$software_dir/moses-scripts
if [ ! -d "$moses_scripts_path" ]; then
  # Chained with && so a failed clone cannot lead to `git checkout`, `mv` or
  # `rm -rf` acting on the wrong directory. The checkout runs in a subshell so
  # the working directory stays at $software_dir for the following `mv`.
  if ! {
    cd "$software_dir" &&
      git clone https://github.com/moses-smt/mosesdecoder.git &&
      (cd mosesdecoder && git checkout 06f519d) &&
      mv mosesdecoder/scripts moses-scripts &&
      rm -rf mosesdecoder
  }; then
    echo "Moses scripts installation failed; delete $software_dir/mosesdecoder (if it was created) before re-running" >&2
    exit 1
  fi
fi
# === Installing BPE scripts (commit: d21ced8)
# Skipped when $software_dir/subword-nmt already exists.
if [ ! -d "$software_dir/subword-nmt" ]; then
  # Chained with && so that `git checkout` only runs inside a successful clone.
  if ! {
    cd "$software_dir" &&
      git clone https://github.com/rsennrich/subword-nmt.git &&
      cd subword-nmt &&
      git checkout d21ced8
  }; then
    echo "subword-nmt installation failed; delete $software_dir/subword-nmt (if it was created) before re-running" >&2
    exit 1
  fi
fi
# === Installing KenLM
# Clones KenLM and builds it out-of-tree in kenlm/build with cmake + make.
# Skipped when $software_dir/kenlm already exists.
if [ ! -d "$software_dir/kenlm" ]; then
  # Chained with && so that cmake/make never run outside kenlm/build.
  if ! {
    cd "$software_dir" &&
      git clone https://github.com/kpu/kenlm.git &&
      cd kenlm &&
      mkdir -p build &&
      cd build &&
      cmake .. &&
      make -j 4
  }; then
    echo "KenLM installation failed; delete $software_dir/kenlm (if it was created) before re-running" >&2
    exit 1
  fi
fi
# === Installing Meteor-1.5
# Downloads the release tarball, extracts it into $software_dir and removes the tarball.
# Skipped when $software_dir/meteor-1.5 already exists.
if [ ! -d "$software_dir/meteor-1.5" ]; then
  # Chained with && so that a failed download is reported instead of being
  # followed by a failing tar/rm.
  if ! {
    cd "$software_dir" &&
      wget http://www.cs.cmu.edu/~alavie/METEOR/download/meteor-1.5.tar.gz &&
      tar -xzf meteor-1.5.tar.gz &&
      rm meteor-1.5.tar.gz
  }; then
    echo "Meteor-1.5 installation failed" >&2
    exit 1
  fi
fi
# === Installing nlp-util
# Skipped when $software_dir/nlp-util already exists.
nlp_util_path=$software_dir/nlp-util
if [ ! -d "$nlp_util_path" ]; then
  # Do not clone into an arbitrary directory if $software_dir is unreachable.
  if ! {
    cd "$software_dir" &&
      git clone https://github.com/xingniu/nlp-util.git
  }; then
    echo "nlp-util installation failed" >&2
    exit 1
  fi
fi
# === Pre-processing the GYAFC corpus (train/dev/test/system_output)
# Follow the instruction on https://github.com/raosudha89/GYAFC-corpus to get the GYAFC corpus.
# Copy GYAFC_Corpus.zip into the data directory ($data_dir)
if [ ! -f "$data_dir/GYAFC_Corpus.zip" ]; then
  # Report on stderr and exit non-zero so that callers can detect the failure;
  # the message names the actual directory rather than a literal "/data".
  echo "Please copy GYAFC_Corpus.zip into $data_dir" >&2
  exit 1
fi
GYAFC_path=$data_dir/GYAFC_Corpus
# Extraction and pre-processing are skipped when $GYAFC_path already exists.
if [ ! -d "$GYAFC_path" ]; then
  cd "$data_dir" || exit 1
  unzip GYAFC_Corpus.zip
  unzip_status=$?
  # unzip returns 1 for warnings where processing still completed; only higher
  # codes indicate that the archive could not be extracted.
  if [ "$unzip_status" -gt 1 ]; then
    echo "Failed to extract $data_dir/GYAFC_Corpus.zip (unzip exit status $unzip_status)" >&2
    exit 1
  fi
  # Drop the macOS resource-fork directory that may be present in the archive.
  rm -rf __MACOSX
  # Explicit ./ so the script is taken from $data_dir and not looked up in PATH.
  . ./GYAFC-preprocess.sh
fi
# === Downloading cleaned OpenSubtitles2016 parallel data (train/dev/test)
# Downloads the archive into $data_dir, extracts it and removes the archive.
# Skipped when $data_dir/OpenSubtitles2016 already exists.
OpenSubtitles2016_path=$data_dir/OpenSubtitles2016
if [ ! -d "$OpenSubtitles2016_path" ]; then
  # Chained with && so that a failed download is reported instead of being
  # followed by a failing tar/rm.
  if ! {
    cd "$data_dir" &&
      wget https://obj.umiacs.umd.edu/mt-data/OpenSubtitles2016.en-fr.16M.tgz &&
      tar -xf OpenSubtitles2016.en-fr.16M.tgz &&
      rm OpenSubtitles2016.en-fr.16M.tgz
  }; then
    echo "Downloading OpenSubtitles2016 failed" >&2
    exit 1
  fi
fi
# === Downloading and pre-processing the Europarl-v7 corpus
# corpus_path and corpus_name are set before corpus-preprocess.sh is sourced,
# presumably as its inputs (the script is not visible here - verify).
# Skipped when $data_dir/Europarl-v7 already exists.
corpus_path=$data_dir/Europarl-v7
if [ ! -d "$corpus_path" ]; then
  # Abort rather than source the script from an unexpected directory.
  cd "$data_dir" || exit 1
  corpus_name=Europarl
  # Explicit ./ so the script is taken from $data_dir and not looked up in PATH.
  . ./corpus-preprocess.sh
fi
# === Downloading and pre-processing the NewsCommentary-v14 corpus
# corpus_path and corpus_name are set before corpus-preprocess.sh is sourced,
# presumably as its inputs (the script is not visible here - verify).
# Skipped when $data_dir/NewsCommentary-v14 already exists.
corpus_path=$data_dir/NewsCommentary-v14
if [ ! -d "$corpus_path" ]; then
  # Abort rather than source the script from an unexpected directory.
  cd "$data_dir" || exit 1
  corpus_name=NewsCommentary
  # Explicit ./ so the script is taken from $data_dir and not looked up in PATH.
  . ./corpus-preprocess.sh
fi
# === Downloading and pre-processing the WMT14 test set
# corpus_path and corpus_name are set before corpus-preprocess.sh is sourced,
# presumably as its inputs (the script is not visible here - verify).
# Skipped when $data_dir/WMT14 already exists.
corpus_path=$data_dir/WMT14
if [ ! -d "$corpus_path" ]; then
  # Abort rather than source the script from an unexpected directory.
  cd "$data_dir" || exit 1
  corpus_name=WMT14
  # Explicit ./ so the script is taken from $data_dir and not looked up in PATH.
  . ./corpus-preprocess.sh
fi
# === Downloading and pre-processing the MSLT test set
# corpus_path and corpus_name are set before corpus-preprocess.sh is sourced,
# presumably as its inputs (the script is not visible here - verify).
# Skipped when $data_dir/MSLT already exists.
corpus_path=$data_dir/MSLT
if [ ! -d "$corpus_path" ]; then
  # Abort rather than source the script from an unexpected directory.
  cd "$data_dir" || exit 1
  corpus_name=MSLT
  # Explicit ./ so the script is taken from $data_dir and not looked up in PATH.
  . ./corpus-preprocess.sh
fi