# wikitext_bpe_finetune.sh
# Commands for fine-tuning the WikiText-103 BPE language model with centroids
# and for evaluating the fine-tuned checkpoint.
# Fine-tuning run that loads centroids from file.
# - Restores checkpoints/wikitext103-bpe/checkpoint_best.pt and passes the
#   --reset-optimizer / --reset-dataloader / --reset-meters flags.
# - Loads centroids.npy and cluster_freq.npz from checkpoints/wikitext103-bpe.
# - Saves to checkpoints/wikitext103-bpe-centroid-finetune.
# - Eight device ids are exposed via CUDA_VISIBLE_DEVICES; --update-freq is 3.
# One option per line; the argument order is significant only for the
# positional data directory, but is kept unchanged throughout.
CUDA_VISIBLE_DEVICES=2,3,4,5,6,7,8,9 \
python train.py \
  --task language_modeling \
  data-bin/wikitext103-bpe \
  --save-dir checkpoints/wikitext103-bpe-centroid-finetune \
  --arch transformer_lm_wikibpe \
  --restore-file checkpoints/wikitext103-bpe/checkpoint_best.pt \
  --finetune-centroids \
  --reset-optimizer \
  --reset-dataloader \
  --reset-meters \
  --criterion agg_softmax \
  --use-last-ffn-input \
  --knn-keytype last_ffn_input \
  --load-centroids checkpoints/wikitext103-bpe/centroids.npy \
  --load-centroid-distribution checkpoints/wikitext103-bpe/cluster_freq.npz \
  --max-update 28600 \
  --max-lr 1.0 \
  --t-mult 2 \
  --lr-period-updates 27000 \
  --lr-scheduler cosine \
  --lr-shrink 0.75 \
  --warmup-updates 1600 \
  --warmup-init-lr 1e-07 \
  --min-lr 1e-09 \
  --optimizer nag \
  --lr 0.0001 \
  --clip-norm 0.1 \
  --max-tokens 3072 \
  --update-freq 3 \
  --tokens-per-sample 3072 \
  --seed 1 \
  --sample-break-mode none \
  --skip-invalid-size-inputs-valid-test \
  --ddp-backend=no_c10d \
  --fp16
# Longer fine-tuning run.
# - Restores checkpoints/wikitext103-bpe/checkpoint_best.pt and passes the
#   --reset-optimizer / --reset-dataloader / --reset-meters flags.
# - Loads centroids.npy and cluster_freq.npz from checkpoints/wikitext103-bpe.
# - Saves to checkpoints/wikitext103-bpe-centroid-finetune-longer.
# - Schedule: --max-update 143000, --lr-period-updates 135000,
#   --warmup-updates 8000.
# - Six device ids are exposed via CUDA_VISIBLE_DEVICES; --update-freq is 4.
CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 \
python train.py \
  --task language_modeling \
  data-bin/wikitext103-bpe \
  --save-dir checkpoints/wikitext103-bpe-centroid-finetune-longer \
  --arch transformer_lm_wikibpe \
  --restore-file checkpoints/wikitext103-bpe/checkpoint_best.pt \
  --finetune-centroids \
  --reset-optimizer \
  --reset-dataloader \
  --reset-meters \
  --criterion agg_softmax \
  --use-last-ffn-input \
  --knn-keytype last_ffn_input \
  --load-centroids checkpoints/wikitext103-bpe/centroids.npy \
  --load-centroid-distribution checkpoints/wikitext103-bpe/cluster_freq.npz \
  --max-update 143000 \
  --max-lr 1.0 \
  --t-mult 2 \
  --lr-period-updates 135000 \
  --lr-scheduler cosine \
  --lr-shrink 0.75 \
  --warmup-updates 8000 \
  --warmup-init-lr 1e-07 \
  --min-lr 1e-09 \
  --optimizer nag \
  --lr 0.0001 \
  --clip-norm 0.1 \
  --max-tokens 3072 \
  --update-freq 4 \
  --tokens-per-sample 3072 \
  --seed 1 \
  --sample-break-mode none \
  --skip-invalid-size-inputs-valid-test \
  --ddp-backend=no_c10d \
  --fp16
# Continue the longer fine-tuning run.
# - Restores checkpoint_best.pt from the run's own save directory
#   (checkpoints/wikitext103-bpe-centroid-finetune-longer) and saves back to it.
# - No --reset-* flags and no --load-centroids are passed here; only the
#   centroid distribution file (cluster_freq.npz) is supplied.
# - Two device ids are exposed via CUDA_VISIBLE_DEVICES; --update-freq is 12.
# NOTE(review): this resumes from checkpoint_best.pt rather than
# checkpoint_last.pt — confirm that is intended.
CUDA_VISIBLE_DEVICES=6,7 \
python train.py \
  --task language_modeling \
  data-bin/wikitext103-bpe \
  --save-dir checkpoints/wikitext103-bpe-centroid-finetune-longer \
  --arch transformer_lm_wikibpe \
  --restore-file checkpoints/wikitext103-bpe-centroid-finetune-longer/checkpoint_best.pt \
  --finetune-centroids \
  --criterion agg_softmax \
  --use-last-ffn-input \
  --knn-keytype last_ffn_input \
  --load-centroid-distribution checkpoints/wikitext103-bpe/cluster_freq.npz \
  --max-update 143000 \
  --max-lr 1.0 \
  --t-mult 2 \
  --lr-period-updates 135000 \
  --lr-scheduler cosine \
  --lr-shrink 0.75 \
  --warmup-updates 8000 \
  --warmup-init-lr 1e-07 \
  --min-lr 1e-09 \
  --optimizer nag \
  --lr 0.0001 \
  --clip-norm 0.1 \
  --max-tokens 3072 \
  --update-freq 12 \
  --tokens-per-sample 3072 \
  --seed 1 \
  --sample-break-mode none \
  --skip-invalid-size-inputs-valid-test \
  --ddp-backend=no_c10d \
  --fp16
# Evaluate the fine-tuned model.
# - Scores checkpoint_last.pt from
#   checkpoints/wikitext103-bpe-centroid-finetune-longer on the "valid" subset.
# - Overrides knn_keytype and use_last_ffn_input on the loaded model via
#   --model-overrides, and supplies the centroid distribution file.
python eval_lm.py \
  data-bin/wikitext103-bpe \
  --path checkpoints/wikitext103-bpe-centroid-finetune-longer/checkpoint_last.pt \
  --sample-break-mode complete \
  --max-tokens 3072 \
  --context-window 2560 \
  --softmax-batch 1024 \
  --gen-subset valid \
  --bpe subword_nmt \
  --remove-bpe \
  --model-overrides "{'knn_keytype': 'last_ffn_input', 'use_last_ffn_input': True}" \
  --load-centroid-distribution checkpoints/wikitext103-bpe/cluster_freq.npz