# config.toml
[global]
# Top-level settings that sit outside any model or sampling table:
# dataset selection and the seeds used when preparing data.
# NOTE(review): "mastero" may be a transposition of "maestro" (the MAESTRO
# piano dataset) — confirm which spelling the dataset loader expects before
# renaming, since this string is read at runtime.
dataset = "mastero"
# Seed for random slicing — presumably of training sequences; confirm against
# the data pipeline.
random_slice_seed = 123
# Presumably the fraction of the data held out for validation (0.1 = 10%) —
# TODO confirm.
val_ratio = 0.1
# Seed for the train/validation split; kept separate from random_slice_seed.
train_val_split_seed = 666
[model.transformer]
# Hyperparameters for the "transformer" model entry.
lr = 1e-3 # TODO: we can use a learning rate scheduler
seq_len = 64
batch_size = 64
# Same value in every model table that sets it; presumably the number of
# MIDI pitches (0-127) — TODO confirm.
n_notes = 128
embed_dim = 128
hidden_dim = 1024
# Presumably a gradient-clipping threshold; whether it clips by norm or by
# value is decided by the training code, not visible here.
clip_grad = 5.0
num_encoder_layers = 3
num_decoder_layers = 3
# embed_dim (128) is evenly divisible by num_heads (8).
num_heads = 8
# NOTE(review): the source vocabulary has only 2 symbols while the target
# vocabulary has 128 (equal to n_notes) — confirm this asymmetry is intended.
src_vocab_size = 2
tgt_vocab_size = 128
[model.lstm_attn]
# Hyperparameters for the "lstm_attn" model entry.
lr = 1e-3
# Twice the seq_len (64) used by the other model tables that set it.
seq_len = 128
batch_size = 64
n_notes = 128
# This table has no embed_dim key, unlike every other model table in this file.
hidden_dim = 512
# The only model table that sets a dropout probability.
dropout_p = 0.5
# Presumably a gradient-clipping threshold — confirm norm vs. value in the
# training loop.
clip_grad = 5.0
[model.vanilla_rnn]
# Hyperparameters for the "vanilla_rnn" model entry.
lr = 1e-3
seq_len = 64
batch_size = 64
n_notes = 128
# Smaller embedding than the transformer table (32 vs 128).
embed_dim = 32
hidden_dim = 256
# Presumably a gradient-clipping threshold — confirm norm vs. value in the
# training loop.
clip_grad = 5.0
[model.attention_rnn]
# Hyperparameters for the "attention_rnn" model entry.
lr = 1e-3
seq_len = 64
batch_size = 64
n_notes = 128
embed_dim = 32
# Instead of a single hidden_dim, this table sets two sizes; the decode_
# value is twice the encode_ value. Presumably the encoder and decoder
# hidden widths — TODO confirm.
encode_hidden_dim = 512
decode_hidden_dim = 1024
# Presumably a gradient-clipping threshold — confirm norm vs. value in the
# training loop.
clip_grad = 5.0
[model.cnn]
# Only lr and embed_dim are set here. seq_len, batch_size, n_notes and
# clip_grad, which every other model table defines, are absent.
# NOTE(review): confirm the consumer supplies defaults for the missing keys.
lr = 1e-3
embed_dim = 32
[sampling.default]
# Sampling preset "default", using the "stochastic" strategy.
strategy = "stochastic"
# Both top_p and top_k are set; how the two filters combine is decided by
# the sampler, not visible in this file.
top_p = 0.9
top_k = 4
# Presumably a penalty factor applied to repeated outputs — TODO confirm.
repeat_decay = 0.6
# Highest temperature of the three sampling presets in this file.
temperature = 1.5
# Entries are strings, not integers; their meaning is defined by the
# consumer — TODO confirm before changing the type.
hint = ["1"]
[sampling.beta]
# Sampling preset "beta". Identical to [sampling.default] except for a lower
# temperature (1.2 vs 1.5) and a second hint entry.
strategy = "stochastic"
top_p = 0.9
top_k = 4
repeat_decay = 0.6
temperature = 1.2
# Entries are strings, not integers; their meaning is defined by the
# consumer — TODO confirm before changing the type.
hint = ["1", "3"]
[sampling.beam]
# Sampling preset "beam": the only sampling table with strategy = "beam".
# It sets num_beams / beam_prob and omits the top_p / top_k keys used by the
# stochastic presets.
strategy = "beam"
repeat_decay = 0.6
# Entries are strings, not integers; their meaning is defined by the
# consumer — TODO confirm before changing the type.
hint = ["1", "3", "5"]
num_beams = 5
# Presumably a probability used during beam selection; exact meaning is
# defined by the consumer — TODO confirm.
beam_prob = 0.5
# Written as a float to match the other sampling tables (1.5, 1.2). A bare
# `1` is a TOML integer and reaches the consumer as a different type.
temperature = 1.0