# Hyper_Parameter.yaml
Sound:
    Spectrogram_Dim: 1025
    Mel_Dim: 80
    Frame_Length: 1024
    Frame_Shift: 256
    Sample_Rate: 24000
    Mel_F_Min: 125
    Mel_F_Max: 7600
    Max_Abs_Mel: 4
    Confidence_Threshold: 0.6
    Gaussian_Smoothing_Sigma: 0.0
    Quantinized_Pitch_Dim: 256
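    # Notes: at Sample_Rate 24000, Frame_Shift 256 is a hop of ~10.7 ms and Frame_Length 1024 a
    # window of ~42.7 ms. If Spectrogram_Dim is the number of linear STFT bins, 1025 implies an
    # FFT size of 2048 (2048 / 2 + 1), i.e. a zero-padded 1024-sample window. Confidence_Threshold,
    # Gaussian_Smoothing_Sigma, and Quantinized_Pitch_Dim presumably configure the pitch (F0)
    # extractor and its 256-bin quantization used by the F0-conditioned decoder below.
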
Content_Encoder:
    Conv:
        Channels: [512, 512, 512]
        Kernel_Sizes: [5, 5, 5]
        Use_GroupNorm: true
    LSTM:
        Sizes: 16
        Stacks: 2
    Frequency: 16
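    # Frequency is likely the temporal down/upsampling factor applied to the content codes, which
    # is why Train/Train_Pattern/Mel_Length must be a multiple of it. LSTM/Sizes: 16 most likely
    # means the per-direction hidden size of a bidirectional LSTM, giving the 32-dim content
    # bottleneck of AutoVC; treat both readings as assumptions, not statements of the code.
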
Style_Encoder:
    Embedding_Size: 256
    LSTM:
        Sizes: 256
        Stacks: 3
    Inference:
        Samples: 5
        Slice_Length: 64
        Overlap_Length: 32
    Checkpoint_Path: './Style_Encoder/Example_Results/Checkpoint/S_100000.pkl'
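    # The style (speaker) encoder is loaded from the pre-trained checkpoint above rather than
    # trained here. The Inference block presumably means the speaker embedding is computed from
    # Samples (5) mel slices of Slice_Length (64) frames taken with Overlap_Length (32) frames of
    # overlap and then averaged, in the usual d-vector fashion; this is an assumption about the
    # surrounding code, not something stated in this file.
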
Decoder:
    Use_Pitch: true  # F0-AutoVC
    # The paper places a pre-conv stack here, but there is no conv in the official code.
    Pre_LSTM:
        Sizes: 512  # Official code: 512, paper: 1024
        Stacks: 3   # Vanilla: 1
    Conv:
        Channels: []      # Vanilla: [512, 512, 512]
        Kernel_Sizes: []  # Vanilla: [5, 5, 5]
        Use_GroupNorm: true
        Dropout: 0.0  # This part is customized.
    Post_LSTM:
        Sizes: 1024
        Stacks: 0  # Vanilla: 2, F0: 0
    Postnet:
        Channels: [512, 512, 512, 512]
        Kernel_Sizes: [5, 5, 5, 5]
        Activation: 'relu'  # Vanilla: 'tanh'
        Use_GroupNorm: true
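    # With Use_Pitch: true the decoder follows the F0-AutoVC variant: a quantized pitch signal
    # (see Sound/Quantinized_Pitch_Dim) is presumably fed to the decoder alongside the content
    # codes and the style embedding. The 'Vanilla' comments above give the values that would
    # reproduce the original AutoVC decoder instead.
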
WaveNet:
    Residual_Channels: 64
    ResConvGLU:
        Blocks: 3
        Stacks_in_Block: 10
        Gate_Channels: 128
        Kernel_Size: 3
        Skip_Channels: 64
        Dropout_Rate: 0.0
    Upsample:
        Scales: [4, 4, 4, 4]
        Pad: 2
    # Checkpoint_Path: './PWGAN/Example_Results/Checkpoint/S_400000.pkl'  # If no vocoder checkpoint is used, set null.
    # Checkpoint_Path: "D:/GoogleDrive/Colab_Test/PWGAN_Torch/SR24K.Results/Checkpoint/S_85000.pkl"
    # Checkpoint_Path: './PWGAN/S_300000.pkl'
    Checkpoint_Path: 'D:/PWGAN.Results/SR24K.Results/Checkpoint/S_177000.pkl'
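    # The product of Upsample/Scales (4 * 4 * 4 * 4 = 256) equals Sound/Frame_Shift, so the vocoder
    # upsamples one mel frame to exactly one hop of audio samples; keep the two values in sync if
    # either is changed. Judging by the PWGAN paths, the vocoder is a Parallel WaveGAN-style
    # generator loaded pre-trained from Checkpoint_Path rather than trained by this config.
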
Train:
    Use_Pattern_Cache: true
    Train_Pattern:
        # Path: '../Pattern/SS.SR24K.Pattern/Train'
        Path: 'C:/Pattern/SS.SR24K.VCTK106.Pattern/Train'
        Metadata_File: 'METADATA.PICKLE'
        Mel_Length:  # Must be a multiple of Content_Encoder/Frequency
            Min: 64
            Max: 192
        Interpolation:
            Min: 0.7
            Max: 1.35
        Energy_Variance:  # Calculated in a simplified way
            Min: 0.35
            Max: 1.0
        Use_Style_from_Content_Mel: false
        Accumulated_Dataset_Epoch: 1  # Prevents torch.utils.data.DataLoader slowdown when the number of speakers is small.
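        # Mel_Length, Interpolation, and Energy_Variance are presumably pattern filters and
        # augmentation ranges: training mels are kept to 64-192 frames (both multiples of
        # Content_Encoder/Frequency = 16), and the Interpolation range looks like a random
        # time-stretch factor of 0.7x-1.35x per pattern. These readings are inferred from the
        # key names, not stated in this file.
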
    Eval_Pattern:
        # Path: '../Pattern/SS.SR24K.Pattern/Eval'
        Path: 'C:/Pattern/SS.SR24K.VCTK106.Pattern/Eval'
        Metadata_File: 'METADATA.PICKLE'
    Num_Workers: 2
    Batch_Size: 2
    Loss_Weight:
        Pre_Mel_L1: 0.0
        Post_Mel_L1: 0.0
        Pre_Mel_L2: 1.0
        Post_Mel_L2: 1.0
        Content_L1: 1.0
        Content_L2: 0.0
    Learning_Rate:
        Initial: 1.0e-4
        Decay_Step: 1000000  # Not applied
        Decay_Rate: 1.0      # Not applied
    ADAM:
        Beta1: 0.9
        Beta2: 0.999
        Epsilon: 1.0e-7
    Gradient_Norm: 10.0
    Max_Step: 400000
    Checkpoint_Save_Interval: 5000
    Logging_Interval: 100
    Evaluation_Interval: 1000
    Inference_Interval: 5000
    Initial_Inference: true

# Inference_Path: './SR24K.Results.VCTK/Inference'
# Checkpoint_Path: './SR24K.Results.VCTK/Checkpoint'
# Log_Path: './SR24K.Results.VCTK/Log'
Inference_Path: 'D:/AutoVC.Results/SR24K.Results.VCTKLibri/Inference'
Checkpoint_Path: 'D:/AutoVC.Results/SR24K.Results.VCTKLibri/Checkpoint'
Log_Path: 'D:/AutoVC.Results/SR24K.Results.VCTKLibri/Log'
Use_Mixed_Precision: true  # apex is required.
Device: '0'
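# Use_Mixed_Precision relies on NVIDIA apex for automatic mixed precision, per the comment above.
# Device is the GPU index as a string; it is presumably used to select the visible CUDA device
# (e.g. via CUDA_VISIBLE_DEVICES or torch.device) in the training script, so whether multi-GPU
# values are accepted depends on that code.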