data_utils.py
import numpy as np

from music_utils import *   # stream, note, tempo, midi are expected to come via these star imports (music21)
from preprocess import *
from keras.utils import to_categorical

# Load the training corpus once at import time so generate_music() can default to it.
chords, abstract_grammars = get_musical_data('data/original_metheny.mid')
corpus, tones, tones_indices, indices_tones = get_corpus_data(abstract_grammars)
N_tones = len(set(corpus))

# Inference-time initializers: a zeroed one-hot input over the 78-tone vocabulary
# and zeroed LSTM hidden/cell states of size n_a.
n_a = 64
x_initializer = np.zeros((1, 1, 78))
a_initializer = np.zeros((1, n_a))
c_initializer = np.zeros((1, n_a))
def load_music_utils():
    """Load the MIDI corpus and build the (X, Y) training tensors."""
    chords, abstract_grammars = get_musical_data('data/original_metheny.mid')
    corpus, tones, tones_indices, indices_tones = get_corpus_data(abstract_grammars)
    N_tones = len(set(corpus))
    X, Y, N_tones = data_processing(corpus, tones_indices, 60, 30)
    return (X, Y, N_tones, indices_tones)
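
# Example usage (a sketch -- assumes data/original_metheny.mid is present):
#     X, Y, n_values, indices_tones = load_music_utils()
#     print('shape of X:', X.shape)   # e.g. (m, Tx, 78), with m examples of Tx time steps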
def generate_music(inference_model, corpus=corpus, abstract_grammars=abstract_grammars, tones=tones,
                   tones_indices=tones_indices, indices_tones=indices_tones, T_y=10, max_tries=1000,
                   diversity=0.5):
    """
    Generates music using a model trained to learn the musical patterns of a jazz soloist,
    and writes the result to an audio stream that can be saved and played.

    Arguments:
    inference_model -- Keras model instance for inference time
    corpus -- musical corpus, list of 193 tones as strings (e.g. 'C,0.333,<P1,d-5>')
    abstract_grammars -- list of grammars; one element can be: 'S,0.250,<m2,P-4> C,0.250,<P4,m-2> A,0.250,<P4,m-2>'
    tones -- set of unique tones, e.g. 'A,0.250,<M2,d-4>' is one element of the set
    tones_indices -- python dictionary mapping each unique tone (e.g. 'A,0.250,<m2,P-4>') to its index (0-77)
    indices_tones -- python dictionary mapping each index (0-77) back to its unique tone
    T_y -- integer, desired length of the generated sequence
    max_tries -- integer, maximum number of generation attempts
    diversity -- scalar value, defines how conservative/creative the model is when generating music

    Returns:
    out_stream -- music21 stream containing the generated sounds
    """
    # Set up the audio stream
    out_stream = stream.Stream()

    # Initialize chord variables
    curr_offset = 0.0                    # write offset into the stream
    num_chords = int(len(chords) / 3)    # number of different sets of chords

    print("Predicting new values for different sets of chords.")
    # Loop over all sets of chords (18 in the default corpus). At each iteration,
    # generate a sequence of tones and use the current chords to convert it into actual sounds.
    for i in range(1, num_chords):
        # Retrieve the current chords from the stream
        curr_chords = stream.Voice()
        # Loop over the chords of the current set of chords
        for j in chords[i]:
            # Add each chord to curr_chords, offset relative to the current 4-beat measure
            curr_chords.insert((j.offset % 4), j)

        # Generate a sequence of tones using the model
        _, indices = predict_and_sample(inference_model)
        indices = list(indices.squeeze())
        pred = [indices_tones[p] for p in indices]

        predicted_tones = 'C,0.25 '
        for k in range(len(pred) - 1):
            predicted_tones += pred[k] + ' '
        predicted_tones += pred[-1]

        #### POST-PROCESSING OF THE PREDICTED TONES ####
        # Treat "A" and "X" tones as "C" tones (a common simplification).
        predicted_tones = predicted_tones.replace(' A', ' C').replace(' X', ' C')

        # Pruning #1: smoothing measure
        predicted_tones = prune_grammar(predicted_tones)

        # Use the predicted tones and current chords to generate sounds
        sounds = unparse_grammar(predicted_tones, curr_chords)

        # Pruning #2: remove sounds that are repeated or too close together
        sounds = prune_notes(sounds)

        # Quality assurance: clean up the sounds
        sounds = clean_up_notes(sounds)

        # Print the number of tones/notes in sounds
        print('Generated %s sounds using the predicted values for the set of chords ("%s") and after pruning' % (len([k for k in sounds if isinstance(k, note.Note)]), i))

        # Insert the sounds and the accompanying chords into the output stream
        for m in sounds:
            out_stream.insert(curr_offset + m.offset, m)
        for mc in curr_chords:
            out_stream.insert(curr_offset + mc.offset, mc)

        curr_offset += 4.0

    # Initialize the tempo of the output stream to 130 beats per minute
    out_stream.insert(0.0, tempo.MetronomeMark(number=130))

    # Save the audio stream to a MIDI file
    mf = midi.translate.streamToMidiFile(out_stream)
    mf.open("output/my_music.midi", 'wb')
    mf.write()
    print("Your generated music is saved in output/my_music.midi")
    mf.close()

    # To play the final stream, uncomment:
    # play = lambda x: midi.realtime.StreamPlayer(x).play()
    # play(out_stream)

    return out_stream
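
# Example usage (a sketch -- 'inference_model' here is hypothetical and must be a trained
# Keras inference model compatible with predict_and_sample() below):
#     out_stream = generate_music(inference_model)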
def predict_and_sample(inference_model, x_initializer=x_initializer, a_initializer=a_initializer,
                       c_initializer=c_initializer):
    """
    Predicts the next sequence of values using the inference model.

    Arguments:
    inference_model -- Keras model instance for inference time
    x_initializer -- numpy array of shape (1, 1, 78), one-hot vector initializing the values generation
    a_initializer -- numpy array of shape (1, n_a), initializing the hidden state of the LSTM_cell
    c_initializer -- numpy array of shape (1, n_a), initializing the cell state of the LSTM_cell

    Note: Ty, the length of the generated sequence, is fixed by the architecture of inference_model.

    Returns:
    results -- numpy array of shape (Ty, 78), matrix of one-hot vectors representing the values generated
    indices -- numpy array of shape (Ty, 1), matrix of indices representing the values generated
    """
    ### START CODE HERE ###
    # Step 1: use the inference model to predict an output sequence from the
    # initial input and the zeroed LSTM states.
    pred = inference_model.predict([x_initializer, a_initializer, c_initializer])
    # Step 2: convert "pred" into indices by taking the argmax along the last axis.
    indices = np.argmax(pred, axis=-1)
    # Step 3: convert the indices back into their one-hot encodings.
    results = to_categorical(indices, num_classes=78)
    ### END CODE HERE ###
    return results, indices
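
# Example usage (a sketch -- 'inference_model' is hypothetical and must accept
# [x, a0, c0] as its inputs):
#     results, indices = predict_and_sample(inference_model)
#     print("list(indices[:5]) =", list(indices.squeeze()[:5]))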