-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkshape_res_to_jld2.jl
170 lines (135 loc) · 6.04 KB
/
kshape_res_to_jld2.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# Aggregate k-shape clustering results (pickled by a Python run) into a single
# JLD2 file per region, including revenues from downstream optimization problems.
using ClustForOpt
using JLD2
using FileIO
using PyCall
# Make the Python helper module `load_clusters` importable: prepend the package's
# src/utils directory to Python's sys.path. CLUST_FOR_OPT is a path constant
# provided by ClustForOpt.
util_path = normpath(joinpath(CLUST_FOR_OPT,"src","utils"))
unshift!(PyVector(pyimport("sys")["path"]), util_path) # add util path to search path ### unshift!(PyVector(pyimport("sys")["path"]), "") # add current path to search path
@pyimport load_clusters
# Region selector: "GER" (prices in EUR) or "CA"
region = "GER" # "CA" "GER"
#### DATA INPUT ######
# Input options:
#   region: "GER", "CA"
#   results_data:
#     kshape_it1000_max20000
#     kshape_it1000_max100
#     kshape_it10000_max100
#   opt_problem:
#     battery
#     gas_turbine
# clustering settings: cluster counts k = 1..9, 1000 restarts,
# 1000 k-shape iterations per run (number of clusters - should be 9)
n_k=9
n_clust_min=1
n_clust_max = n_k
n_init =1000
iterations=1000
# create directory where data is saved
# (explicit existence check instead of try/catch used as control flow)
isdir("outfiles") || mkdir("outfiles")
# save settings in txt file
# NOTE(review): DataFrame/writetable come from DataFrames.jl, which is not
# imported in this file — presumably re-exported by ClustForOpt; confirm.
df = DataFrame()
df[:n_clust_min]=n_clust_min
df[:n_clust_max]=n_clust_max
df[:n_init]=n_init
df[:iterations]=iterations
df[:region]=region
n_clust_ar = collect(n_clust_min:n_clust_max)
writetable(joinpath("outfiles",string("parameters_kshape_",region,".txt")),df)
###### load data from pkl and transform to usable format ####
# read in original data
data_orig_daily = load_pricedata(region)
seq = data_orig_daily[:,1:365] # do not load as sequence
# the two operational optimization problems evaluated on the cluster representations
problem_type_ar = ["battery", "gas_turbine"]
# calc hourly mean and sdv, Note: For GER, these are in EUR, since the original data is in EUR
# sequence based normalization
seq_norm, hourly_mean, hourly_sdv = z_normalize(seq;scope="sequence")
# initialize dictionaries of the loaded data (key: number of clusters)
# for debugging - DELETE LATER
kshape_centroids_in=[]
kshape_centroids_all=[]
kshape_centroids = Dict()   # per k: 3D array (hours x clusters x converged runs), back-transformed scale
kshape_labels = Dict()      # per k: cluster assignment per day, for every run (1-based)
# kshape_dist_daily = Dict()
kshape_dist = Dict()        # per k: total distance, converged runs only
kshape_dist_all = Dict()    # per k: total distance, all runs
kshape_iterations = Dict()  # per k: iterations used, converged runs only (after filtering below)
ind_conv = Dict()           # per k: indices of runs that converged
num_conv = zeros(Int32,n_k) # number of converged values
kshape_weights = Dict()     # per k: cluster weights (day-share) per converged run
# directory holding the pickles written by the Python k-shape run
path_scratch = normpath(joinpath(pwd(),"outfiles","pickle_save" ))
# For each cluster count k: load the pickled k-shape results, keep only
# converged runs, and back-transform centroids to the original price scale.
for k=1:n_k
    kshape_iterations[k] = load_clusters.load_pickle(normpath(joinpath(path_scratch,region * "iterations_kshape_" * string(k) * ".pkl")))
    kshape_labels[k] = load_clusters.load_pickle(normpath(joinpath(path_scratch,region * "labels_kshape_" * string(k) * ".pkl"))) .+1 # python to julia indexing
    # a run counts as converged when it stopped before reaching the iteration cap
    ind_conv[k] = find(collect(kshape_iterations[k]) .< iterations-1) # only converged values - collect() transforms tuple to array
    num_conv[k] = length(ind_conv[k])
    kshape_iterations[k] = kshape_iterations[k][ind_conv[k]] # only converged values
    # NOTE(review): the centroid file name has an underscore after the region while
    # the other files do not — confirm this matches the Python writer's file names.
    kshape_centroids_in = load_clusters.load_pickle(normpath(joinpath(path_scratch, region * "_centroids_kshape_" * string(k) * ".pkl")))
    # transpose centroids in order to bring them to the same format as dtw, kmeans, etc.
    kshape_centroids_all=[]
    for i=1:length(kshape_centroids_in)
        push!(kshape_centroids_all,kshape_centroids_in[i]')
    end
    #### back transform centroids from normalized data (converged runs only)
    kshape_centroids[k] = zeros(size(kshape_centroids_all[1])[1],size(kshape_centroids_all[1])[2],num_conv[k])
    for i=1:num_conv[k]
        kshape_centroids[k][:,:,i] = undo_z_normalize(kshape_centroids_all[ind_conv[k][i]],hourly_mean,hourly_sdv; idx=kshape_labels[k][ind_conv[k][i]])
    end
    # load the distance pickle once and slice it for the converged runs,
    # instead of reading the same file from disk twice
    kshape_dist_all[k] = load_clusters.load_pickle(normpath(joinpath(path_scratch,region * "distance_kshape_" * string(k) * ".pkl")))
    kshape_dist[k] = kshape_dist_all[k][ind_conv[k]] # only converged
    # calculate weights: fraction of days assigned to each cluster, per converged run
    kshape_weights[k] = zeros(k,num_conv[k]) # only converged
    for i=1:num_conv[k]
        for j=1:length(kshape_labels[k][ind_conv[k][i]])
            kshape_weights[k][kshape_labels[k][ind_conv[k][i]][j],i] +=1
        end
        kshape_weights[k][:,i] = kshape_weights[k][:,i]/length(kshape_labels[k][ind_conv[k][i]])
    end
end #k=1:n_k
##### end of load data from pickle ###########
# Result containers for the aggregated output, keyed by (cluster count, run
# index) for per-run arrays or by cluster count alone for per-k vectors.
# TODO: eliminate n_init, substitute num_conv -> make cost and iter Dicts as well -
# there must be zeros in revenue (now solved) and in cost -> which leads to the
# 0/0 pair in the plot
centers  = Dict{Tuple{Int,Int},Array}()
clustids = Dict{Tuple{Int,Int},Array}()
cost     = Dict{Int,Array}()
iter     = Dict{Int,Array}()
weights  = Dict{Tuple{Int,Int},Array}()
# one inner Dict (keyed by cluster count) per optimization problem type
revenue = Dict{String,Dict}()
for prob in problem_type_ar
    revenue[prob] = Dict{Int,Array}()
end
# iterate through settings: for each cluster count, copy the converged k-shape
# results into the flat output dicts and evaluate each cluster representation
# in the operational optimization problems.
# BUGFIX: the kshape_* Dicts, ind_conv and num_conv are keyed/indexed by the
# cluster count k itself, not by the position within n_clust_ar; the original
# indexed them with n_clust_it, which only coincided because n_clust_min == 1.
# BUGFIX: clustids previously took kshape_labels[...][i] (the i-th run overall)
# although i enumerates *converged* runs — now mapped through ind_conv, matching
# how centers, cost and weights were filtered in the loading loop.
for n_clust_it=1:length(n_clust_ar)
    n_clust = n_clust_ar[n_clust_it] # actual cluster count; used as Dict key
    cost[n_clust] = zeros(Float64,num_conv[n_clust])
    iter[n_clust] = zeros(Int,num_conv[n_clust])
    # initialize revenue array within dict
    for ii=1:length(problem_type_ar)
        revenue[problem_type_ar[ii]][n_clust] = zeros(Float64,num_conv[n_clust])
    end
    for i = 1:num_conv[n_clust]
        centers[n_clust,i] = kshape_centroids[n_clust][:,:,i]
        clustids[n_clust,i] = kshape_labels[n_clust][ind_conv[n_clust][i]]
        cost[n_clust][i] = kshape_dist[n_clust][i]
        iter[n_clust][i] = kshape_iterations[n_clust][i]
        weights[n_clust,i] = kshape_weights[n_clust][:,i] # weights
        # run opt: total revenue of each problem on this cluster representation
        for ii=1:length(problem_type_ar)
            revenue[problem_type_ar[ii]][n_clust][i]=sum(run_opt(problem_type_ar[ii],(centers[n_clust,i]),weights[n_clust,i],region,false))
        end
    end
end
# Persist the aggregated results as a single JLD2 archive for this region.
outfile = joinpath("outfiles", string("aggregated_results_kshape_", region, ".jld2"))
results = Dict(
    "centers"  => centers,
    "clustids" => clustids,
    "cost"     => cost,
    "iter"     => iter,
    "weights"  => weights,
    "revenue"  => revenue,
)
save(outfile, results)
println("kshape data revenue calculated + saved.")