-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplotter.py
216 lines (153 loc) · 5.91 KB
/
plotter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
'''
Name: plotter.py
Author: Timur Kasimov
Created: June 2024
Updated: July 2024
Purpose:
Plots aggregated data for electricity generation and saves
the graph as well as excel file with values used for plotting
'''
import pandas as pd
import os
import matplotlib.pyplot as plt
import mappings
import country_groups
import psr_groups
def get_combined_df(region, freq, start, end, columns, type_name):
'''
inputs:
________
region: str
freq: str
start: int
end: int
columns: list
type_name str
outputs:
____________
combined_df: pandas df
'''
df_dict = {}
# record each year's combined production values in a dictionary to then find min, max, mean
for year in range(start, end+1):
df_year = pd.read_excel('./combined/' + region + ' ' + freq + '.xlsx', sheet_name=str(year))
# add columns for combined production based on psr groups (columns)
existing_columns = [col for col in columns if col in df_year.columns] # this should fix the issue of a region not having certain production tyeps
df_year[type_name] = df_year[existing_columns].sum(axis=1)
df_dict[year] = df_year
# index all dataframes with sequential integers
# this is okay for days (365 or 366) and months (12) but a
# little problematic for weeks (mostly gives 53 instead of 52 and cuts off
# first and last week weirdly)
for key, df in df_dict.items():
df = df.drop('Unnamed: 0', axis=1) # this might be unnecessary now since only storing one column
df_dict[key] = df
# i think concat just does all years, not the specified ones
# SOLVED: only keep the necessary years in df_dict from the beginning
combined_df = pd.concat(df_dict.values(), keys=df_dict.keys())
return combined_df
def max(combined_df):
max_df = combined_df.groupby(level=1).max()
return max_df
def min(combined_df):
min_df = combined_df.groupby(level=1).min()
return min_df
def mean(combined_df):
# combined_df =pd.DataFrame
mean_df = combined_df.groupby(level=1).mean()
return mean_df
def my_plot(region, freq, psr_codes, type_name, range=None, years=None, output_table_file=False):
'''
inputs:
________
region: str
freq: str
psr_codes: list
type_name (custom): str
range (for mean and min-max spread): tuple of shape (year1, year2)
years (for individual lines): list
outputs:
____________
* shows and saves plot as an image
* saves xlsx file with all values used in the plot
'''
basename = './combined/' + region + ' ' + freq
title = region + ' ' + type_name + ' Generation'
if range is not None:
range_str = str(range[0]) + '-' + str(range[1])
else:
range_str = ''
# put all psr types in a list
# note this is implemented for generation only.
columns = []
for code in psr_codes:
column = mappings.PSRTYPE_MAPPINGS[code] +' Generation'
columns.append(column)
plt.figure(figsize=(10,10))
# FOR SAVING EXCEL FILE
if not os.path.isdir('plots'):
os.mkdir('plots')
writer = pd.ExcelWriter(('./plots/' + title + ' ' + freq + ' ' + range_str + ' spread' + '.xlsx'), mode='w')
df_to_excel = pd.DataFrame()
# PLOTTING SPREAD
if range is not None:
#derive min, max, mean for the specific psr group
psr_totals_df = get_combined_df(region, freq, range[0], range[1], columns, type_name)
# access min, max, mean data
df_max = max(psr_totals_df)
df_min = min(psr_totals_df)
df_mean = mean(psr_totals_df)
#plot
#plot mean line
plt.plot(df_mean.index+1, df_mean[type_name]/1000, label=range_str + ' Mean', color='green')
#plot min_max spread
plt.fill_between(df_max.index+1, df_max[type_name]/1000, df_min[type_name]/1000, color='grey', alpha=0.3, label=range_str + ' Min-Max Spread')
df_to_excel['Mean [GWh]'] = df_mean[type_name]/1000
df_to_excel['Max [GWh]'] = df_max[type_name]/1000
df_to_excel['Min [GWh]'] = df_min[type_name]/1000
# END PLOTTING SPREAD
# PLOTTING YEARS
if years is not None:
for year in years:
# access respective frequency file, year
df = pd.read_excel(basename + '.xlsx', sheet_name=str(year))
# add together requested psr types into one df
existing_columns = [col for col in columns if col in df.columns]
df[type_name] = df[existing_columns].sum(axis=1)
# plot that year and psr group
# print(df.index)
plt.plot(df.index+1, df[type_name]/1000, label=str(year))
df_to_excel[str(year) + ' [GWh]'] = df[type_name]/1000
# END PLOTTING YEARS
time_int = mappings.FREQUENCY_MAPPINGS[freq]
plt.xlabel(time_int)
plt.ylabel('GWh/'+time_int)
plt.title(title)
plt.legend() # put the legend outside the main frame
#save the plot
plt.savefig('./plots/'+ title + ' ' + freq + ' ' + range_str + ' spread')
# Show the plot
if output_table_file:
df_to_excel.to_excel(writer)
writer.close()
plt.show()
return
#end my_plot
############
### MAIN ###
############
if __name__ == '__main__':
### REGION ###
region = 'Romania' # name of the combined region as saved in excel files
freq = '1M' # frequency as saved in excel files
### YEARS FOR MIN-MAX SPREAD, MEAN LINE ###
start_spread = 2015
end_spread = 2023
### GENERATION TYPE/GROUP ###
psr_codes = psr_groups.TOTAL # group for type of generation (see psr_groups.py and mappings.py)
psr_label = 'Total' # custom label
### YEAR-SPECIFIC LINES ###
start_year = 2020
end_year =2024
### call to plot ###
my_plot(region, freq, psr_codes, psr_label, range=(start_spread, end_spread), years=range(start_year, end_year+1), output_table_file=True)