-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpython-pptx_aid.py
255 lines (205 loc) · 9.34 KB
/
python-pptx_aid.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
'''
A list of functions for automating reports with python-pptx
'''
# Importing libraries
import pandas as pd
import numpy as np
from pptx.chart.data import CategoryChartData, ChartData, BubbleChartData
from pptx.util import Inches
from pptx.enum.chart import XL_CHART_TYPE
from pptx.dml.color import RGBColor
from pptx.enum.dml import MSO_THEME_COLOR
from pptx.util import Pt
# Functions for displaying graphs:
def plot_bubble(chart_position:str,
                categories_names:list,
                x_values:list,
                y_values:list,
                size_values:list,
                slides_data:pd.DataFrame,
                latest_date:str):
    '''
    Replace the data of an existing bubble chart and relabel every bubble.

    All bubbles are written into a single series named 'Slide data'.

    Args:
        chart_position: the chart object to update, despite the str annotation (example - slide.shapes[0].chart)
        categories_names (list): text shown as the data label of each bubble, one entry per bubble
        x_values (list): row labels in slides_data holding the X value of each bubble
        y_values (list): row labels in slides_data holding the Y value of each bubble
        size_values (list): row labels in slides_data holding the size of each bubble
        slides_data (DataFrame): dataframe with data for the graph
        latest_date (str): column of slides_data to read the values from
    '''
    def rounded_values(row_labels):
        # Every value is rounded to a whole number before it reaches the chart
        return [round(slides_data.loc[row, latest_date]) for row in row_labels]

    target_chart = chart_position
    bubble_data = BubbleChartData()
    xs = rounded_values(x_values)
    ys = rounded_values(y_values)
    sizes = rounded_values(size_values)

    # One data point per category name, all in the same series
    series = bubble_data.add_series('Slide data')
    for idx, _ in enumerate(categories_names):
        series.add_data_point(xs[idx], ys[idx], sizes[idx])
    target_chart.replace_data(bubble_data)

    # Each bubble gets its category name as label text, then the label font is set
    for idx, label in enumerate(categories_names):
        data_label = target_chart.series[0].points[idx].data_label
        data_label.text_frame.text = label
        edit_text(text=data_label.text_frame.paragraphs[0],
                  font_name='Calibri',
                  font_size=11,
                  font_color_rgb=RGBColor(64,64,64),
                  font_italic=False,
                  font_bold=False,
                  font_shadow=False)

    # The data is written once more after the labels have been edited
    target_chart.replace_data(bubble_data)
def plot_dynamic(chart_position:str,
                 date_columns:list,
                 num_lines:int,
                 lines_locations:list,
                 slides_data:pd.DataFrame,
                 lines_names:list,
                 abs_values=True,
                 rounding_level=0):
    '''
    Replace the data of a chart whose categories are date columns.

    Args:
        chart_position: the chart object to update, despite the str annotation (example - slide.shapes[0].chart)
        date_columns (list): columns of slides_data offered as categories; a column is skipped
            when the row lines_locations[0] has no value in it
        num_lines (int): number of series to collect values for
        lines_locations (list): row labels in slides_data, one per series
        slides_data (DataFrame): dataframe with data for the graph
        lines_names (list): series names, matched by position to the collected series
        abs_values (bool): when True, the data labels of every series get the number format '#;#'
        rounding_level (int): number of decimal places passed to round() for every value
    '''
    target_chart = chart_position
    new_data = ChartData()

    # Only the first series decides whether a date column counts as filled
    used_dates = [col for col in date_columns
                  if not pd.isna(slides_data.loc[lines_locations[0], col])]
    new_data.categories = used_dates

    # One list of rounded values per series, in the order of the used dates
    values_per_line = [
        [round(slides_data.loc[lines_locations[idx], col], rounding_level) for col in used_dates]
        for idx in range(num_lines)
    ]
    for idx, series_name in enumerate(lines_names):
        new_data.add_series(series_name, values_per_line[idx])

    target_chart.replace_data(new_data)

    if not abs_values:
        return
    # NOTE(review): '#;#' has no decimal placeholders, so decimals kept by
    # rounding_level are presumably not shown in the labels - confirm
    for plotted_series in target_chart.series:
        plotted_series.data_labels.number_format = '#;#'
def plot_static(chart_position:object,
                categories_names:list,
                series_names:list,
                series_data:list,
                slides_data:pd.DataFrame,
                latest_date:str,
                abs_values=True):
    '''
    Replace the data of a regular (category) chart using a single dataframe column.

    Args:
        chart_position (object): the chart object to update (example - slide.shapes[0].chart)
        categories_names (list): a list with categories names
        series_names (list): a list with series names
        series_data (list of lists): row labels in slides_data. One inner list - one series
            (matched by position to series_names), with 1 entry for each category.
        slides_data (DataFrame): dataframe with data for the graph
        latest_date (str): last date (which column to use)
        abs_values (bool): when True, the data labels of every series get the number format '#;#'
    '''
    # Selecting the chart:
    chart = chart_position
    # Gathering data: every value is rounded to a whole number.
    # (The loop variable is no longer called `list`, which hid the builtin.)
    new_series_data = [
        [round(slides_data.loc[row_label, latest_date]) for row_label in row_labels]
        for row_labels in series_data
    ]
    # Creating the chart data object:
    chart_data = ChartData()
    chart_data.categories = categories_names
    # Collecting data for inserting; indexing keeps the IndexError raised when
    # there are more series names than data lists:
    for i, series_name in enumerate(series_names):
        chart_data.add_series(series_name, new_series_data[i])
    # Replacing data:
    chart.replace_data(chart_data)
    if abs_values:
        # Formatting labels if required:
        for chart_series in chart.series:
            chart_series.data_labels.number_format = '#;#'
# Functions for editing text data:
def edit_text(text:object,
              font_name:str,
              font_size:int,
              font_color_rgb:list,
              font_italic=False,
              font_bold=False,
              font_shadow=False):
    '''
    Apply a set of font settings to one text object in a single call.

    Args:
        text (object): object exposing a font attribute (example - slide.shapes[0].text_frame.paragraphs[0])
        font_name (str): value assigned to font.name
        font_size (int): font size, wrapped in Pt() before it is assigned to font.size
        font_color_rgb: value assigned to font.color.rgb (example - font_color_rgb=RGBColor(127,127,127))
        font_italic (bool): value assigned to font.italic
        font_bold (bool): value assigned to font.bold
        font_shadow (bool): value assigned to font.shadow
    '''
    target_font = text.font
    target_font.name = font_name
    target_font.size = Pt(font_size)
    target_font.italic = font_italic
    target_font.bold = font_bold
    target_font.color.rgb = font_color_rgb
    # NOTE(review): confirm the font object actually supports a shadow attribute
    target_font.shadow = font_shadow
def calc_diff(position:int,
              negative=False,
              *,
              data:'pd.DataFrame | None'=None,
              current_date:'str | None'=None,
              previous_date:'str | None'=None
              ) -> float:
    '''
    Function to calculate difference between the latest date and the one before that.

    Both values are rounded to whole numbers before they are subtracted.

    Args:
        position (int): which dataframe row corresponds to data of interest (where to calculate)
        negative (bool): when truthy, the sign of the difference is flipped
        data (DataFrame): dataframe to read from; defaults to the module-level slides_data
        current_date (str): column with the latest values; defaults to the module-level latest_date
        previous_date (str): column with the previous values; defaults to the module-level pre_latest_date
    Returns:
        current_difference (float): difference between two values - from the last date and the one before that
    '''
    # Fall back to the module-level variables when no explicit source is given,
    # so existing calls calc_diff(position) / calc_diff(position, True) keep working
    if data is None:
        data = slides_data
    if current_date is None:
        current_date = latest_date
    if previous_date is None:
        previous_date = pre_latest_date

    current_difference = round(data.loc[position, current_date]) - round(data.loc[position, previous_date])
    # Truthiness instead of `== True` / `== False`: a value such as None used to
    # leave the result unassigned and fail with UnboundLocalError
    if negative:
        current_difference = -1 * current_difference
    return current_difference
def print_out_diff(current_diff:float,
                   threshold:float,
                   significant=False)-> str:
    """
    A function for creating a text containing the difference. Useful for easily creating text boxes indicating changes since last measurement.

    Args:
        current_diff (float): difference between two values - from the last date and the one before that
        threshold (float): what difference level do we consider significant.
        significant (bool): if the metric itself is considered significant. If it is - then any number is displayed,
            if not - only when the number reaches the threshold (current_diff >= threshold or current_diff <= -threshold)
    Returns:
        curr_text (str): text itself with a sign (+ or -), value and 'p.p.' standing for 'percentage points';
            an empty string when there is nothing to display
    """
    # Truthiness instead of two separate `== False` / `== True` tests: a value
    # such as None used to match neither and fail with UnboundLocalError
    if significant:
        if current_diff > 0:
            return f'+{current_diff} p.p.'
        if current_diff < 0:
            return f'{current_diff} p.p.'
        if current_diff == 0:
            return '0 p.p.'
        # Not comparable with zero (e.g. NaN): nothing to display, same as the
        # non-significant branch returns for such a value
        return ''

    if current_diff >= threshold:
        return f'+{current_diff} p.p.'
    if current_diff <= -threshold:
        # The minus sign comes from the number itself
        return f'{current_diff} p.p.'
    return ''