-
Notifications
You must be signed in to change notification settings - Fork 41
/
Copy pathtab_stats_generate_md_file.py
executable file
·290 lines (243 loc) · 13.7 KB
/
tab_stats_generate_md_file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
#!/usr/bin/python3
"""
Script using the gathered data from the OSHP project "oshp-stats" to generate/update the
markdown file "tab_statistics.md" with mermaid pie charts with differents statistics about HTTP security headers usage.
Source:
https://mermaid-js.github.io/mermaid/#/pie
https://github.com/oshp/oshp-stats/
"""
import sqlite3
import re
import json
import hashlib
from collections import Counter
from datetime import datetime
from pathlib import Path
# Constants
DEBUG = True
DATA_DB_FILE = "/tmp/data.db"
OSHP_SECURITY_HEADERS_FILE_lOCATION = "headers_add.json"
OSHP_SECURITY_HEADERS_EXTRA_FILE_LOCATION = "/tmp/oshp_headers_extra_to_include.txt"
MD_FILE = "../tab_statistics.md"
IMAGE_FOLDER_LOCATION = "../assets/tab_stats_generated_images"
TAB_MD_TEMPLATE = """---
title: statistics
displaytext: Statistics
layout: null
tab: true
order: 7
tags: headers
---
<!-- All the content of this file is generated by the script "ci/tab_stats_generate_md_file.py" -->
<!-- DO NOT EDIT IT MANUALLY -->
# Statistic about HTTP security response headers usage
<!-- markdown-link-check-disable -->
"""
SECTION_TEMPLATE = f"""
## %s
%s
![%s]({IMAGE_FOLDER_LOCATION.replace('../', '')}/%s)
"""
SECTION_TEMPLATE_NO_MERMAID_CODE = """
## %s
%s
"""
# Utility functions
def trace(msg):
if DEBUG:
print(f"[DEBUG] {msg}")
def prepare_generation_of_image_from_mermaid(mermaid_code, filename):
trace(f"Call prepare_generation_of_image_from_mermaid() => {filename}")
with open(f"{IMAGE_FOLDER_LOCATION}/{filename}.mmd", "w", encoding="utf-8") as f:
f.write(mermaid_code + "\n")
trace("Call end.")
def load_oshp_headers():
trace("Call load_oshp_headers()")
header_names = []
trace(f"Call load_oshp_headers() :: Load and parse file {OSHP_SECURITY_HEADERS_FILE_lOCATION}")
with open(OSHP_SECURITY_HEADERS_FILE_lOCATION, mode="r", encoding="utf-8") as f:
data = json.load(f)
http_headers = data["headers"]
for http_header in http_headers:
header_names.append(http_header["name"].lower())
trace(f"Call load_oshp_headers() :: Load file {OSHP_SECURITY_HEADERS_EXTRA_FILE_LOCATION}")
with open(OSHP_SECURITY_HEADERS_EXTRA_FILE_LOCATION, mode="r", encoding="utf-8") as f:
http_headers = f.read()
trace(f"Call load_oshp_headers() :: Parse file {OSHP_SECURITY_HEADERS_EXTRA_FILE_LOCATION}")
for http_header in http_headers .split("\n"):
header_names.append(http_header.lower().strip(" \n\r\t"))
header_names = list(dict.fromkeys(header_names))
header_names.sort()
trace("Call end.")
return header_names
def execute_query_against_data_db(sql_query):
trace(f"Call execute_query_against_data_db() => {sql_query}")
with sqlite3.connect(DATA_DB_FILE) as connection:
curs = connection.cursor()
curs.execute(sql_query)
records = curs.fetchall()
trace("Call end.")
return records
def add_stats_section(title, description, chart_mermaid_code):
trace(f"Call add_stats_section() => '{title}'")
with open(MD_FILE, mode="a", encoding="utf-8") as f:
if chart_mermaid_code is not None and len(chart_mermaid_code.strip()) > 0:
base_image_filename = hashlib.sha1(title.encode("utf8")).hexdigest()
prepare_generation_of_image_from_mermaid(chart_mermaid_code, base_image_filename)
md_code = SECTION_TEMPLATE % (title, description, base_image_filename, f"{base_image_filename}.png")
else:
md_code = SECTION_TEMPLATE_NO_MERMAID_CODE % (title, description)
f.write(f"{md_code}\n")
trace("Call end.")
def init_stats_file():
trace("Call init_stats_file()")
with open(MD_FILE, mode="w", encoding="utf-8") as f:
cdate = datetime.now().strftime("%m/%d/%Y at %H:%M:%S")
f.write(TAB_MD_TEMPLATE)
f.write("\n\n")
f.write(f"📅 Last update: {cdate} - Domains analyzed count: {get_domains_count()}.\n")
trace("Call end.")
def get_domains_count():
return len(execute_query_against_data_db("select distinct domain from stats"))
def get_pie_chart_code(title, dataset_tuples):
# code = f"pie title {title}\n"
code = f"pie\n"
for dataset_tuple in dataset_tuples:
# Note: Mermaid use integer value when rendering
code += f"\t\"{dataset_tuple[0]}\" : {round(dataset_tuple[1], 2)}\n"
return code
def csp_contain_unsafe_expression(csp_policy):
contain_unsafe_expression = False
# Determine if a CSP policy contains (default-src|script-src|script-src-elem|script-src-attr|style-src) directives using (unsafe-inline|unsafe-hashes|unsafe-eval) expressions
# Based on "https://report-uri.com/home/generate" generator allowed instructions for CSP directives
exp_all_unsafe_expressions = r'(unsafe-inline|unsafe-hashes|unsafe-eval)'
exp_style_unsafe_expressions = r'(unsafe-inline|unsafe-hashes)'
exp_directive_name_allowing_all_unsafe_expressions = r'(default-src|script-src|script-src-elem|script-src-attr)'
directives = csp_policy.split(";")
for directive in directives:
if len(re.findall(exp_directive_name_allowing_all_unsafe_expressions, directive)) > 0 and len(re.findall(exp_all_unsafe_expressions, directive)) > 0:
contain_unsafe_expression = True
break
elif directive.strip().startswith("style-src") and len(re.findall(exp_style_unsafe_expressions, directive)) > 0:
contain_unsafe_expression = True
break
return contain_unsafe_expression
# Functions in charge of generate stats sections
def compute_header_global_usage(header_name):
title = f"Global usage of header '{header_name}'"
description = f"Provide the distribution of usage of the header '{header_name}' across all domains analyzed."
# Prevent the case in which a domain specify X times the same headers...
query = f"select distinct domain from stats where lower(http_header_name) = '{header_name}'"
count_of_domains_using_the_header = len(
execute_query_against_data_db(query))
domains_count = get_domains_count()
percentage_of_domains_using_the_header = (
count_of_domains_using_the_header * 100) / domains_count
dataset_tuples = [("Using it", percentage_of_domains_using_the_header),
("Not using it", (100-percentage_of_domains_using_the_header))]
pie_chart_code = get_pie_chart_code(title, dataset_tuples)
add_stats_section(title, description, pie_chart_code)
def compute_insecure_framing_configuration_global_usage():
header_name = "x-frame-options"
title = f"Global usage of insecure framing configuration via the header '{header_name}'"
description = f"Provide the distribution of usage of the header '{header_name}' across all domains analyzed with a insecure framing configuration: value different from `DENY` or `SAMEORIGIN` including unsupported values."
query = f"select count(*) from stats where lower(http_header_name) = '{header_name}' and lower(http_header_value) not in ('deny','sameorigin')"
count_of_domains = execute_query_against_data_db(query)[0][0]
domains_count = get_domains_count()
percentage_of_domains = (count_of_domains * 100) / domains_count
dataset_tuples = [("Insecure conf", percentage_of_domains),
("Secure conf", (100-percentage_of_domains))]
pie_chart_code = get_pie_chart_code(title, dataset_tuples)
add_stats_section(title, description, pie_chart_code)
def compute_hsts_preload_global_usage():
header_name = "strict-transport-security"
title = "Global usage of the Strict Transport Security 'preload' feature"
description = f"Provide the distribution of usage of the '[preload](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Strict-Transport-Security#preloading_strict_transport_security)' feature for the header '{header_name}' across all domains analyzed."
query = f"select count(*) from stats where lower(http_header_name) = '{header_name}' and lower(http_header_value) not like '%preload%'"
count_of_domains = execute_query_against_data_db(query)[0][0]
domains_count = get_domains_count()
percentage_of_domains = (count_of_domains * 100) / domains_count
dataset_tuples = [("Using it", percentage_of_domains),
("Not using it", (100-percentage_of_domains))]
pie_chart_code = get_pie_chart_code(title, dataset_tuples)
add_stats_section(title, description, pie_chart_code)
def compute_secure_headers_global_usage():
title = "Global usage of secure headers"
description = f"Provide the distribution of usage of secure headers across all domains analyzed."
query = "select count(domain) from stats where http_header_name is NULL"
count_of_domains = execute_query_against_data_db(query)[0][0]
domains_count = get_domains_count()
percentage_of_domains = (count_of_domains * 100) / domains_count
dataset_tuples = [("Not using them", percentage_of_domains),
("Using them", (100-percentage_of_domains))]
pie_chart_code = get_pie_chart_code(title, dataset_tuples)
add_stats_section(title, description, pie_chart_code)
def compute_insecure_referrer_configuration_global_usage():
header_name = "referrer-policy"
title = f"Global usage of insecure referrer configuration via the header '{header_name}'"
description = f"Provide the distribution of usage of the header '{header_name}' across all domains analyzed with a insecure referrer configuration: value set to `unsafe-url` or `no-referrer-when-downgrade`.\n\n`no-referrer-when-downgrade` was included because it send origin, path, and querystring when the protocol security level stays the same (HTTPS is very often in place)."
query = f"select count(*) from stats where lower(http_header_name) = '{header_name}' and lower(http_header_value) in ('unsafe-url','no-referrer-when-downgrade')"
count_of_domains = execute_query_against_data_db(query)[0][0]
domains_count = get_domains_count()
percentage_of_domains = (count_of_domains * 100) / domains_count
dataset_tuples = [("Insecure conf", percentage_of_domains),
("Secure conf", (100-percentage_of_domains))]
pie_chart_code = get_pie_chart_code(title, dataset_tuples)
add_stats_section(title, description, pie_chart_code)
def compute_hsts_average_maxage_global_usage():
title = "Global common 'max-age' values of the Strict Transport Security header"
query = "select lower(http_header_value) from stats where lower(http_header_name) = 'strict-transport-security' and lower(http_header_value) like '%max-age=%'"
header_values = execute_query_against_data_db(query)
expr = r'max-age\s*=\s*(\-?"?\d+"?)'
# Gather values for max-age attribute
values = []
for header_value in header_values:
v = header_value[0].strip('\n\r\t').replace('"', '')
matches = re.findall(expr, v)
if len(matches) > 0:
values.append(int(matches[0]))
# Find the most popular one
occurences = Counter(values)
maxage_most_popular_value = 0
current_max_occurence_count = 0
for maxage_value, occurence_count in occurences.items():
if occurence_count > current_max_occurence_count:
current_max_occurence_count = occurence_count
maxage_most_popular_value = maxage_value
description = f"* Most common value used is {maxage_most_popular_value} seconds ({round(maxage_most_popular_value/60)} minutes) across all domains analyzed."
description += f"\n* Maximum value used is {max(values)} seconds ({round(max(values)/60)} minutes) across all domains analyzed."
description += f"\n* Minimum value used is {min(values)} seconds ({round(min(values)/60)} minutes) across all domains analyzed."
add_stats_section(title, description, None)
def compute_csp_using_directives_with_unsafe_expressions_configuration_global_usage():
header_name = "content-security-policy"
title = f"Global usage of content security policy with directives allowing unsafe expressions"
description = f"Provide the distribution of content security policy allowing unsafe expressions across all domains analyzed.\n\nDetermine if a CSP policy contains `(default-src|script-src|script-src-elem|script-src-attr|style-src)` directives using `(unsafe-inline|unsafe-hashes|unsafe-eval)` expressions.\n\nBased on [Report-URI CSP](https://report-uri.com/home/generate) generator allowed instructions for CSP directives."
query = f"select lower(http_header_value) from stats where lower(http_header_name) like '{header_name}%' and lower(http_header_value) like '%unsafe%'"
header_values = execute_query_against_data_db(query)
count_of_domains = 0
for header_value in header_values:
if csp_contain_unsafe_expression(header_value[0]):
count_of_domains += 1
domains_count = get_domains_count()
percentage_of_domains = (count_of_domains * 100) / domains_count
dataset_tuples = [("Using unsafe", percentage_of_domains),
("Not using unsafe", (100-percentage_of_domains))]
pie_chart_code = get_pie_chart_code(title, dataset_tuples)
add_stats_section(title, description, pie_chart_code)
if __name__ == "__main__":
trace("Clear PNG files")
for path in Path(IMAGE_FOLDER_LOCATION).glob("*.png"):
path.unlink()
trace("Clear MMD files")
for path in Path(IMAGE_FOLDER_LOCATION).glob("*.mmd"):
path.unlink()
oshp_headers = load_oshp_headers()
init_stats_file()
compute_secure_headers_global_usage()
for header_name in oshp_headers:
compute_header_global_usage(header_name)
compute_insecure_framing_configuration_global_usage()
compute_insecure_referrer_configuration_global_usage()
compute_hsts_preload_global_usage()
compute_hsts_average_maxage_global_usage()
compute_csp_using_directives_with_unsafe_expressions_configuration_global_usage()