-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbenford_library.py
197 lines (158 loc) · 5.38 KB
/
benford_library.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
"""
This Python file presents:
Leading Numbers Count functions
This script provides functions that return and manipulate the distribution of leading digits of a numerical distribution.
Make sure your numerical distribution spans a few orders of magnitude.
Author: ayoubft
"""
# Import needed libraries
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme()
def leading_digit(x):
    """Return the first significant (non-zero) decimal digit of a number.

    Args:
        x (float or int): the number to inspect; expected to be positive
            and finite.

    Returns:
        int: the leading digit, from 1 to 9. ``0`` is returned as a
        "no leading digit" marker when ``x`` is zero, negative, NaN or
        infinite, so callers looping over raw data do not have to
        pre-filter it.
    """
    # NaN fails both comparisons, so it is rejected here together with
    # non-positive and infinite values.
    if not 0 < x < float("inf"):
        return 0

    if x >= 1:
        # Dropping the fractional part never changes the leading digit of a
        # value that is at least 1, and integer floor division is exact.
        whole = int(x)
        while whole >= 10:
            whole //= 10
        return whole

    # 0 < x < 1: read the digit from the float's repr (e.g. '0.05', '5e-05')
    # instead of multiplying by 10 repeatedly, which can accumulate rounding
    # error and land just below the true digit.
    for char in repr(float(x)):
        if char in "123456789":
            return int(char)

    # Only reachable if float(x) underflowed to 0.0.
    return 0
def count_leading_digits(numd):
    """Count how often each digit 1-9 occurs as the leading digit in numd.

    Args:
        numd (array of floats): iterable (e.g. a 1-D numpy array) of numbers.

    Returns:
        list of ints: nine counts; index 0 holds the count for digit 1 and
        index 8 the count for digit 9. Every count starts at 1 rather than
        0 to avoid a division by zero further ahead, so the counts sum to
        (number of counted values + 9).
    """
    # Start every bin at one to avoid division by zero error further ahead
    f = [1] * 9
    for value in numd:
        digit = leading_digit(value)
        # Truncate instead of testing equality against 1..9, so that a float
        # result such as 3.0 or 3.7 still lands in the bin for digit 3.
        # Anything outside [1, 10) (zero, negatives, NaN) is not counted.
        if 1 <= digit < 10:
            f[int(digit) - 1] += 1
    return f
def count_leading_digits_var2(numd):
    """Count how often each digit 1-9 occurs as the leading digit in numd.

    String-based variant: the leading digit is read from the textual form
    of each number.

    Args:
        numd (array of floats): iterable (e.g. a 1-D numpy array) of numbers.

    Returns:
        list of ints: nine counts; index 0 holds the count for digit 1 and
        index 8 the count for digit 9. Every count starts at 1 rather than
        0 to avoid a division by zero further ahead. Zero, negative, NaN
        and infinite entries are skipped.
    """
    # Start every bin at one to avoid division by zero error further ahead
    f = [1] * 9
    for value in numd:
        # NaN fails both comparisons, so it is skipped here along with
        # non-positive and infinite values (int() would raise on NaN/inf).
        if not 0 < value < float("inf"):
            continue
        if value >= 1:
            text = str(int(value))
        else:
            # int() would truncate a fraction to 0 and lose its leading
            # digit, so read it from the float repr ('0.05', '5e-05', ...).
            text = repr(float(value))
        for char in text:
            if char in "123456789":
                f[int(char) - 1] += 1
                break
    return f
def frequency_leading_digits(f):
    """Convert leading-digit counts into percentages.

    Args:
        f (array of ints): counts of each leading digit in a distribution.

    Returns:
        list of floats: each entry of ``f`` expressed as a percentage of
        ``sum(f)``, in the same order as ``f``.
    """
    total = sum(f)
    return [100 * count / total for count in f]
def print_distribution_leading_digits(f):
    """Print a two-column text table of digits and their percentages.

    Each entry of ``f`` is printed as its share of ``sum(f)`` in percent,
    next to its 1-based position (the digit).

    Args:
        f (array of floats): values for the leading digits, in digit order.
    """
    total = sum(f)
    print("digit | frequency")
    print("------|----------")
    for digit, value in enumerate(f, start=1):
        print(f" {digit} | {100 * (value / total):6.2f}")
# "%d: %6.1f%%\n
# X (array of ints): the possible leading digits, 1 through 9
X = np.arange(1, 10)
# B (array of floats): Benford's expected share of each digit d, in percent,
# i.e. 100 * log10(1 + 1/d)
B = np.log10(1 + 1 / X) * 100
def plot_benford(ff1, ff2, ff3):
    """Plot three leading-digit distributions against the Benford curve.

    Each series is drawn over the module-level digit axis ``X``, so it is
    presumably expected to hold nine percentages, one per digit.

    Args:
        ff1 (array of floats): distribution of the DEM
        ff2 (array of floats): distribution of the SLOPE
        ff3 (array of floats): distribution of the ASPECT
    """
    plt.figure(figsize=(15, 12))

    # Benford reference curve: dashed and thicker than the data curves
    plt.plot(X, B, '--', color='r', label='Benford', linewidth=8)

    # One solid curve per data set: (values, colour, legend label)
    curves = (
        (ff1, 'g', 'DEM'),
        (ff2, 'b', 'SLOPE'),
        (ff3, 'y', 'ASPECT'),
    )
    for values, colour, name in curves:
        plt.plot(X, values, color=colour, label=name, linewidth=5)

    # Axis labels, tick sizes and the title of the graph
    plt.xlabel("Leading Digit", fontsize=20)
    plt.xticks(fontsize=22)
    plt.ylabel("Percentage", fontsize=20)
    plt.yticks(fontsize=22)
    plt.title("Leading Digits Distributions", fontsize=28)

    # Legend maps each colour to its curve
    plt.legend(fontsize=18)

    # Display the figure
    plt.show()
def plot_benford4(ff1, ff2, ff3, ff4):
    """Plot four leading-digit distributions against the Benford curve.

    Each series is drawn over the module-level digit axis ``X``, so it is
    presumably expected to hold nine percentages, one per digit.

    Args:
        ff1 (array of floats): distribution of the DEM
        ff2 (array of floats): distribution of the SLOPE
        ff3 (array of floats): distribution of the ASPECT
        ff4 (array of floats): distribution of the STRAHLER ORDER
    """
    plt.figure(figsize=(15, 12))

    # Benford reference curve: dashed and thicker than the data curves
    plt.plot(X, B, '--', color='r', label='Benford', linewidth=8)

    # One solid curve per data set: (values, colour, legend label)
    curves = (
        (ff1, 'g', 'DEM'),
        (ff2, 'b', 'SLOPE'),
        (ff3, 'y', 'ASPECT'),
        (ff4, 'k', 'STRAHLER'),
    )
    for values, colour, name in curves:
        plt.plot(X, values, color=colour, label=name, linewidth=5)

    # Axis labels, tick sizes and the title of the graph
    plt.xlabel("Leading Digit", fontsize=20)
    plt.xticks(fontsize=22)
    plt.ylabel("Percentage", fontsize=20)
    plt.yticks(fontsize=22)
    plt.title("Leading Digits Distributions", fontsize=28)

    # Legend maps each colour to its curve
    plt.legend(fontsize=18)

    # Display the figure
    plt.show()