-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patharima1.py
151 lines (115 loc) · 4.85 KB
/
arima1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# Imports for the sales time-series analysis. The two configuration calls
# are deliberately interleaved with the imports, so their order is kept.
import warnings
import itertools
import numpy as np
import matplotlib.pyplot as plt
# Install a global "ignore" filter: warnings are suppressed from this point
# on, including any raised while the modules below are imported.
warnings.filterwarnings("ignore")
# Apply the 'fivethirtyeight' style sheet to every plot in this script.
plt.style.use('fivethirtyeight')
import pandas as pd
import statsmodels.api as sm
import matplotlib
# Global plot defaults: axis-label size, tick-label sizes and text colour.
matplotlib.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'text.color': 'red',
})
# Load the Superstore workbook and keep only the 'Office Supplies' rows.
df = pd.read_excel("Superstore.xls")
# .copy() makes `office` an independent frame, so the column pruning below
# never operates on a filtered view of `df` (which pandas warns about).
office = df.loc[df['Category'] == 'Office Supplies'].copy()
print("start date:-{} , end date:-{} ".format(office['Order Date'].min(),office['Order Date'].max()))
# Columns that are not needed for the sales time series; everything listed
# here is removed and the remaining columns are kept.
cols = ['Row ID', 'Order ID', 'Ship Date', 'Ship Mode', 'Customer ID', 'Customer Name', 'Segment', 'Country', 'City', 'State', 'Postal Code', 'Region', 'Product ID', 'Category', 'Sub-Category', 'Product Name', 'Quantity', 'Discount', 'Profit']
# Non-inplace drop: rebinding the name avoids mutating a derived frame.
office = office.drop(columns=cols)
office = office.sort_values('Order Date')
print("\n\nChecking for missing values:-")
print(office.isnull().sum())
# Collapse all orders placed on the same date into one total per date.
office = office.groupby('Order Date')['Sales'].sum().reset_index()
print("\n\n office dataframe head")
print(office.head())
print("\n\n office dataframe tail")
print(office.tail())
# Index by order date so the frame can be resampled as a time series.
office = office.set_index('Order Date')
print("\n\n office index(new)")
print(office.index)
# Plot the per-date sales totals as they are (one point per order date).
print("\n\n Actual data(Irregular Time Series)")
office.plot(figsize=(15, 6))
plt.show()
# Convert to a regular monthly series: each month-start ('MS') bin holds the
# mean of the per-date sales totals that fall in that month.
y = office.resample('MS')['Sales'].mean()
print("\n\nMonth Start of 2017(Regular Time Series)")
print(y.loc['2017':])
# Plot the monthly Office Supplies sales series.
print("\n\n Visualizing Office Supplies Sales")
y.plot(figsize=(15, 6))
plt.show()
# Default figure size for the plots that follow. The setting goes through the
# already imported `matplotlib` module rather than a mid-file import from the
# discouraged `pylab` interface; the `rcParams` name is kept as an alias.
rcParams = matplotlib.rcParams
rcParams['figure.figsize'] = 18, 8
# Decompose the monthly series into its components (additive model).
print("\n\n Decomposition to its Components")
decomposition = sm.tsa.seasonal_decompose(y, model='additive')
fig = decomposition.plot()
plt.show()
# Candidate orders: p, d and q each range over {0, 1}.
p = d = q = range(2)
# Every (p, d, q) triple for the non-seasonal part.
pdq = list(itertools.product(p, d, q))
# The same triples extended with a seasonal period of 12.
seasonal_pdq = [(*order, 12) for order in pdq]
print('Examples of parameter combinations for ARIMA Model...')
# Show a few sample pairings, given as (index into pdq, index into seasonal_pdq).
print('\n'.join(
    '(p,d,q),(P,D,Q): {} , {}'.format(pdq[plain], seasonal_pdq[seasonal])
    for plain, seasonal in ((1, 1), (1, 2), (5, 3), (6, 4))
))
# Parameter combination selection: fit one SARIMAX model for every pairing
# of a non-seasonal order with a seasonal order and print its BIC.
print("\n\nSelection of parameter combination based on Information Criteria")
# Track the lowest BIC seen so the search result can be reported explicitly.
best_bic = float('inf')
best_combo = None
for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(y,
                                            order=param,
                                            seasonal_order=param_seasonal,
                                            enforce_stationarity=False,
                                            enforce_invertibility=False)
            results = mod.fit()
        except Exception as err:
            # Best effort: a combination that cannot be built or fitted is
            # reported and skipped. Catching Exception (not a bare except)
            # lets KeyboardInterrupt / SystemExit stop the search.
            print('ARIMA{},{} - skipped ({})'.format(param, param_seasonal, err))
            continue
        print('ARIMA{},{} - BIC:{}'.format(param, param_seasonal, results.bic))
        # A NaN BIC never compares as smaller, so it cannot become the best.
        if results.bic < best_bic:
            best_bic = results.bic
            best_combo = (param, param_seasonal)
if best_combo is not None:
    print("\nLowest BIC in this search: ARIMA{},{} - BIC:{}".format(best_combo[0], best_combo[1], best_bic))
# NOTE(review): this line is hard-coded and is not derived from the search
# above; compare it with the "Lowest BIC" line when the data changes.
print("\nBest model so far ARIMA(1,1,1)(1,1,0,12)")
# Fit the selected specification: order (1, 1, 1) with seasonal order
# (1, 1, 0, 12), stationarity and invertibility constraints switched off.
selected_spec = dict(
    order=(1, 1, 1),
    seasonal_order=(1, 1, 0, 12),
    enforce_stationarity=False,
    enforce_invertibility=False,
)
mod = sm.tsa.statespace.SARIMAX(y, **selected_spec)
results = mod.fit()
# Heading and summary table, each on its own line.
print("\n\nSummary of the Selected Model", results.summary(), sep="\n")
# Diagnostic plots for the fitted model.
results.plot_diagnostics(figsize=(16, 8))
plt.show()
# Validate the fit: non-dynamic (one-step-ahead) predictions starting at
# 2017-01-01, drawn over the observed series from 2014 onwards.
print("\n\n Validating forecast")
pred = results.get_prediction(start=pd.Timestamp('2017-01-01'), dynamic=False)
pred_ci = pred.conf_int()
ax = y.loc['2014':].plot(label='observed')
pred.predicted_mean.plot(ax=ax, label='One-step ahead Forecast', alpha=.7, figsize=(14, 7))
# Shade the band between the first and second columns of the interval frame.
ax.fill_between(pred_ci.index, pred_ci.iloc[:, 0], pred_ci.iloc[:, 1], color='k', alpha=.2)
ax.set(xlabel='Date', ylabel='Office Supplies')
plt.legend()
plt.show()
# Root Mean Squared Error of the predictions against the observed values
# from 2017-01-01 onwards.
y_forecasted = pred.predicted_mean
y_truth = y.loc['2017-01-01':]
mse = y_forecasted.sub(y_truth).pow(2).mean()
rmse = np.sqrt(mse)
print('The Root Mean Squared Error of our forecasts is {}'.format(round(rmse, 2)))
# Forecast 25 steps past the end of the sample and plot the result together
# with the observed series and the forecast interval.
print("\nProducing and visualizing forecasts")
pred_uc = results.get_forecast(steps=25)
pred_ci = pred_uc.conf_int()
ax = y.plot(label='observed', figsize=(14, 7))
pred_uc.predicted_mean.plot(ax=ax, label='Forecast')
# Shade the band between the first and second columns of the interval frame.
ax.fill_between(pred_ci.index, pred_ci.iloc[:, 0], pred_ci.iloc[:, 1], color='k', alpha=.25)
ax.set(xlabel='Date', ylabel='Office Supplies Sales')
plt.legend()
plt.show()