-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlppinv.ado
executable file
·201 lines (190 loc) · 9.21 KB
/
lppinv.ado
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
*! version 1.1.6 20feb2024 I I Bolotov
program define lppinv, rclass byable(recall)
version 16.0
/*
The program implements a non-iterated general LPLS estimator for linear
programming with the help of the Moore-Penrose inverse (pseudoinverse),
calculated using singular value decomposition (SVD), with emphasis
on estimation of OLS constrained in values (cOLS), Transaction Matrix
(TM), and custom (user-defined) cases. Eventual regression analysis and
a Monte-Carlo-based t-test of mean NRMSE are performed, the sample being
drawn from a uniform or a user-specified distribution (a Mata function).
Author: Ilya Bolotov, MBA, Ph.D.
Date: 31 January 2022
*/
tempname t
// syntax
syntax ///
anything [if] [in] [, ///
COLS TM ///
Model(string) Constraints(string) Slackvars(string) ///
ZERODiagonal noMC ///
TOLerance(real 0) Level(cilevel) ///
SEED(int `=c(rngseed_mt64s)') ITERate(int `=c(maxiter)') ///
DISTribution(string) noTRACE ///
]
// adjust and preprocess options
if "`cols'" != "" & "`tm'" != "" { // either cOLS or TM option
di as err "options cols and tm are mutually incompatible"
exit 198
}
if `"`distribution'"' == "" { // function name -> pointer
loc distribution "lppinv_runiform"
}
// select a subset of the data (if applicable)
if `"`if'`in'"' != "" {
preserve
qui keep `if' `in'
}
// perform the pinv estimation via SVD
sca `t' = clock(c(current_date) + " " + c(current_time), "DMYhms")
mata: lppinv( ///
"`cols'`tm'", `"`anything'"', `"`model'"', `"`constraints'"', ///
`"`slackvars'"', ("`zerodiagonal'" != "" ? 1 : 0), ///
("`mc'" != "" ? 0 : 1), ("`trace'" != "" ? 0 : 1), ///
(`tolerance' ? `tolerance' : .), `level', `seed', `iterate', ///
&`distribution'() ///
)
sca `t' = clock(c(current_date) + " " + c(current_time), "DMYhms") - `t'
// print output
di as txt _n "Converged in `=round(`t'/1000, .001)'s with NRMSE = " ///
_c "`=round(r(nrmse), .0001)', solution stored in" ///
_c "{stata ret li: r(solution)}"
// return output
ret add
end
version 16.0
loc RS real scalar
loc RV real colvector
loc RM real matrix
loc SS string scalar
loc TM transmorphic matrix
loc PF pointer(`RM' function) scalar
loc VV void
loc version "version 16:"
mata:
mata set matastrict on
`VV' lppinv(`SS' lp, `SS' rhs, `SS' lhs_m, `SS' lhs_c, `SS' lhs_s, |`RS' f_d,
`RS' f_cv, `RS' f_t, `RS' tol, `RS' lvl, `RS' seed, `RS' iter,
`PF' dist) /* solver */
{
`RS' r, c, i
`RV' x, e
`RM' b, M, C, S, a
`TM' tmp, f
// general configuration
tmp = _stata("`version' tsset, noq", 1)
b = (! _stata("`version' confirm numeric var " + /* RHS */
regexr(rhs, ".+\.", ""), 1)
? st_data(., (! tmp ? _st_tsrevar(tokens(rhs)) : tokens(rhs)))
: (length(tokens(rhs)) == 1 ? st_matrix(rhs) : J(0,0,.))
)
M = (! _stata("`version' confirm numeric var " + /* LHS */
regexr(lhs_m, ".+\.", ""), 1)
? st_data(., (! tmp ? _st_tsrevar(tokens(lhs_m)) : tokens(lhs_m)))
: (length(tokens(lhs_m)) == 1 ? st_matrix(lhs_m) : J(0,0,.))
)
C = (! _stata("`version' confirm numeric var " +
regexr(lhs_c, ".+\.", ""), 1)
? st_data(., (! tmp ? _st_tsrevar(tokens(lhs_c)) : tokens(lhs_c)))
: (length(tokens(lhs_c)) == 1 ? st_matrix(lhs_c) : J(0,0,.))
)
S = (! _stata("`version' confirm numeric var " +
regexr(lhs_s, ".+\.", ""), 1)
? st_data(., (! tmp ? _st_tsrevar(tokens(lhs_s)) : tokens(lhs_s)))
: (length(tokens(lhs_s)) == 1 ? st_matrix(lhs_s) : J(0,0,.))
)
f_d = f_d != . ? f_d : 0 /* flag: zero-diagonal */
f_cv = f_cv != . ? f_cv : 1 /* flag: critical values */
f_t = f_t != . ? f_t : 1 /* flag: trace */
tol = tol != . ? tol : . /* tolerance */
lvl = lvl != . ? lvl : c("level") /* confidence level */
seed = seed != . ? seed : c("rngseed_mt64s") /* MC seed */
iter = iter != . ? iter : 500 /* MC iterations */
dist = dist ? dist : &lppinv_runiform() /* MC distribution */
// prepare the LHS (left-hand side), `a`, and RHS (right-hand side), `b`
if (strlower(lp) == "cols") { /* LS-LP type: cOLS */
if (rows(C) + rows(M) > rows(b)) b = b\b
}
if (strlower(lp) == "tm") { /* LS-LP type: TM */
if (! rows(b) | (rows(b) & cols(b) != 2) |
(rows(S) & cols(S) != 2)) {
errprintf("Error: TM requires two columns in `b`\n" )
exit(503)
}
/* C -> characteristic matrix */
i = rows(b) - rows(M)
r = rows(select(tmp=b[1..i,1], tmp :< .)) /* rows and cols */
c = rows(select(tmp=b[1..i,2], tmp :< .))
C = I(r)#J(1,c,1)\J(1,r,1)#I(c) /* row- (first), colsums */
/* S -> characteristic matrix */
if (S != J(0,0,.))
S = select(S[,1]\S[,2], S[,1]\S[,2] :< .)
}
/* M, C, S -> `a` */
if (f_d & strlower(lp) == "tm")
M = (rows(M) ? M : J(0,r*c,.))\ /* diagonal of `C` -> M */
((tmp=(I(min((r,c)))#(1,J(1,c,0)))[,(1..min((r,c))*c)]),
J(rows(tmp),r*c-cols(tmp),0))
a = (rows(C) ? C,(rows(S) ? S : J(rows(C),0,.)) : J(0,cols(M) + cols(S),.))\
(rows(M) ? M,J(rows(M),cols(S),0) : J(0,cols(C) + cols(S),.))
/*`b` -> (, 1) */
if (strlower(lp) == "tm")
b = b[1..r,1]\b[1..c,2]\
(rows(b)>max((r,c)) ? rowsum(b[max((r,c))+1..rows(b),]) : J(0,1,.))
if (f_d & strlower(lp) == "tm")
b = b\(f_d ? J(rows(a)-rows(b),1,0) : J(0,1,.))
/* check the dimensions of `a` and `b` */
if (rows(a) != rows(b)) {
errprintf("Error: `a` and `b` are not conformable\n")
exit(503)
}
/* drop missing values in `a` and `b` */
a = select(a, (tmp=rowmissing(a)+rowmissing(b)) :== 0)
b = select(b, tmp :== 0)
st_matrix("r(a)", a, "hidden")
st_matrix("r(b)", b, "hidden")
C = M = rows(select(C, rowmissing(C) :== 0)) /* clear memory */
S = cols(S)
// obtain the SVD-based solution of the matrix equation `a @ x = b`
x = svsolve(a, b, ., tol) /* solution, NRMSE, R2_C */
e = e\sqrt(cross((tmp=b - a * x), tmp) / (r=rows(b)) / variance(b))\
(C ? 1 - cross((tmp=b[1..C] - (a * x)[1..C]), tmp[1..C]) /
cross((tmp=b[1..C] :- mean(b[1..C])), tmp[1..C]) : .)
/* regression results (if applicable) */
st_framecreate(f=st_tempname())
if (cols(a) <= rows(b)) {
(void) _stata("`version' frame "+f+": svmat r(b), n(b)", 1) &
_stata("`version' frame "+f+": svmat r(a), n(a)", 1) &
_stata("`version' frame "+f+": reg b1 a*, noc " +
"l("+strofreal(lvl)+")", ! f_t)
}
/* NRMSE t-test for `a', based on MC with iter simulations */
if (e[1] != . & f_cv) { /* skip if NRMSE == . */
e = e\((tmp=trunc(floatround(log10(iter)))) + trunc(floatround(tmp / 3))
+ 2) /* format: %e[3].0fc */
printf("\n{txt}Simulations ({res}%"+strofreal(e[3] - 1)+".0fc{txt})\n",
iter)
printf("----+--- 1 ---+--- 2 ---+--- 3 ---+--- 4 ---+--- 5\n")
rseed(seed)
for(i = 1; i < (iter + 1); i++) {
e = e\sqrt(cross((tmp=(b=(*dist)(r,1)) - a * svsolve(a, b, ., tol)),
tmp) / r / variance(b))
if (! mod(i, 5)) { printf("....."); displayflush(); }
if (! mod(i, 50)) printf("%"+strofreal(e[3])+".0fc\n", i)
}
st_matrix("r(e)", e[4::rows(e)], "hidden")
(void) _stata("`version' frame "+f+": svmat r(e), n(e)", 1) &
_stata("`version' frame "+f+": ttest e1 == " +
strofreal(e[1]) + ", l("+strofreal(lvl)+")", ! f_t)
}
// return the solution `x', matrix `a', and NRMSE
st_matrix("r(solution)", strlower(lp) == "tm" ? colshape(x[1..rows(x)-S], c)
: x[1..rows(x)-S])
st_matrix("r(a)", a, "hidden")
st_numscalar("r(r2_c)", strlower(lp) == "tm" & e[2] >= 0 & e[2] <= 1 ? e[2]
: .)
st_numscalar("r(nrmse)", e[1])
}
`RM' lppinv_runiform(`RS' r, `RS' c) return(runiform(r, c)) /* dummy */
end