lppinv.ado

*! version 1.1.6  20feb2024  I I Bolotov
program define lppinv, rclass byable(recall)
	version 16.0
	/*
		The program implements a non-iterated general LPLS estimator for linear 
		programming with the help of the Moore-Penrose inverse (pseudoinverse), 
		calculated using singular value decomposition (SVD), with emphasis      
		on estimation of OLS constrained in values (cOLS), Transaction Matrix   
		(TM), and custom (user-defined) cases. Eventual regression analysis and 
		a Monte-Carlo-based t-test of mean NRMSE are performed, the sample being
		drawn from a uniform or a user-specified distribution (a Mata function).

		Author: Ilya Bolotov, MBA, Ph.D.                                        
		Date: 31 January 2022                                                   
	*/
	tempname t
	// syntax
	syntax																	///
	anything [if] [in] [,													///
		COLS TM																///
		Model(string) Constraints(string) Slackvars(string)					///
		ZERODiagonal noMC													///
		TOLerance(real 0) Level(cilevel)									///
		SEED(int `=c(rngseed_mt64s)') ITERate(int `=c(maxiter)')			///
		DISTribution(string) noTRACE										///
	]
	// adjust and preprocess options                                            
	if "`cols'" != "" & "`tm'" != "" {				 // either cOLS or TM option
		di as err "options cols and tm are mutually incompatible"
		exit 198
	}
	if `"`distribution'"' == "" {					 // function name -> pointer
		loc distribution "lppinv_runiform"
	}
	// select a subset of the data (if applicable)                              
	if `"`if'`in'"' != "" {
		preserve
		qui keep `if' `in'
	}
	// perform the pinv estimation via SVD                                      
	sca `t' = clock(c(current_date) + " " + c(current_time), "DMYhms")
	mata: lppinv(                                                           ///
		"`cols'`tm'", `"`anything'"', `"`model'"', `"`constraints'"',       ///
		`"`slackvars'"', ("`zerodiagonal'" != "" ? 1 : 0),                  ///
		("`mc'" != "" ? 0 : 1), ("`trace'" != "" ? 0 : 1),                  ///
		(`tolerance' ? `tolerance' : .), `level', `seed', `iterate',        ///
		&`distribution'()                                                   ///
	)
	sca `t' = clock(c(current_date) + " " + c(current_time), "DMYhms") - `t'
	// print output                                                             
	di as txt _n "Converged in `=round(`t'/1000, .001)'s with NRMSE = "		///
			  _c "`=round(r(nrmse), .0001)', solution stored in"			///
			  _c "{stata ret li: r(solution)}"
	// return output                                                            
	ret add
end

version 16.0
loc RS        real scalar
loc RV        real colvector
loc RM        real matrix
loc SS        string scalar
loc TM        transmorphic matrix
loc PF        pointer(`RM' function) scalar
loc VV        void

loc version   "version 16:"

mata:
mata set matastrict on

`VV' lppinv(`SS' lp, `SS' rhs, `SS' lhs_m, `SS' lhs_c, `SS' lhs_s, |`RS' f_d,   
            `RS' f_cv, `RS' f_t, `RS' tol, `RS' lvl, `RS' seed, `RS' iter,      
            `PF' dist)                                             /* solver  */
{
	`RS' r, c, i
	`RV' x, e
	`RM' b, M, C, S, a
	`TM' tmp, f

	// general configuration                                                    
	tmp   = _stata("`version' tsset, noq", 1)
	b     = (! _stata("`version' confirm numeric var "           + /* RHS     */
		    regexr(rhs, ".+\.", ""), 1)
		? st_data(., (! tmp ? _st_tsrevar(tokens(rhs)) : tokens(rhs)))
		: (length(tokens(rhs)) == 1 ? st_matrix(rhs) : J(0,0,.))
	)
	M     = (! _stata("`version' confirm numeric var "           + /* LHS     */
		    regexr(lhs_m, ".+\.", ""), 1)
		? st_data(., (! tmp ? _st_tsrevar(tokens(lhs_m)) : tokens(lhs_m)))
		: (length(tokens(lhs_m)) == 1 ? st_matrix(lhs_m) : J(0,0,.))
	)
	C     = (! _stata("`version' confirm numeric var "           +
		    regexr(lhs_c, ".+\.", ""), 1)
		? st_data(., (! tmp ? _st_tsrevar(tokens(lhs_c)) : tokens(lhs_c)))
		: (length(tokens(lhs_c)) == 1 ? st_matrix(lhs_c) : J(0,0,.))
	)
	S     = (! _stata("`version' confirm numeric var "           +
		    regexr(lhs_s, ".+\.", ""), 1)
		? st_data(., (! tmp ? _st_tsrevar(tokens(lhs_s)) : tokens(lhs_s)))
		: (length(tokens(lhs_s)) == 1 ? st_matrix(lhs_s) : J(0,0,.))
	)
	f_d   = f_d   != . ? f_d  : 0                    /* flag: zero-diagonal   */
	f_cv  = f_cv  != . ? f_cv : 1                    /* flag: critical values */
	f_t   = f_t   != . ? f_t  : 1                    /* flag: trace           */
	tol   = tol   != . ? tol  : .                    /* tolerance             */
	lvl   = lvl   != . ? lvl  : c("level")           /* confidence level      */
	seed  = seed  != . ? seed : c("rngseed_mt64s")   /* MC seed               */
	iter  = iter  != . ? iter : 500                  /* MC iterations         */
	dist  = dist       ? dist : &lppinv_runiform()   /* MC distribution       */

	// prepare the LHS (left-hand side), `a`, and RHS (right-hand side), `b`    
	if (strlower(lp) == "cols") {                    /* LS-LP type: cOLS      */
		if (rows(C) + rows(M) > rows(b)) b = b\b
	}
	if (strlower(lp) == "tm") {                      /* LS-LP type: TM        */
		if (! rows(b) | (rows(b) & cols(b) != 2)                               |
		                (rows(S) & cols(S) != 2)) {
			errprintf("Error: TM requires two columns in `b`\n" )
			exit(503)
		}
		/* C -> characteristic matrix                                         */
		i = rows(b) - rows(M)
		r = rows(select(tmp=b[1..i,1], tmp :< .))    /* rows and cols         */
		c = rows(select(tmp=b[1..i,2], tmp :< .))
		C = I(r)#J(1,c,1)\J(1,r,1)#I(c)              /* row- (first), colsums */
		/* S -> characteristic matrix                                         */
		if (S != J(0,0,.))
			S = select(S[,1]\S[,2], S[,1]\S[,2] :< .)
	}
	/* M, C, S -> `a`                                                         */
	if (f_d & strlower(lp) == "tm")
		M = (rows(M) ? M : J(0,r*c,.))\              /* diagonal of `C` -> M  */
	        ((tmp=(I(min((r,c)))#(1,J(1,c,0)))[,(1..min((r,c))*c)]),
	        J(rows(tmp),r*c-cols(tmp),0))
	a = (rows(C) ? C,(rows(S) ? S : J(rows(C),0,.)) : J(0,cols(M) + cols(S),.))\
	    (rows(M) ? M,J(rows(M),cols(S),0) : J(0,cols(C) + cols(S),.))
	/*`b` -> (, 1)                                                            */
	if (strlower(lp) == "tm")
		b = b[1..r,1]\b[1..c,2]\
		    (rows(b)>max((r,c)) ? rowsum(b[max((r,c))+1..rows(b),]) : J(0,1,.))
	if (f_d & strlower(lp) == "tm")
		b = b\(f_d ? J(rows(a)-rows(b),1,0) : J(0,1,.))
	/* check the dimensions of `a` and `b`                                    */
	if (rows(a) != rows(b)) {
		errprintf("Error: `a` and `b` are not conformable\n")
		exit(503)
	}
	/* drop missing values in `a` and `b`                                     */
	a = select(a, (tmp=rowmissing(a)+rowmissing(b)) :== 0)
	b = select(b,  tmp                              :== 0)
	st_matrix("r(a)", a, "hidden")
	st_matrix("r(b)", b, "hidden")
	C = M = rows(select(C, rowmissing(C) :== 0))     /* clear memory          */
	S =     cols(S)

	// obtain the SVD-based solution of the matrix equation `a @ x = b`         
	x = svsolve(a, b, ., tol)                        /* solution, NRMSE, R2_C */
	e = e\sqrt(cross((tmp=b - a * x), tmp) / (r=rows(b)) / variance(b))\        
	      (C ? 1 - cross((tmp=b[1..C]  - (a * x)[1..C]), tmp[1..C])            /
	               cross((tmp=b[1..C] :- mean(b[1..C])), tmp[1..C]) : .)
	/* regression results (if applicable)                                     */
	st_framecreate(f=st_tempname())
	if (cols(a) <= rows(b)) {
		(void) _stata("`version' frame "+f+": svmat r(b), n(b)", 1)            &
		       _stata("`version' frame "+f+": svmat r(a), n(a)", 1)            &
		       _stata("`version' frame "+f+": reg b1 a*, noc  "                +
		       "l("+strofreal(lvl)+")",                      ! f_t)
	}
	/* NRMSE t-test for `a', based on MC with iter simulations                */
	if (e[1] != . & f_cv) {                          /* skip if NRMSE == .    */
		e = e\((tmp=trunc(floatround(log10(iter)))) + trunc(floatround(tmp / 3))
		      + 2)                                   /* format: %e[3].0fc     */
		printf("\n{txt}Simulations ({res}%"+strofreal(e[3] - 1)+".0fc{txt})\n",
		iter)
		printf("----+--- 1 ---+--- 2 ---+--- 3 ---+--- 4 ---+--- 5\n")
		rseed(seed)
		for(i = 1; i < (iter + 1); i++) {
			e = e\sqrt(cross((tmp=(b=(*dist)(r,1)) - a * svsolve(a, b, ., tol)),
			      tmp) / r / variance(b))
			if (! mod(i, 5)) { printf("....."); displayflush(); }
			if (! mod(i, 50))  printf("%"+strofreal(e[3])+".0fc\n", i)
		}
		st_matrix("r(e)", e[4::rows(e)], "hidden")
		(void) _stata("`version' frame "+f+": svmat r(e), n(e)", 1)            &
		       _stata("`version' frame "+f+": ttest e1 == "                    +
		       strofreal(e[1]) + ", l("+strofreal(lvl)+")", ! f_t)
	}

	// return the solution `x', matrix `a', and NRMSE                           
	st_matrix("r(solution)", strlower(lp) == "tm" ? colshape(x[1..rows(x)-S], c)
                                                  : x[1..rows(x)-S])
	st_matrix("r(a)",        a, "hidden")
	st_numscalar("r(r2_c)",  strlower(lp) == "tm" & e[2] >= 0 & e[2] <= 1 ? e[2]
                                                                          : .)
	st_numscalar("r(nrmse)", e[1])
}

`RM' lppinv_runiform(`RS' r, `RS' c) return(runiform(r, c))        /* dummy   */

end