using Optimization # For numerical minimization of PRESS statistic using OptimizationOptimJL # For numerical minimization of PRESS statistic struct TRSVD U::Matrix{Float64} s::Vector{Float64} V::Matrix{Float64} mX::Matrix{Float64} regType::String regParam1::Float64 regMat::Matrix{Float64} n::Int64 p::Int64 end struct TRBidiag W::Matrix{Float64} B::Bidiagonal{Float64, Vector{Float64}} T::Matrix{Float64} y::Vector{Float64} mX::Matrix{Float64} my::Float64 regType::String regParam1::Float64 regMat::Matrix{Float64} n::Int64 p::Int64 end """ ### TO DO: ADD FRACTIONAL DERIVATIVE REGULARIZATION ### regularizationMatrix(X; regType="L2", regParam1=0, regParam2=1e-14) regularizationMatrix(p::Int64; regType="L2", regParam1=0, regParam2=1e-14) Calculates and returns square regularization matrix. Inputs: - X/p : Size of regularization matrix or data matrix (size of reg. mat. will then be size(X,2) - regType : "L2" (returns identity matrix) "bc" (boundary condition, forces zero on right endpoint for derivative regularization) or "legendre" (no boundary condition, but fills out reg. mat. with lower order polynomial trends to get square matrix) "std" (standardization, FILL OUT WHEN DONE) - regParam1 : Int64, Indicates degree of derivative regularization (0 gives L\\_2) - regParam2 : For regType=="plegendre" added polynomials are multiplied by sqrt(regParam2) Output: Square regularization matrix """ function regularizationMatrix(X; regType="L2", regParam1=0, regParam2=1e-14) if regType == "std" regParam2 = Diagonal(vec(std(X, dims=1))); end regMat = regularizationMatrix(size(X,2); regType, regParam1, regParam2) return regMat end function regularizationMatrix(p::Int64; regType="L2", regParam1=0, regParam2=1e-14) if regType == "bc" # Discrete derivative with boundary conditions regMat = [I(p); zeros(regParam1,p)]; for i = 1:regParam1 regMat = diff(regMat, dims = 1); end elseif regType == "legendre" # Fill in polynomials in bottom row(s) to get square matrix regMat = [I(p); zeros(regParam1,p)]; for i = 1:regParam1 regMat = diff(regMat, dims = 1); end P, _ = plegendre(regParam1-1, p); regMat[end-regParam1+1:end,:] = sqrt(regParam2) * P; elseif regType == "L2" regMat = I(p); elseif regType == "std" # Standardization regMat = regParam2; elseif regType == "GL" # Grünwald-Letnikov fractional derivative regulariztion # regParam1 is alpha (order of fractional derivative) C = ones(p)*1.0; for k in 2:p C[k] = (1-(regParam1+1)/(k-1)) * C[k-1]; end regMat = zeros(p,p); for i in 1:p regMat[i:end, i] = C[1:end-i+1]; end end return regMat end """ function TRBidiagDecomp(X, y, A=(minimum(size(X))-1), regType="L2", regParam1=0, regParam2=1e-14) Calculates regularization matrix (using function "RegularizationMatrix"), and centres and transforms data matrix according to "X / regMat". Output is an object of type "TRBidiag" and is used as input to other TR functions. """ function TRBidiagDecomp(X, y, A=(minimum(size(X))-1), regType="L2", regParam1=0, regParam2=1e-14) n, p = size(X); mX = mean(X, dims=1); X = X .- mX; my = mean(y); y = vec(y .- my); regMat = regularizationMatrix(X; regType, regParam1, regParam2); X = X / regMat; _, W, B, T = bidiag2(X, y, A); TRObj = TRBidiag(W, B, T, y, mX, my, regType, regParam1, regMat, n, p); return TRObj end """ function TRSVDDecomp(X, regType="L2", regParam1=0, regParam2=1e-14) Calculates regularization matrix (using function "RegularizationMatrix"), and centres and transforms data matrix according to "X / regMat". Output is an object of type "TRSVD" and is used as input to other TR functions. """ function TRSVDDecomp(X, regType="L2", regParam1=0, regParam2=1e-14) n, p = size(X); mX = mean(X, dims=1); X = X .- mX; regMat = regularizationMatrix(X; regType, regParam1, regParam2); X = X / regMat; U, s, V = svd(X, full=false); TRObj = TRSVD(U, s, V, mX, regType, regParam1, regMat, n, p); return TRObj end function TRSegmentOrth(X, segments) n = size(X,1); n_segments = maximum(segments); U = zeros(n,n); for seg in 1:n_segments inds = vec(seg .== segments) U[inds, inds], _, _ = svd(X[inds,:], full=false); end return U end """ function TRVirCV(X, y, segments, lambdas, regType="L2", regParam1=0, regParam2=1e-14) Segmented virtual cross-validation (VirCV) for TR models. Outputs: b, press, lambda_min, lambda_min_ind, GCV b are (virtual) press-minimal regression coefficients. """ function TRVirCV(X, y, segments, lambdas, regType="L2", regParam1=0, regParam2=1e-14) U_segments = TRSegmentOrth(X, segments); bs = vec(sum(U_segments, dims=1).^2); n, p = size(X); mX = mean(X, dims=1); X = X .- mX; my = mean(y); y = vec(y .- my); X = U_segments' * X; y = U_segments' * y; regMat = regularizationMatrix(p; regType, regParam1, regParam2); X = X / regMat; U, s, V = svd(X, full=false); denom = broadcast(.+, broadcast(./, lambdas, s'), s')'; H = broadcast(.+, U.^2 * broadcast(./, s, denom), bs./n); resid = broadcast(.-, y, U * broadcast(./, s .* (U'*y), denom)); rescv = broadcast(./, resid, broadcast(.-, 1, H)); press = vec(sum(rescv.^2, dims=1)); #rmsecv = sqrt.(1/n .* press); GCV = vec(broadcast(./, sum(resid.^2, dims=1), mean(broadcast(.-, 1, H), dims=1).^2)); lambda_min, lambda_min_ind = findmin(press); lambda_min_ind = lambda_min_ind[1]; denom2 = broadcast(.+, lambda_min ./ s', s')'; b = V * broadcast(./, (U' * y), denom2); b = regMat \ b; b = [my .- mX*b; b]; return b, press, lambda_min, lambda_min_ind, GCV end """ function TRSegCV(X, y, lambdas, cv, regType="L2", regParam1=0, regParam2=1e-14) Segmented cross-validation based on the Sherman-Morrison-Woodbury updating formula. Inputs: - X : Data matrix - y : Response vector - lambdas : Vector of regularization parameter values - cv : Vector of length n indicating segment membership for each sample - regType, regParam1, regParam2 : Inputs to regularizationMatrix function Outputs: rmsecv, b, lambda_min, lambda_min_ind. b are regression coefficients corresponding to the lambda value minimising the CV-error. """ function TRSegCV(X, y, lambdas, cv, regType="L2", regParam1=0, regParam2=1e-14) TR = TRSVDDecomp(X, regType, regParam1, regParam2); n_seg = maximum(cv); n_lambdas = length(lambdas); my = mean(y); y = y .- my; denom = broadcast(.+, broadcast(./, lambdas, TR.s'), TR.s')'; resid = broadcast(.-, y, TR.U * broadcast(./, TR.s .* (TR.U'*y), denom)); rescv = zeros(TR.n, n_lambdas); sdenom = sqrt.(broadcast(./, TR.s, denom))'; for seg in 1:n_seg Useg = TR.U[vec(cv .== seg),:]; Id = 1.0 * I(size(Useg,1)) .- 1/TR.n; for k in 1:n_lambdas Uk = Useg .* sdenom[k,:]'; rescv[vec(cv .== seg), k] = (Id - Uk * Uk') \ resid[vec(cv .== seg), k]; end end press = sum(rescv.^2, dims=1)'; rmsecv = sqrt.(1/TR.n .* press); lambda_min, lambda_min_ind = findmin(rmsecv) lambda_min_ind = lambda_min_ind[1] b = TRRegCoeffs(TR, y, lambda_min, my) return b, rmsecv, lambda_min, lambda_min_ind end """ TRRegCoeffs(X, y, lambdas, regType="L2", regParam1=0, regParam2=1e-14) TRRegCoeffs(TR::TRSVD, y, lambdas, my=0) Calculates regression coefficients for TR model. First function returns "bcoeffs, TR::TRSVD", second function returns "bcoeffs". """ function TRRegCoeffs(X, y, lambdas, regType="L2", regParam1=0, regParam2=1e-14) TR = TRSVDDecomp(X, regType, regParam1, regParam2); my = mean(y); y = y .- my; @inline bcoeffs = TRRegCoeffs(TR, y, lambdas, my); return bcoeffs, TR end function TRRegCoeffs(TR::TRSVD, y, lambdas, my=0) # Don't forget about centering (both X and y) - Maybe do it outside of this function? denom = broadcast(.+, broadcast(./, lambdas, TR.s'), TR.s')'; bcoeffs = TR.V * broadcast(./, (TR.U' * y), denom); bcoeffs = TR.regMat \ bcoeffs; if my != 0 bcoeffs = [my .- TR.mX*bcoeffs; bcoeffs]; end return bcoeffs end """ TRPress(TR::TRSVD, y, lambdas) TRPress(TR::TRSVD, y, lambdas, H, resid) Calculates and returns press-values (as vector) for lambda values given as input. """ function TRPress(TR::TRSVD, y, lambdas) denom = broadcast(.+, broadcast(./, lambdas, TR.s'), TR.s')'; resid = broadcast(.-, y, TR.U * broadcast(./, TR.s .* (TR.U'*y), denom)); H = broadcast(.+, TR.U.^2 * broadcast(./, TR.s, denom), 1/TR.n); @inline press = TRPress(TR, y, lambdas, H, resid); return press end function TRPress(TR::TRSVD, y, lambdas, H, resid) rescv = broadcast(./, resid, broadcast(.-, 1, H)); press = vec(sum(rescv.^2, dims=1)); return press end """ function TRGCV(TR::TRSVD, y, lambdas) function TRGCV(TR::TRSVD, y, lambdas, H, resid) Calculates and returns GCV-values (as vector) for lambda values given as input. """ function TRGCV(TR::TRSVD, y, lambdas) denom = broadcast(.+, broadcast(./, lambdas, TR.s'), TR.s')'; resid = broadcast(.-, y, TR.U * broadcast(./, TR.s .* (TR.U'*y), denom)); H = broadcast(.+, TR.U.^2 * broadcast(./, TR.s, denom), 1/TR.n); @inline GCV = TRGCV(TR, y, lambdas, H, resid); return GCV end function TRGCV(TR::TRSVD, y, lambdas, H, resid) GCV = vec(broadcast(./, sum(resid.^2, dims=1), mean(broadcast(.-, 1, H), dims=1).^2)); return GCV; end """ function TRLooCV(X, y, lambdas, regType="L2", regParam1=0, regParam2=1e-14) function TRLooCV(TR::TRSVD, y, lambdas) Calculates PRESS- and GCV-minimal regression coefficients from the reg. param values in lambdas. Outputs: BPRESS, BGCV, TR, press, GCV, idminPRESS, lambdaPRESS, idminGCV, lambdaGCV """ function TRLooCV(X, y, lambdas, regType="L2", regParam1=0, regParam2=1e-14) TR = TRSVDDecomp(X, regType, regParam1, regParam2); BPRESS, BGCV, TR, press, GCV, idminPRESS, lambdaPRESS, idminGCV, lambdaGCV = TRLooCV(TR, y, lambdas); return BPRESS, BGCV, TR, press, GCV, idminPRESS, lambdaPRESS, idminGCV, lambdaGCV end function TRLooCV(TR::TRSVD, y, lambdas) my = mean(y); y = y .- my; denom = broadcast(.+, broadcast(./, lambdas, TR.s'), TR.s')'; H = broadcast(.+, TR.U.^2 * broadcast(./, TR.s, denom), 1/TR.n); resid = broadcast(.-, y, TR.U * broadcast(./, TR.s .* (TR.U'*y), denom)); @inline press = TRPress(TR, y, lambdas, H, resid); @inline GCV = TRGCV(TR, y, lambdas, H, resid); idminPRESS = findmin(press)[2][1]; # First index selects coordinates, second selects '1st coordinate' idminGCV = findmin(GCV)[2][1]; # First index selects coordinates, second selects '1st coordinate' lambdaPRESS = lambdas[idminPRESS]; lambdaGCV = lambdas[idminGCV]; BPRESS = TRRegCoeffs(TR, y, lambdaPRESS, my); BGCV = TRRegCoeffs(TR, y, lambdaGCV, my); return BPRESS, BGCV, TR, press, GCV, idminPRESS, lambdaPRESS, idminGCV, lambdaGCV end """ function PlotTRLooCV(lambdas, BPRESS, BGCV, TR, press, GCV, idminPRESS, lambdaPRESS, idminGCV, lambdaGCV) The function uses lambdas (vector) + the output from TRLooCV to plot data, PRESS- and GCV-curves, as well as PRESS- and GCV-minimal regression coefficients. """ function PlotTRLooCV(lambdas, BPRESS, BGCV, TR, press, GCV, idminPRESS, lambdaPRESS, idminGCV, lambdaGCV) plta = plot((TR.U * diagm(TR.s) * TR.V' .+ TR.mX)', legend=false) pltb = plot(log10.(lambdas), xlabel="log10(lambda)", press, label="PRESS"); plot!(log10.(lambdas), GCV, label="GCV") #plot!(log10(lambdaPRESS), press[idminPRESS]) pltc = plot(BPRESS[2:end], label="B-press") plot!(BGCV[2:end], label="B-GCV") #pltd = plot(X', legend=false) plt = plot(plta, pltb, pltc, layout=(2,2)) display(plt) end """ function TRLooCVNum(TR::TRSVD, y, lambdaInit=1) TRLooCVNum(X, y, lambdaInit=1, regType="L2", regParam1=0, regParam2=1e-14) Finds regularization paramter value minimising the PRESS-statistic and returns "b, lambda_min". """ function TRLooCVNum(TR::TRSVD, y, lambdaInit=1) function pressfunc(lambdaval) @inline pressval = TRPress(TR, y, lambdaval[1]) return pressval end my = mean(y); y = y .- my; prob = OptimizationProblem((x, p) -> pressfunc(x), [1.0], []) sol = solve(prob, NelderMead())[1]; b = TRRegCoeffs(TR, y, sol, my); return b, sol[1] end function TRLooCVNum(X, y, lambdaInit=1, regType="L2", regParam1=0, regParam2=1e-14) TR = TRSVDDecomp(X, regType, regParam1, regParam2); @inline b, lambda_min = TRLooCVNum(TR, y, lambdaInit) end """ function TRGCVNum(TR::TRSVD, y, lambdaInit=1) TRGCVNum(X, y, lambdaInit=1, regType="L2", regParam1=0, regParam2=1e-14) Finds regularization paramter value minimising the PRESS-statistic and returns "b, lambda_min". """ function TRGCVNum(TR::TRSVD, y, lambdaInit=1) function gcvfunc(lambdaval) @inline gcvval = TRGCV(TR, y, lambdaval[1]); return gcvval end my = mean(y); y = y .- my; prob = OptimizationProblem((x, p) -> gcvfunc(x), [1.0], []) sol = solve(prob, NelderMead())[1]; b = TRRegCoeffs(TR, y, sol, my); return b, sol[1] end function TRGCVNum(X, y, lambdaInit=1, regType="L2", regParam1=0, regParam2=1e-14) TR = TRSVDDecomp(X, regType, regParam1, regParam2); @inline b, lambda_min = TRGCVNum(TR, y, lambdaInit) end