""" Solves the model update problem explicitly as a least squares problem with stacked matrices. In practice the most naive way of approaching the update problem """ function TRLooCVUpdateNaive(X, y, lambdasu, bOld) n, p = size(X); rmsecvman = zeros(length(lambdasu)); for i = 1:n inds = setdiff(1:n, i); Xdata = X[inds,:]; ydata = y[inds]; mX = mean(Xdata, dims=1); my = mean(ydata); Xs = Xdata .- mX; ys = ydata .- my; p2 = size(Xdata, 2); for j = 1:length(lambdasu) betas = [Xs; sqrt(lambdasu[j]) * I(p2)] \ [ys ; sqrt(lambdasu[j]) * bOld]; rmsecvman[j] += (y[i] - (((X[i,:]' .- mX) * betas)[1] + my))^2; end end rmsecvman = sqrt.(1/n .* rmsecvman); return rmsecvman end """ Uses the 'svd-trick' for efficient calculation of regression coefficients, but does not use leverage corrections. Hence regression coefficients are calculated for all lambda values """ function TRLooCVUpdateFair(X, y, lambdasu, bOld) n, p = size(X); rmsecvman = zeros(length(lambdasu)) for i = 1:n inds = setdiff(1:n, i); Xdata = X[inds,:]; ydata = y[inds]; mX = mean(Xdata, dims=1); my = mean(ydata); Xs = Xdata .- mX; ys = ydata .- my; U, s, V = svd(Xs, full=false); denom = broadcast(.+, broadcast(./, lambdasu, s'), s')'; denom2 = broadcast(.+, ones(n-1), broadcast(./, lambdasu', s.^2)) # Calculating regression coefficients and residual bcoeffs = V * broadcast(./, (U' * ys), denom) .+ bOld .- V * broadcast(./, V' * bOld, denom2); rmsecvman += ((y[i] .- ((X[i,:]' .- mX) * bcoeffs .+ my)).^2)'; end rmsecvman = sqrt.(1/n .* rmsecvman); return rmsecvman end """ Fast k-fold cv for updating regression coefficients """ function TRSegCVUpdate(X, y, lambdas, cv, bOld, regType="L2", derOrder=0) n, p = size(X); # Finding appropriate regularisation matrix if regType == "bc" regMat = [I(p); zeros(derOrder,p)]; for i = 1:derOrder regMat = diff(regMat, dims = 1); end elseif regType == "legendre" regMat = [I(p); zeros(derOrder,p)]; for i = 1:derOrder regMat = diff(regMat, dims = 1); end P, _ = plegendre(derOrder-1, p); epsilon = 1e-14; regMat[end-derOrder+1:end,:] = sqrt(epsilon) * P; elseif regType == "L2" regMat = I(p); elseif regType == "std" regMat = Diagonal(vec(std(X, dims=1))); elseif regType == "GL" # GL fractional derivative regulariztion C = ones(p)*1.0; for k in 2:p C[k] = (1-(derOrder+1)/(k-1)) * C[k-1]; end regMat = zeros(p,p); for i in 1:p regMat[i:end, i] = C[1:end-i+1]; end end # Preliminary calculations mX = mean(X, dims=1); X = X .- mX; my = mean(y); y = y .- my; X = X / regMat; U, s, V = svd(X, full=false); n_seg = maximum(cv); n_lambdas = length(lambdas); # Finding residuals denom = broadcast(.+, broadcast(./, lambdas, s'), s')'; denom2 = broadcast(.+, ones(n), broadcast(./, lambdas', s.^2)) yhat = broadcast(./, s .* (U'*y), denom) yhat += s .* broadcast(.-, 1, broadcast(./, 1, denom2)) .* (V' * bOld) resid = broadcast(.-, y, U * yhat) # Finding cross-validated residuals rescv = zeros(n, n_lambdas); sdenom = sqrt.(broadcast(./, s, denom))'; for seg in 1:n_seg Useg = U[vec(cv .== seg),:]; Id = 1.0 * I(size(Useg,1)) .- 1/n; for k in 1:n_lambdas Uk = Useg .* sdenom[k,:]'; rescv[vec(cv .== seg), k] = (Id - Uk * Uk') \ resid[vec(cv .== seg), k]; end end # Calculating rmsecv and regression coefficients press = sum(rescv.^2, dims=1)'; rmsecv = sqrt.(1/n .* press); bcoeffs = V * broadcast(./, (U' * y), denom) .+ bOld .- V * broadcast(./, V' * bOld, denom2); bcoeffs = regMat \ bcoeffs; # Creating regression coefficients for uncentred data if my != 0 bcoeffs = [my .- mX*bcoeffs; bcoeffs]; end # Finding rmsecv-minimal lambda value and associated regression coefficients lambda_min, lambda_min_ind = findmin(rmsecv) lambda_min_ind = lambda_min_ind[1] b_lambda_min = bcoeffs[:,lambda_min_ind] return b_lambda_min, rmsecv, lambda_min, lambda_min_ind, bcoeffs end """ Updates regression coefficient by solving the augmented TR problem [Xs; sqrt(lambda)*I] * beta = [ys; sqrt(lambda)*b_old] Note that many regularization types are supported but the regularization is on the difference between new and old reg. coeffs. and so most regularization types are probably not meaningful. """ function TRLooCVUpdate(X, y, lambdas, bOld, regType="L2", derOrder=0) n, p = size(X); # Finding appropriate regularisation matrix if regType == "bc" regMat = [I(p); zeros(derOrder,p)]; for i = 1:derOrder regMat = diff(regMat, dims = 1); end elseif regType == "legendre" regMat = [I(p); zeros(derOrder,p)]; for i = 1:derOrder regMat = diff(regMat, dims = 1); end P, _ = plegendre(derOrder-1, p); epsilon = 1e-14; regMat[end-derOrder+1:end,:] = sqrt(epsilon) * P; elseif regType == "L2" regMat = I(p); elseif regType == "std" regMat = Diagonal(vec(std(X, dims=1))); elseif regType == "GL" # GL fractional derivative regulariztion C = ones(p)*1.0; for k in 2:p C[k] = (1-(derOrder+1)/(k-1)) * C[k-1]; end regMat = zeros(p,p); for i in 1:p regMat[i:end, i] = C[1:end-i+1]; end end # Preliminary calculations mX = mean(X, dims=1); X = X .- mX; my = mean(y); y = y .- my; X = X / regMat; U, s, V = svd(X, full=false); # Main calculations denom = broadcast(.+, broadcast(./, lambdas, s'), s')'; denom2 = broadcast(.+, ones(n), broadcast(./, lambdas', s.^2)) yhat = broadcast(./, s .* (U'*y), denom); yhat += s .* broadcast(.-, 1, broadcast(./, 1, denom2)) .* (V' * bOld) resid = broadcast(.-, y, U * yhat) H = broadcast(.+, U.^2 * broadcast(./, s, denom), 1/n); rescv = broadcast(./, resid, broadcast(.-, 1, H)); press = vec(sum(rescv.^2, dims=1)); rmsecv = sqrt.(1/n .* press); gcv = vec(broadcast(./, sum(resid.^2, dims=1), mean(broadcast(.-, 1, H), dims=1).^2)); # Finding lambda that minimises rmsecv and GCV idminrmsecv = findmin(press)[2][1]; idmingcv = findmin(gcv)[2][1]; lambdarmsecv = lambdas[idminrmsecv]; lambdagcv = lambdas[idmingcv]; # Calculating regression coefficients bcoeffs = V * broadcast(./, (U' * y), denom) .+ bOld .- V * broadcast(./, V' * bOld, denom2); bcoeffs = regMat \ bcoeffs; if my != 0 bcoeffs = [my .- mX*bcoeffs; bcoeffs]; end brmsecv = bcoeffs[:, idminrmsecv]; bgcv = bcoeffs[:, idmingcv]; return brmsecv, bgcv, rmsecv, gcv, idminrmsecv, lambdarmsecv, idmingcv, lambdagcv, bcoeffs end """ ### TO DO: ADD FRACTIONAL DERIVATIVE REGULARIZATION <-- Check that it is correctly added:) ### regularizationMatrix(X; regType="L2", regParam1=0, regParam2=1e-14) regularizationMatrix(p::Int64; regType="L2", regParam1=0, regParam2=1e-14) Calculates and returns square regularization matrix. Inputs: - X/p : Size of regularization matrix or data matrix (size of reg. mat. will then be size(X,2) - regType : "L2" (returns identity matrix) "bc" (boundary condition, forces zero on right endpoint for derivative regularization) or "legendre" (no boundary condition, but fills out reg. mat. with lower order polynomial trends to get square matrix) "std" (standardization, FILL OUT WHEN DONE) - regParam1 : Int64, Indicates degree of derivative regularization (0 gives L\\_2) - regParam2 : For regType=="plegendre" added polynomials are multiplied by sqrt(regParam2) Output: Square regularization matrix """ function regularizationMatrix(X; regType="L2", regParam1=0, regParam2=1e-14) if regType == "std" regParam2 = Diagonal(vec(std(X, dims=1))); end regMat = regularizationMatrix(size(X,2); regType, regParam1, regParam2) return regMat end function regularizationMatrix(p::Int64; regType="L2", regParam1=0, regParam2=1e-14) if regType == "bc" # Discrete derivative with boundary conditions regMat = [I(p); zeros(regParam1,p)]; for i = 1:regParam1 regMat = diff(regMat, dims = 1); end elseif regType == "legendre" # Fill in polynomials in bottom row(s) to get square matrix regMat = [I(p); zeros(regParam1,p)]; for i = 1:regParam1 regMat = diff(regMat, dims = 1); end P, _ = plegendre(regParam1-1, p); regMat[end-regParam1+1:end,:] = sqrt(regParam2) * P; elseif regType == "L2" regMat = I(p); elseif regType == "std" # Standardization regMat = regParam2; elseif regType == "GL" # Grünwald-Letnikov fractional derivative regulariztion # regParam1 is alpha (order of fractional derivative) C = ones(p)*1.0; for k in 2:p C[k] = (1-(regParam1+1)/(k-1)) * C[k-1]; end regMat = zeros(p,p); for i in 1:p regMat[i:end, i] = C[1:end-i+1]; end end return regMat end """ function TRLooCV bpress, bgcv, rmsecv, GCV, idminPRESS, lambdaPRESS, idminGCV, lambdaGCV = TRLooCV(X, y, lambdas, regType="L2", regParam1=1, regParam2=1) regType: 'bc', 'legendre', 'L2', 'std', 'GL' """ function TRLooCV(X, y, lambdas, regType="L2", regParam1=1, regParam2=1) n, p = size(X); mX = mean(X, dims=1); X = X .- mX; my = mean(y); y = y .- my; if regType == "bc" regMat = [I(p); zeros(regParam1,p)]; for i = 1:regParam1 regMat = diff(regMat, dims = 1); end elseif regType == "legendre" regMat = [I(p); zeros(regParam1,p)]; for i = 1:regParam1 regMat = diff(regMat, dims = 1); end P, _ = plegendre(regParam-1, p); regMat[end-regParam1+1:end,:] = sqrt(regParam2) * P; elseif regType == "L2" regMat = I(p); elseif regType == "std" regMat = Diagonal(vec(std(X, dims=1))); elseif regType == "GL" # Grünwald-Letnikov fractional derivative regulariztion # regParam1 is alpha (order of fractional derivative) C = ones(p)*1.0; for k in 2:p C[k] = (1-(regParam1+1)/(k-1)) * C[k-1]; end regMat = zeros(p,p); for i in 1:p regMat[i:end, i] = C[1:end-i+1]; end end X = X / regMat; U, s, V = svd(X, full=false) denom = broadcast(.+, broadcast(./, lambdas, s'), s')'; H = broadcast(.+, U.^2 * broadcast(./, s, denom), 1/n); resid = broadcast(.-, y, U * broadcast(./, s .* (U'*y), denom)); rescv = broadcast(./, resid, broadcast(.-, 1, H)); press = vec(sum(rescv.^2, dims=1)); rmsecv = sqrt.(1/n .* press); GCV = vec(broadcast(./, sum(resid.^2, dims=1), mean(broadcast(.-, 1, H), dims=1).^2)); idminPRESS = findmin(press)[2][1]; # First index selects coordinates, second selects '1st coordinate' idminGCV = findmin(GCV)[2][1]; # First index selects coordinates, second selects '1st coordinate' lambdaPRESS = lambdas[idminPRESS]; lambdaGCV = lambdas[idminGCV]; bcoeffs = V * broadcast(./, (U' * y), denom); bcoeffs = regMat \ bcoeffs; if my != 0 bcoeffs = [my .- mX*bcoeffs; bcoeffs]; end bpress = bcoeffs[:, idminPRESS] bgcv = bcoeffs[:, idminGCV] return bpress, bgcv, rmsecv, GCV, idminPRESS, lambdaPRESS, idminGCV, lambdaGCV, bcoeffs end """ function TRSegCV(X, y, lambdas, cv, regType="L2", regParam1=0, regParam2=1e-14) Segmented cross-validation based on the Sherman-Morrison-Woodbury updating formula. Inputs: - X : Data matrix - y : Response vector - lambdas : Vector of regularization parameter values - cv : Vector of length n indicating segment membership for each sample - regType, regParam1, regParam2 : Inputs to regularizationMatrix function Outputs: b_lambda_min, rmsecv, lambda_min, lambda_min_ind """ function TRSegCV(X, y, lambdas, cv, regType="L2", regParam1=0, regParam2=1e-14) n, p = size(X); mX = mean(X, dims=1); X = X .- mX; my = mean(y); y = y .- my; if regType == "bc" regMat = [I(p); zeros(regParam1,p)]; for i = 1:regParam1 regMat = diff(regMat, dims = 1); end elseif regType == "legendre" regMat = [I(p); zeros(regParam1,p)]; for i = 1:regParam1 regMat = diff(regMat, dims = 1); end P, _ = plegendre(regParam-1, p); regMat[end-regParam1+1:end,:] = sqrt(regParam2) * P; elseif regType == "L2" regMat = I(p); elseif regType == "std" regMat = Diagonal(vec(std(X, dims=1))); elseif regType == "GL" # Grünwald-Letnikov fractional derivative regulariztion # regParam1 is alpha (order of fractional derivative) C = ones(p)*1.0; for k in 2:p C[k] = (1-(regParam1+1)/(k-1)) * C[k-1]; end regMat = zeros(p,p); for i in 1:p regMat[i:end, i] = C[1:end-i+1]; end end X = X / regMat; U, s, V = svd(X, full=false); n_seg = maximum(cv); n_lambdas = length(lambdas); denom = broadcast(.+, broadcast(./, lambdas, s'), s')'; resid = broadcast(.-, y, U * broadcast(./, s .* (U'*y), denom)); rescv = zeros(n, n_lambdas); sdenom = sqrt.(broadcast(./, s, denom))'; for seg in 1:n_seg Useg = U[vec(cv .== seg),:]; Id = 1.0 * I(size(Useg,1)) .- 1/n; for k in 1:n_lambdas Uk = Useg .* sdenom[k,:]'; rescv[vec(cv .== seg), k] = (Id - Uk * Uk') \ resid[vec(cv .== seg), k]; end end press = sum(rescv.^2, dims=1)'; rmsecv = sqrt.(1/n .* press); bcoeffs = V * broadcast(./, (U' * y), denom); bcoeffs = regMat \ bcoeffs; if my != 0 bcoeffs = [my .- mX*bcoeffs; bcoeffs]; end lambda_min, lambda_min_ind = findmin(rmsecv) lambda_min_ind = lambda_min_ind[1] b_lambda_min = bcoeffs[:,lambda_min_ind] return b_lambda_min, rmsecv, lambda_min, lambda_min_ind, bcoeffs end """ function plegendre(d, p) Calculates orthonormal Legendre polynomials using a QR factorisation. Inputs: - d : polynomial degree - p : size of vector Outputs: - Q : (d+1) x p matrix with basis - R : matrix from QR-factorisation """ function plegendre(d, p) P = ones(p, d+1); x = (-1:2/(p-1):1)'; for k in 1:d P[:,k+1] = x.^k; end factorisation = qr(P); Q = Matrix(factorisation.Q)'; R = Matrix(factorisation.R); return Q, R end """ "Manual" k-fold cv for solving the Ridge regression problem. The LS problem is solved explicitly and no shortcuts are used. """ function TRSegCVNaive(X, y, lambdas, cvfolds) n, p = size(X); rmsecv = zeros(length(lambdas)); nfolds = length(unique(cvfolds)); for j = 1:length(lambdas) for i = 1:nfolds inds = (cvfolds .== i); Xdata = X[vec(.!inds),:]; ydata = y[vec(.!inds)]; mX = mean(Xdata, dims=1); my = mean(ydata); Xs = Xdata .- mX; ys = ydata .- my; betas = [Xs; sqrt(lambdas[j]) * I(p)] \ [ys; zeros(p,1)]; rmsecv[j] += sum((y[vec(inds)] - ((X[vec(inds),:] .- mX) * betas .+ my)).^2); end end rmsecv = sqrt.(1/n .* rmsecv); return rmsecv end """ "Manual" k-fold cv for solving the Ridge regression problem update. The LS problem is solved explicitly and no shortcuts are used. """ function TRSegCVUpdateNaive(X, y, lambdas, cvfolds, bOld) n, p = size(X); rmsecvman = zeros(length(lambdas)); nfolds = length(unique(cvfolds)); for j = 1:length(lambdas) for i = 1:nfolds inds = (cvfolds .== i); Xdata = X[vec(.!inds),:]; ydata = y[vec(.!inds)]; mX = mean(Xdata, dims=1); my = mean(ydata); Xs = Xdata .- mX; ys = ydata .- my; betas = [Xs; sqrt(lambdas[j]) * I(p)] \ [ys; sqrt(lambdas[j]) * bOld]; rmsecvman[j] += sum((y[vec(inds)] - ((X[vec(inds),:] .- mX) * betas .+ my)).^2); end end rmsecvman = sqrt.(1/n .* rmsecvman); return rmsecvman end """ K-fold CV for the Ridge regression update problem, using the 'SVD-trick' for calculating the regression coefficients. """ function TRSegCVUpdateFair(X, y, lambdas, cv, bOld) n, p = size(X); rmsecvman = zeros(length(lambdas)); nfolds = length(unique(cv)); for i = 1:nfolds inds = (cv .== i); Xdata = X[vec(.!inds),:]; ydata = y[vec(.!inds)]; mX = mean(Xdata, dims=1); my = mean(ydata); Xs = Xdata .- mX; ys = ydata .- my; U, s, V = svd(Xs, full=false); denom = broadcast(.+, broadcast(./, lambdas, s'), s')'; denom2 = broadcast(.+, ones(n-sum(inds)), broadcast(./, lambdas', s.^2)); # Calculating regression coefficients bcoeffs = V * broadcast(./, (U' * ys), denom) .+ bOld .- V * broadcast(./, V' * bOld, denom2); rmsecvman += sum((y[vec(inds)] .- ((X[vec(inds),:] .- mX) * bcoeffs .+ my)).^2, dims=1)'; end rmsecvman = sqrt.(1/n .* rmsecvman); return rmsecvman end """ K-fold CV for the Ridge regression problem, using the 'SVD-trick' for calculating the regression coefficients. """ function TRSegCVFair(X, y, lambdas, cv) n, p = size(X); rmsecv = zeros(length(lambdas)); nfolds = length(unique(cv)); for i = 1:nfolds inds = (cv .== i); Xdata = X[vec(.!inds),:]; ydata = y[vec(.!inds)]; mX = mean(Xdata, dims=1); my = mean(ydata); Xs = Xdata .- mX; ys = ydata .- my; U, s, V = svd(Xs, full=false); denom = broadcast(.+, broadcast(./, lambdas, s'), s')'; denom2 = broadcast(.+, ones(n-sum(inds)), broadcast(./, lambdas', s.^2)); # Calculating regression coefficients bcoeffs = V * broadcast(./, (U' * ys), denom); rmsecv += sum((y[vec(inds)] .- ((X[vec(inds),:] .- mX) * bcoeffs .+ my)).^2, dims=1)'; end rmsecv = sqrt.(1/n .* rmsecv); return rmsecv end