""" Solves the model update problem explicitly as a least squares problem with stacked matrices. In practice the most naive way of approaching the update problem """ function TRLooCVUpdateNaive(X, y, lambdasu, bOld) n, p = size(X); rmsecv = zeros(length(lambdasu)); for i = 1:n inds = setdiff(1:n, i); Xdata = X[inds,:]; ydata = y[inds]; mX = mean(Xdata, dims=1); my = mean(ydata); Xs = Xdata .- mX; ys = ydata .- my; p2 = size(Xdata, 2); for j = 1:length(lambdasu) betas = [Xs; sqrt(lambdasu[j]) * I(p2)] \ [ys ; sqrt(lambdasu[j]) * bOld]; rmsecv[j] += (y[i] - (((X[i,:]' .- mX) * betas)[1] + my))^2; end end rmsecv = sqrt.(1/n .* rmsecv); return rmsecv end """ Uses the 'svd-trick' for efficient calculation of regression coefficients, but does not use leverage corrections. Hence regression coefficients are calculated for all lambda values """ function TRLooCVUpdateFair(X, y, lambdasu, bOld) n, p = size(X); rmsecv = zeros(length(lambdasu)) for i = 1:n inds = setdiff(1:n, i); Xdata = X[inds,:]; ydata = y[inds]; mX = mean(Xdata, dims=1); my = mean(ydata); Xs = Xdata .- mX; ys = ydata .- my; U, s, V = svd(Xs, full=false); denom = broadcast(.+, broadcast(./, lambdasu, s'), s')'; denom2 = broadcast(.+, ones(n-1), broadcast(./, lambdasu', s.^2)) # Calculating regression coefficients and residual bcoeffs = V * broadcast(./, (U' * ys), denom) .+ bOld .- V * broadcast(./, V' * bOld, denom2); rmsecv += ((y[i] .- ((X[i,:]' .- mX) * bcoeffs .+ my)).^2)'; end rmsecv = sqrt.(1/n .* rmsecv); return rmsecv end """ Updates regression coefficient by solving the augmented TR problem [Xs; sqrt(lambda)*I] * beta = [ys; sqrt(lambda)*b_old] Note that many regularization types are supported but the regularization is on the difference between new and old reg. coeffs. and so most regularization types are probably not meaningful. """ function TRLooCVUpdate(X, y, lambdas, bOld, regType="L2", derOrder=0) n, p = size(X); # Finding appropriate regularisation matrix if regType == "bc" regMat = [I(p); zeros(derOrder,p)]; for i = 1:derOrder regMat = diff(regMat, dims = 1); end elseif regType == "legendre" regMat = [I(p); zeros(derOrder,p)]; for i = 1:derOrder regMat = diff(regMat, dims = 1); end P, _ = plegendre(derOrder-1, p); epsilon = 1e-14; regMat[end-derOrder+1:end,:] = sqrt(epsilon) * P; elseif regType == "L2" regMat = I(p); elseif regType == "std" regMat = Diagonal(vec(std(X, dims=1))); elseif regType == "GL" # GL fractional derivative regulariztion C = ones(p)*1.0; for k in 2:p C[k] = (1-(derOrder+1)/(k-1)) * C[k-1]; end regMat = zeros(p,p); for i in 1:p regMat[i:end, i] = C[1:end-i+1]; end end # Preliminary calculations mX = mean(X, dims=1); X = X .- mX; my = mean(y); y = y .- my; X = X / regMat; U, s, V = svd(X, full=false); # Main calculations denom = broadcast(.+, broadcast(./, lambdas, s'), s')'; denom2 = broadcast(.+, ones(n), broadcast(./, lambdas', s.^2)) yhat = broadcast(./, s .* (U'*y), denom); yhat += s .* broadcast(.-, 1, broadcast(./, 1, denom2)) .* (V' * bOld) resid = broadcast(.-, y, U * yhat) H = broadcast(.+, U.^2 * broadcast(./, s, denom), 1/n); rescv = broadcast(./, resid, broadcast(.-, 1, H)); press = vec(sum(rescv.^2, dims=1)); rmsecv = sqrt.(1/n .* press); gcv = vec(broadcast(./, sum(resid.^2, dims=1), mean(broadcast(.-, 1, H), dims=1).^2)); # Finding lambda that minimises rmsecv and GCV idminrmsecv = findmin(press)[2][1]; idmingcv = findmin(gcv)[2][1]; lambdarmsecv = lambdas[idminrmsecv]; lambdagcv = lambdas[idmingcv]; # Calculating regression coefficients bcoeffs = V * broadcast(./, (U' * y), denom) .+ bOld .- V * broadcast(./, V' * bOld, denom2); bcoeffs = regMat \ bcoeffs; if my != 0 bcoeffs = [my .- mX*bcoeffs; bcoeffs]; end brmsecv = bcoeffs[:, idminrmsecv]; bgcv = bcoeffs[:, idmingcv]; return brmsecv, bgcv, rmsecv, gcv, idminrmsecv, lambdarmsecv, idmingcv, lambdagcv, bcoeffs end """ # I denne gis X- og y-snitt som input til funksjonen istedenfor å regne ut fra input-dataene. """ function TRLooCVUpdateExperimental(X, y, lambdas, bOld, mX, my, regType="L2", derOrder=0) n, p = size(X); # Finding appropriate regularisation matrix if regType == "bc" regMat = [I(p); zeros(derOrder,p)]; for i = 1:derOrder regMat = diff(regMat, dims = 1); end elseif regType == "legendre" regMat = [I(p); zeros(derOrder,p)]; for i = 1:derOrder regMat = diff(regMat, dims = 1); end P, _ = plegendre(derOrder-1, p); epsilon = 1e-14; regMat[end-derOrder+1:end,:] = sqrt(epsilon) * P; elseif regType == "L2" regMat = I(p); elseif regType == "std" regMat = Diagonal(vec(std(X, dims=1))); elseif regType == "GL" # GL fractional derivative regulariztion C = ones(p)*1.0; for k in 2:p C[k] = (1-(derOrder+1)/(k-1)) * C[k-1]; end regMat = zeros(p,p); for i in 1:p regMat[i:end, i] = C[1:end-i+1]; end end # Preliminary calculations #mX = mean(X, dims=1); X = X .- mX; #my = mean(y); y = y .- my; X = X / regMat; U, s, V = svd(X, full=false); # Main calculations denom = broadcast(.+, broadcast(./, lambdas, s'), s')'; denom2 = broadcast(.+, ones(n), broadcast(./, lambdas', s.^2)) yhat = broadcast(./, s .* (U'*y), denom); yhat += s .* broadcast(.-, 1, broadcast(./, 1, denom2)) .* (V' * bOld) resid = broadcast(.-, y, U * yhat) H = broadcast(.+, U.^2 * broadcast(./, s, denom), 1/n); rescv = broadcast(./, resid, broadcast(.-, 1, H)); press = vec(sum(rescv.^2, dims=1)); rmsecv = sqrt.(1/n .* press); gcv = vec(broadcast(./, sum(resid.^2, dims=1), mean(broadcast(.-, 1, H), dims=1).^2)); # Finding lambda that minimises rmsecv and GCV idminrmsecv = findmin(press)[2][1]; idmingcv = findmin(gcv)[2][1]; lambdarmsecv = lambdas[idminrmsecv]; lambdagcv = lambdas[idmingcv]; # Calculating regression coefficients bcoeffs = V * broadcast(./, (U' * y), denom) .+ bOld .- V * broadcast(./, V' * bOld, denom2); bcoeffs = regMat \ bcoeffs; if my != 0 bcoeffs = [my .- mX*bcoeffs; bcoeffs]; end brmsecv = bcoeffs[:, idminrmsecv]; bgcv = bcoeffs[:, idmingcv]; return brmsecv, bgcv, rmsecv, gcv, idminrmsecv, lambdarmsecv, idmingcv, lambdagcv, bcoeffs end