Fixed update regression coefficients and added Naive and Fair function for loocv update

This commit is contained in:
Joakim Skogholt 2024-04-24 10:32:53 +02:00
parent 7d370f9125
commit 9ed3bdaea8
2 changed files with 65 additions and 2 deletions

View file

@ -1,5 +1,66 @@
"""
Solves the model update problem explicitly as a least squares problem with stacked matrices.
In practice the most naive way of approaching the update problem
"""
function TRLooCVUpdateNaive(X, y, lambdasu, bold)
n, p = size(X);
rmsecvman = zeros(length(lambdasu));
for i = 1:n
inds = setdiff(1:n, i);
Xdata = X[inds,:];
ydata = y[inds];
mX = mean(Xdata, dims=1);
my = mean(ydata);
Xs = Xdata .- mX;
ys = ydata .- my;
p2 = size(Xdata, 2);
for j = 1:length(lambdasu)
betas = [Xs; sqrt(lambdasu[j]) * I(p2)] \ [ys ; sqrt(lambdasu[j]) * bold];
rmsecvman[j] += (y[i] - (((X[i,:]' .- mX) * betas)[1] + my))^2;
end
end
rmsecvman = sqrt.(1/n .* rmsecvman);
return rmsecvman
end
"""
Uses the 'svd-trick' for efficient calculation of regression coefficients, but does not use leverage corrections.
Hence regression coefficients are calculated for all lambda values
"""
function TRLooCVUpdateFair(X, y, lambdasu, bold)
n, p = size(X);
rmsecvman = zeros(length(lambdasu))
for i = 1:n
inds = setdiff(1:n, i);
Xdata = X[inds,:];
ydata = y[inds];
mX = mean(Xdata, dims=1);
my = mean(ydata);
Xs = Xdata .- mX;
ys = ydata .- my;
U, s, V = svd(Xs, full=false);
denom = broadcast(.+, broadcast(./, lambdasu, s'), s')'; # denom = lambda/s + lambda = (lambda + s's) / lambda
denom2 = broadcast(.+, ones(n-1), broadcast(./, lambdasu', s.^2)) # denom2 = 1 + lambda/(s's) = (s's + lambda) / (s's)
# Calculating regression coefficients and residual
bcoeffs = V * broadcast(./, (U' * ys), denom) .+ bold .- V * broadcast(./, V' * bold, denom2);
rmsecvman += ((y[i] .- ((X[i,:]' .- mX) * bcoeffs .+ my)).^2)';
end
rmsecvman = sqrt.(1/n .* rmsecvman);
return rmsecvman, bcoeffs
end
"""
Fast k-fold cv for updating regression coefficients
@ -70,7 +131,7 @@ end
# Calculating rmsecv and regression coefficients
press = sum(rescv.^2, dims=1)';
rmsecv = sqrt.(1/n .* press);
bcoeffs = V * broadcast(./, (U' * y), denom);
bcoeffs = V * broadcast(./, (U' * ys), denom) .+ bold .- V * broadcast(./, V' * bold, denom2);
bcoeffs = regMat \ bcoeffs;
# Creating regression coefficients for uncentred data
@ -148,7 +209,7 @@ lambdarmsecv = lambdas[idminrmsecv];
lambdagcv = lambdas[idmingcv];
# Calculating regression coefficients
bcoeffs = V * broadcast(./, (U' * y), denom);
bcoeffs = V * broadcast(./, (U' * ys), denom) .+ bold .- V * broadcast(./, V' * bold, denom2);
bcoeffs = regMat \ bcoeffs;
if my != 0

View file

@ -23,6 +23,8 @@ export regularizationMatrix
export TRLooCVUpdate
export TRSegCVUpdate
export plegendre
export TRLooCVUpdateFair
export TRLooCVUpdateNaive
include("convenience.jl")
include("TR.jl")