# Tikhonov-regularisation (ridge) cross-validation utilities (Julia).
"""
    TRLooCVUpdateNaive(X, y, lambdasu, bOld)

Naive leave-one-out cross-validation for the Tikhonov model-update problem.
For every held-out sample and every regularisation value the stacked
least-squares system `[Xc; sqrt(lambda)*I] * b = [yc; sqrt(lambda)*bOld]`
is solved explicitly — the most direct (and slowest) way of approaching
the update problem.

Returns the vector of RMSECV values, one entry per element of `lambdasu`.
"""
function TRLooCVUpdateNaive(X, y, lambdasu, bOld)

    nObs, nVar = size(X)
    press = zeros(length(lambdasu))

    for holdout in 1:nObs
        keep = setdiff(1:nObs, holdout)
        Xtrain = X[keep, :]
        ytrain = y[keep]

        # Centre the training data.
        xMean = mean(Xtrain, dims=1)
        yMean = mean(ytrain)
        Xc = Xtrain .- xMean
        yc = ytrain .- yMean
        nCols = size(Xtrain, 2)

        for (k, lam) in enumerate(lambdasu)
            sqlam = sqrt(lam)
            # Augmented least-squares solve for the coefficient update.
            b = [Xc; sqlam * I(nCols)] \ [yc; sqlam * bOld]
            pred = ((X[holdout, :]' .- xMean) * b)[1] + yMean
            press[k] += (y[holdout] - pred)^2
        end
    end

    return sqrt.(1/nObs .* press)
end
"""
    TRLooCVUpdateFair(X, y, lambdasu, bOld)

Leave-one-out CV for the Tikhonov update problem using the 'svd-trick' for
efficient calculation of the regression coefficients, but without leverage
corrections: the model is re-fitted for every held-out sample, and the
coefficients for all lambda values are obtained at once from one SVD.

Returns the RMSECV values as a `Vector` (one entry per element of `lambdasu`).

NOTE(review): the original code used pre-1.0 `broadcast(.+, ...)` syntax and
an explicit `ones(n-1)` that required `p >= n-1` (fat data matrix); the scalar
form used here is numerically identical in that case and also valid for tall
matrices.
"""
function TRLooCVUpdateFair(X, y, lambdasu, bOld)

    n, p = size(X);
    rmsecvman = zeros(length(lambdasu));

    for i = 1:n
        inds = setdiff(1:n, i);
        Xdata = X[inds, :];
        ydata = y[inds];

        # Centre the training data.
        mX = mean(Xdata, dims=1);
        my = mean(ydata);
        Xs = Xdata .- mX;
        ys = ydata .- my;

        U, s, V = svd(Xs, full=false);

        # denom[i,j]  = lambda_j / s_i + s_i
        # denom2[i,j] = 1 + lambda_j / s_i^2
        denom = (lambdasu ./ s' .+ s')';
        denom2 = 1 .+ lambdasu' ./ s.^2;

        # Updated regression coefficients, one column per lambda value.
        bcoeffs = V * ((U' * ys) ./ denom) .+ bOld .- V * ((V' * bOld) ./ denom2);

        # Squared prediction error of the held-out sample, per lambda.
        rmsecvman += vec((y[i] .- ((X[i, :]' .- mX) * bcoeffs .+ my)).^2);
    end

    rmsecvman = sqrt.(1/n .* rmsecvman);

    return rmsecvman
end
"""
    TRSegCVUpdate(X, y, lambdas, cv, bOld, regType="L2", derOrder=0)

Fast segmented (k-fold) cross-validation for updating Tikhonov regression
coefficients, based on the SVD of the centred, regularisation-transformed data
and a Sherman-Morrison-Woodbury-style correction per segment.

Inputs:
- X, y     : data matrix (n x p) and response vector (length n)
- lambdas  : vector of regularisation parameter values
- cv       : length-n vector of segment labels (1..n_seg)
- bOld     : previous regression coefficients being updated
- regType  : "bc", "legendre", "L2", "std" or "GL"
- derOrder : derivative order (or fractional order for "GL")

Outputs: b_lambda_min, rmsecv, lambda_min, lambda_min_ind, bcoeffs
(`lambda_min` is the minimal rmsecv *value*, not a lambda; `lambda_min_ind`
is the index of the minimising lambda.)
"""
function TRSegCVUpdate(X, y, lambdas, cv, bOld, regType="L2", derOrder=0)

    n, p = size(X);

    # Finding appropriate regularisation matrix
    if regType == "bc"
        regMat = [I(p); zeros(derOrder, p)];
        for i = 1:derOrder regMat = diff(regMat, dims=1); end
    elseif regType == "legendre"
        regMat = [I(p); zeros(derOrder, p)];
        for i = 1:derOrder regMat = diff(regMat, dims=1); end
        P, _ = plegendre(derOrder - 1, p);
        epsilon = 1e-14;
        regMat[end-derOrder+1:end, :] = sqrt(epsilon) * P;
    elseif regType == "L2"
        regMat = I(p);
    elseif regType == "std"
        regMat = Diagonal(vec(std(X, dims=1)));
    elseif regType == "GL"  # Gruenwald-Letnikov fractional derivative regularisation
        C = ones(p) * 1.0;
        for k in 2:p
            C[k] = (1 - (derOrder + 1) / (k - 1)) * C[k-1];
        end
        regMat = zeros(p, p);
        for i in 1:p
            regMat[i:end, i] = C[1:end-i+1];
        end
    end

    # Preliminary calculations: centre the data and absorb regMat into X.
    mX = mean(X, dims=1);
    X = X .- mX;
    my = mean(y);
    y = y .- my;
    X = X / regMat;
    U, s, V = svd(X, full=false);

    n_seg = maximum(cv);
    n_lambdas = length(lambdas);

    # Residuals of the full-data fits, one column per lambda.
    # denom[i,j] = lambda_j/s_i + s_i,  denom2[i,j] = 1 + lambda_j/s_i^2
    denom = (lambdas ./ s' .+ s')';
    denom2 = 1 .+ lambdas' ./ s.^2;
    yhat = (s .* (U' * y)) ./ denom;
    yhat += s .* (1 .- 1 ./ denom2) .* (V' * bOld);
    resid = y .- U * yhat;

    # Cross-validated residuals via the per-segment SMW correction.
    rescv = zeros(n, n_lambdas);
    sdenom = sqrt.(s ./ denom)';

    for seg in 1:n_seg
        segmask = vec(cv .== seg);
        Useg = U[segmask, :];
        # Identity minus the 1/n centring contribution.
        Id = 1.0 * I(size(Useg, 1)) .- 1/n;
        for k in 1:n_lambdas
            Uk = Useg .* sdenom[k, :]';
            rescv[segmask, k] = (Id - Uk * Uk') \ resid[segmask, k];
        end
    end

    # Calculating rmsecv and regression coefficients (columns = lambdas).
    press = sum(rescv.^2, dims=1)';
    rmsecv = sqrt.(1/n .* press);
    bcoeffs = V * ((U' * y) ./ denom) .+ bOld .- V * ((V' * bOld) ./ denom2);
    bcoeffs = regMat \ bcoeffs;

    # Prepend the intercept row so coefficients apply to uncentred data.
    if my != 0
        bcoeffs = [my .- mX * bcoeffs; bcoeffs];
    end

    # rmsecv-minimal value and the associated regression coefficients.
    lambda_min, lambda_min_ind = findmin(rmsecv)
    lambda_min_ind = lambda_min_ind[1]
    b_lambda_min = bcoeffs[:, lambda_min_ind]

    return b_lambda_min, rmsecv, lambda_min, lambda_min_ind, bcoeffs
end
"""
    TRLooCVUpdate(X, y, lambdas, bOld, regType="L2", derOrder=0)

Fast leave-one-out CV for the augmented Tikhonov update problem
`[Xc; sqrt(lambda)*L] * b = [yc; sqrt(lambda)*L*bOld]`, using the SVD of the
transformed data plus leverage corrections so the model is fitted only once
per lambda. Note that the regularisation acts on the *difference* between new
and old regression coefficients, so most regularisation types other than "L2"
are probably not meaningful here.

Outputs: brmsecv, bgcv, rmsecv, gcv, idminrmsecv, lambdarmsecv, idmingcv,
lambdagcv, bcoeffs
"""
function TRLooCVUpdate(X, y, lambdas, bOld, regType="L2", derOrder=0)

    n, p = size(X);

    # Finding appropriate regularisation matrix
    if regType == "bc"
        regMat = [I(p); zeros(derOrder, p)];
        for i = 1:derOrder regMat = diff(regMat, dims=1); end
    elseif regType == "legendre"
        regMat = [I(p); zeros(derOrder, p)];
        for i = 1:derOrder regMat = diff(regMat, dims=1); end
        P, _ = plegendre(derOrder - 1, p);
        epsilon = 1e-14;
        regMat[end-derOrder+1:end, :] = sqrt(epsilon) * P;
    elseif regType == "L2"
        regMat = I(p);
    elseif regType == "std"
        regMat = Diagonal(vec(std(X, dims=1)));
    elseif regType == "GL"  # Gruenwald-Letnikov fractional derivative regularisation
        C = ones(p) * 1.0;
        for k in 2:p
            C[k] = (1 - (derOrder + 1) / (k - 1)) * C[k-1];
        end
        regMat = zeros(p, p);
        for i in 1:p
            regMat[i:end, i] = C[1:end-i+1];
        end
    end

    # Preliminary calculations: centre the data, absorb regMat into X.
    mX = mean(X, dims=1);
    X = X .- mX;
    my = mean(y);
    y = y .- my;
    X = X / regMat;
    U, s, V = svd(X, full=false);

    # Main calculations.
    # denom[i,j] = lambda_j/s_i + s_i,  denom2[i,j] = 1 + lambda_j/s_i^2
    denom = (lambdas ./ s' .+ s')';
    denom2 = 1 .+ lambdas' ./ s.^2;
    yhat = (s .* (U' * y)) ./ denom;
    yhat += s .* (1 .- 1 ./ denom2) .* (V' * bOld);
    resid = y .- U * yhat;
    # Leverage: SVD contribution plus 1/n from centring.
    H = U.^2 * (s ./ denom) .+ 1/n;
    rescv = resid ./ (1 .- H);
    press = vec(sum(rescv.^2, dims=1));
    rmsecv = sqrt.(1/n .* press);
    gcv = vec(sum(resid.^2, dims=1) ./ mean(1 .- H, dims=1).^2);

    # Finding lambda that minimises rmsecv and GCV.
    idminrmsecv = findmin(press)[2][1];
    idmingcv = findmin(gcv)[2][1];
    lambdarmsecv = lambdas[idminrmsecv];
    lambdagcv = lambdas[idmingcv];

    # Calculating regression coefficients (columns = lambdas), mapped back
    # through the regularisation matrix.
    bcoeffs = V * ((U' * y) ./ denom) .+ bOld .- V * ((V' * bOld) ./ denom2);
    bcoeffs = regMat \ bcoeffs;

    if my != 0
        bcoeffs = [my .- mX * bcoeffs; bcoeffs];
    end

    brmsecv = bcoeffs[:, idminrmsecv];
    bgcv = bcoeffs[:, idmingcv];

    return brmsecv, bgcv, rmsecv, gcv, idminrmsecv, lambdarmsecv, idmingcv, lambdagcv, bcoeffs
end
"""
### TO DO: ADD FRACTIONAL DERIVATIVE REGULARIZATION <-- Check that it is correctly added:) ###

    regularizationMatrix(X; regType="L2", regParam1=0, regParam2=1e-14)
    regularizationMatrix(p::Int64; regType="L2", regParam1=0, regParam2=1e-14)

Calculates and returns a square regularization matrix.

Inputs:
- X/p       : data matrix (the matrix will then be size(X,2) square) or the size directly
- regType   : "L2"       identity matrix
              "bc"       discrete derivative, forces zero on the right endpoint
              "legendre" discrete derivative without boundary condition; the bottom
                         rows are filled with lower-order polynomial trends
                         (scaled by sqrt(regParam2)) to keep the matrix square
              "std"      standardization (diagonal matrix of column standard deviations)
              "GL"       Gruenwald-Letnikov fractional derivative
- regParam1 : Int64, degree of derivative regularization (0 gives L\\_2);
              for "GL" it is the fractional order alpha
- regParam2 : for regType=="legendre" the added polynomials are multiplied by sqrt(regParam2)

Output: square regularization matrix
"""
function regularizationMatrix(X; regType="L2", regParam1=0, regParam2=1e-14)

    # For standardization the diagonal of column standard deviations is built
    # here (it needs the data) and handed down through regParam2.
    if regType == "std"
        regParam2 = Diagonal(vec(std(X, dims=1)))
    end

    return regularizationMatrix(size(X, 2); regType=regType, regParam1=regParam1, regParam2=regParam2)
end

function regularizationMatrix(p::Int64; regType="L2", regParam1=0, regParam2=1e-14)

    if regType == "bc"
        # Discrete derivative with boundary conditions.
        D = [I(p); zeros(regParam1, p)]
        for _ in 1:regParam1
            D = diff(D, dims=1)
        end
        regMat = D
    elseif regType == "legendre"
        # Derivative operator; fill in polynomials in the bottom row(s) to get
        # a square matrix.
        D = [I(p); zeros(regParam1, p)]
        for _ in 1:regParam1
            D = diff(D, dims=1)
        end
        P, _ = plegendre(regParam1 - 1, p)
        D[end-regParam1+1:end, :] = sqrt(regParam2) * P
        regMat = D
    elseif regType == "L2"
        regMat = I(p)
    elseif regType == "std"
        # Standardization: the diagonal matrix prepared by the X-method.
        regMat = regParam2
    elseif regType == "GL"
        # Gruenwald-Letnikov fractional derivative regularization;
        # regParam1 is alpha (order of fractional derivative).
        w = ones(p)
        for k in 2:p
            w[k] = (1 - (regParam1 + 1) / (k - 1)) * w[k-1]
        end
        # Lower-triangular Toeplitz matrix of the GL coefficients.
        regMat = zeros(p, p)
        for col in 1:p
            regMat[col:end, col] = w[1:p-col+1]
        end
    end

    return regMat
end
"""
    TRLooCV(X, y, lambdas, regType="L2", regParam1=1, regParam2=1)

Fast leave-one-out cross-validation for Tikhonov regularisation, using the
SVD of the centred, regularisation-transformed data and leverage corrections.

    bpress, bgcv, rmsecv, GCV, idminPRESS, lambdaPRESS, idminGCV, lambdaGCV, bcoeffs =
        TRLooCV(X, y, lambdas, regType="L2", regParam1=1, regParam2=1)

regType: 'bc', 'legendre', 'L2', 'std', 'GL'

BUG FIX: the "legendre" branch referenced an undefined variable `regParam`
(`plegendre(regParam-1, p)`), which raised an UndefVarError; it now correctly
uses `regParam1`.
"""
function TRLooCV(X, y, lambdas, regType="L2", regParam1=1, regParam2=1)

    n, p = size(X);
    mX = mean(X, dims=1);
    X = X .- mX;
    my = mean(y);
    y = y .- my;

    # Finding appropriate regularisation matrix
    if regType == "bc"
        regMat = [I(p); zeros(regParam1, p)];
        for i = 1:regParam1 regMat = diff(regMat, dims=1); end
    elseif regType == "legendre"
        regMat = [I(p); zeros(regParam1, p)];
        for i = 1:regParam1 regMat = diff(regMat, dims=1); end
        P, _ = plegendre(regParam1 - 1, p);   # was: plegendre(regParam-1, p) — undefined name
        regMat[end-regParam1+1:end, :] = sqrt(regParam2) * P;
    elseif regType == "L2"
        regMat = I(p);
    elseif regType == "std"
        regMat = Diagonal(vec(std(X, dims=1)));
    elseif regType == "GL"  # Gruenwald-Letnikov fractional derivative regularisation
        # regParam1 is alpha (order of fractional derivative)
        C = ones(p) * 1.0;
        for k in 2:p
            C[k] = (1 - (regParam1 + 1) / (k - 1)) * C[k-1];
        end
        regMat = zeros(p, p);
        for i in 1:p
            regMat[i:end, i] = C[1:end-i+1];
        end
    end

    X = X / regMat;
    U, s, V = svd(X, full=false)

    # denom[i,j] = lambda_j/s_i + s_i; H is the leverage (plus 1/n from centring).
    denom = (lambdas ./ s' .+ s')';
    H = U.^2 * (s ./ denom) .+ 1/n;
    resid = y .- U * ((s .* (U' * y)) ./ denom);
    rescv = resid ./ (1 .- H);
    press = vec(sum(rescv.^2, dims=1));
    rmsecv = sqrt.(1/n .* press);
    GCV = vec(sum(resid.^2, dims=1) ./ mean(1 .- H, dims=1).^2);

    # Indices/values of the PRESS- and GCV-minimising lambdas.
    idminPRESS = findmin(press)[2][1];
    idminGCV = findmin(GCV)[2][1];
    lambdaPRESS = lambdas[idminPRESS];
    lambdaGCV = lambdas[idminGCV];

    # Coefficients (one column per lambda), mapped back through regMat.
    bcoeffs = V * ((U' * y) ./ denom);
    bcoeffs = regMat \ bcoeffs;

    # Prepend intercept row so the coefficients apply to uncentred data.
    if my != 0
        bcoeffs = [my .- mX * bcoeffs; bcoeffs];
    end

    bpress = bcoeffs[:, idminPRESS]
    bgcv = bcoeffs[:, idminGCV]

    return bpress, bgcv, rmsecv, GCV, idminPRESS, lambdaPRESS, idminGCV, lambdaGCV, bcoeffs
end
"""
    TRSegCV(X, y, lambdas, cv, regType="L2", regParam1=0, regParam2=1e-14)

Segmented cross-validation based on the Sherman-Morrison-Woodbury updating
formula.

Inputs:
- X : Data matrix
- y : Response vector
- lambdas : Vector of regularization parameter values
- cv : Vector of length n indicating segment membership for each sample
- regType, regParam1, regParam2 : Inputs to regularizationMatrix function

Outputs: b_lambda_min, rmsecv, lambda_min, lambda_min_ind, bcoeffs
(`lambda_min` is the minimal rmsecv *value*; `lambda_min_ind` its index.)

BUG FIX: the "legendre" branch referenced an undefined variable `regParam`
(`plegendre(regParam-1, p)`), which raised an UndefVarError; it now correctly
uses `regParam1`.
"""
function TRSegCV(X, y, lambdas, cv, regType="L2", regParam1=0, regParam2=1e-14)

    n, p = size(X);
    mX = mean(X, dims=1);
    X = X .- mX;
    my = mean(y);
    y = y .- my;

    # Finding appropriate regularisation matrix
    if regType == "bc"
        regMat = [I(p); zeros(regParam1, p)];
        for i = 1:regParam1 regMat = diff(regMat, dims=1); end
    elseif regType == "legendre"
        regMat = [I(p); zeros(regParam1, p)];
        for i = 1:regParam1 regMat = diff(regMat, dims=1); end
        P, _ = plegendre(regParam1 - 1, p);   # was: plegendre(regParam-1, p) — undefined name
        regMat[end-regParam1+1:end, :] = sqrt(regParam2) * P;
    elseif regType == "L2"
        regMat = I(p);
    elseif regType == "std"
        regMat = Diagonal(vec(std(X, dims=1)));
    elseif regType == "GL"  # Gruenwald-Letnikov fractional derivative regularisation
        # regParam1 is alpha (order of fractional derivative)
        C = ones(p) * 1.0;
        for k in 2:p
            C[k] = (1 - (regParam1 + 1) / (k - 1)) * C[k-1];
        end
        regMat = zeros(p, p);
        for i in 1:p
            regMat[i:end, i] = C[1:end-i+1];
        end
    end

    X = X / regMat;
    U, s, V = svd(X, full=false);

    n_seg = maximum(cv);
    n_lambdas = length(lambdas);

    # Full-data residuals, one column per lambda; denom[i,j] = lambda_j/s_i + s_i.
    denom = (lambdas ./ s' .+ s')';
    resid = y .- U * ((s .* (U' * y)) ./ denom);
    rescv = zeros(n, n_lambdas);
    sdenom = sqrt.(s ./ denom)';

    # Cross-validated residuals via the per-segment SMW correction.
    for seg in 1:n_seg
        segmask = vec(cv .== seg);
        Useg = U[segmask, :];
        # Identity minus the 1/n centring contribution.
        Id = 1.0 * I(size(Useg, 1)) .- 1/n;
        for k in 1:n_lambdas
            Uk = Useg .* sdenom[k, :]';
            rescv[segmask, k] = (Id - Uk * Uk') \ resid[segmask, k];
        end
    end

    press = sum(rescv.^2, dims=1)';
    rmsecv = sqrt.(1/n .* press);
    bcoeffs = V * ((U' * y) ./ denom);
    bcoeffs = regMat \ bcoeffs;

    # Prepend intercept row so the coefficients apply to uncentred data.
    if my != 0
        bcoeffs = [my .- mX * bcoeffs; bcoeffs];
    end

    lambda_min, lambda_min_ind = findmin(rmsecv)
    lambda_min_ind = lambda_min_ind[1]
    b_lambda_min = bcoeffs[:, lambda_min_ind]

    return b_lambda_min, rmsecv, lambda_min, lambda_min_ind, bcoeffs
end
"""
    plegendre(d, p)

Calculates an orthonormal (discrete) Legendre-type polynomial basis using a
QR factorisation of monomials sampled on a uniform grid over [-1, 1].

Inputs:
- d : polynomial degree
- p : size of vector (number of grid points)

Outputs:
- Q : (d+1) x p matrix whose rows form the orthonormal basis
- R : upper-triangular matrix from the QR factorisation
"""
function plegendre(d, p)

    # Vandermonde-style matrix: column k+1 holds the degree-k monomial
    # evaluated on a uniform grid over [-1, 1].
    grid = (-1:2/(p-1):1)'
    V = ones(p, d + 1)
    for deg in 1:d
        V[:, deg+1] = grid.^deg
    end

    # Orthonormalise the monomials.
    F = qr(V)
    Q = Matrix(F.Q)'
    R = Matrix(F.R)

    return Q, R
end
"""
    TRSegCVNaive(X, y, lambdas, cvfolds)

"Manual" k-fold cross-validation for the ridge regression problem. The
least-squares problem is solved explicitly for every (lambda, fold) pair and
no computational shortcuts are used. Returns the RMSECV value for each lambda.
"""
function TRSegCVNaive(X, y, lambdas, cvfolds)

    nObs, nVar = size(X)
    press = zeros(length(lambdas))
    nfolds = length(unique(cvfolds))

    for (j, lam) in enumerate(lambdas)
        sqlam = sqrt(lam)
        for fold in 1:nfolds
            test = (cvfolds .== fold)
            train = .!test
            Xtrain = X[vec(train), :]
            ytrain = y[vec(train)]

            # Centre the training data.
            xMean = mean(Xtrain, dims=1)
            yMean = mean(ytrain)
            Xc = Xtrain .- xMean
            yc = ytrain .- yMean

            # Ridge solution via the stacked least-squares system.
            b = [Xc; sqlam * I(nVar)] \ [yc; zeros(nVar, 1)]
            err = y[vec(test)] - ((X[vec(test), :] .- xMean) * b .+ yMean)
            press[j] += sum(err.^2)
        end
    end

    return sqrt.(1/nObs .* press)
end
"""
    TRSegCVUpdateNaive(X, y, lambdas, cvfolds, bOld)

"Manual" k-fold cross-validation for the ridge regression *update* problem.
The least-squares problem `[Xc; sqrt(lambda)*I] * b = [yc; sqrt(lambda)*bOld]`
is solved explicitly for every (lambda, fold) pair and no shortcuts are used.
Returns the RMSECV value for each lambda.
"""
function TRSegCVUpdateNaive(X, y, lambdas, cvfolds, bOld)

    nObs, nVar = size(X)
    press = zeros(length(lambdas))
    nfolds = length(unique(cvfolds))

    for (j, lam) in enumerate(lambdas)
        sqlam = sqrt(lam)
        for fold in 1:nfolds
            test = (cvfolds .== fold)
            train = .!test
            Xtrain = X[vec(train), :]
            ytrain = y[vec(train)]

            # Centre the training data.
            xMean = mean(Xtrain, dims=1)
            yMean = mean(ytrain)
            Xc = Xtrain .- xMean
            yc = ytrain .- yMean

            # Augmented least-squares solve for the coefficient update.
            b = [Xc; sqlam * I(nVar)] \ [yc; sqlam * bOld]
            err = y[vec(test)] - ((X[vec(test), :] .- xMean) * b .+ yMean)
            press[j] += sum(err.^2)
        end
    end

    return sqrt.(1/nObs .* press)
end
"""
    TRSegCVUpdateFair(X, y, lambdas, cv, bOld)

K-fold CV for the ridge regression update problem, using the 'SVD-trick' to
calculate the regression coefficients for all lambda values at once; each
fold is still re-fitted from scratch. Returns the RMSECV values as a
`Vector` (one entry per lambda).

NOTE(review): the original used pre-1.0 `broadcast(.+, ...)` syntax and an
explicit `ones(n - fold size)` that required p >= training-set size; the
scalar form used here is numerically identical in that case and also valid
for tall matrices.
"""
function TRSegCVUpdateFair(X, y, lambdas, cv, bOld)

    n, p = size(X);
    rmsecvman = zeros(length(lambdas));
    nfolds = length(unique(cv));

    for i = 1:nfolds
        inds = (cv .== i);
        Xdata = X[vec(.!inds), :];
        ydata = y[vec(.!inds)];

        # Centre the training data.
        mX = mean(Xdata, dims=1);
        my = mean(ydata);
        Xs = Xdata .- mX;
        ys = ydata .- my;

        U, s, V = svd(Xs, full=false);

        # denom[i,j] = lambda_j/s_i + s_i,  denom2[i,j] = 1 + lambda_j/s_i^2
        denom = (lambdas ./ s' .+ s')';
        denom2 = 1 .+ lambdas' ./ s.^2;

        # Updated regression coefficients, one column per lambda value.
        bcoeffs = V * ((U' * ys) ./ denom) .+ bOld .- V * ((V' * bOld) ./ denom2);
        rmsecvman += vec(sum((y[vec(inds)] .- ((X[vec(inds), :] .- mX) * bcoeffs .+ my)).^2, dims=1));
    end

    rmsecvman = sqrt.(1/n .* rmsecvman);

    return rmsecvman
end
"""
    TRSegCVFair(X, y, lambdas, cv)

K-fold CV for the ridge regression problem, using the 'SVD-trick' to get the
regression coefficients for all lambda values at once; each fold is re-fitted
from scratch. Returns the RMSECV values as a `Vector` (one entry per lambda).

NOTE(review): the original computed a `denom2` matrix that was never used
(dead code, only needed in the update variant); it has been removed.
"""
function TRSegCVFair(X, y, lambdas, cv)

    n, p = size(X);
    rmsecv = zeros(length(lambdas));
    nfolds = length(unique(cv));

    for i = 1:nfolds
        inds = (cv .== i);
        Xdata = X[vec(.!inds), :];
        ydata = y[vec(.!inds)];

        # Centre the training data.
        mX = mean(Xdata, dims=1);
        my = mean(ydata);
        Xs = Xdata .- mX;
        ys = ydata .- my;

        U, s, V = svd(Xs, full=false);

        # denom[i,j] = lambda_j/s_i + s_i
        denom = (lambdas ./ s' .+ s')';

        # Regression coefficients, one column per lambda value.
        bcoeffs = V * ((U' * ys) ./ denom);
        rmsecv += vec(sum((y[vec(inds)] .- ((X[vec(inds), :] .- mX) * bcoeffs .+ my)).^2, dims=1));
    end

    rmsecv = sqrt.(1/n .* rmsecv);

    return rmsecv
end