Ting.jl/src/variousRegressionFunctions.jl
2024-05-24 19:04:37 +02:00

165 lines
No EOL
3.3 KiB
Julia

"""
    rmse, ypred = calculateRMSE(X, y, beta)

Compute the root mean squared error (RMSE) of a linear regression model,
with or without an intercept term.

If `length(beta) == size(X, 2)` the model has no intercept; if
`length(beta) == size(X, 2) + 1` the first element of `beta` is the
intercept. Any other length throws a `DimensionMismatch`.

### Returns
- `rmse` - Root mean squared error of the predictions
- `ypred` - Vector of predicted values
"""
function calculateRMSE(X, y, beta)
    if length(beta) == size(X, 2)
        # No intercept: plain linear predictor.
        ypred = X * beta
    elseif length(beta) == (size(X, 2) + 1)
        # First coefficient is the intercept.
        ypred = beta[1] .+ X * beta[2:end]
    else
        # Previously a mismatched beta left `ypred` undefined; fail explicitly.
        throw(DimensionMismatch("length(beta) must be size(X, 2) or size(X, 2) + 1"))
    end
    # Bug fixes: square the residuals *before* averaging (the original
    # computed sqrt(mean(y - ypred).^2)), and return the value that was
    # actually computed (the original returned an undefined name `rmse`).
    rmse = sqrt(mean((y .- ypred) .^ 2))
    return rmse, ypred
end
"""
    bold, regCoeffsInds, cverror = oldRegCoeffs(X, y, cvfolds, ncomps, regFunction=PLS)

Calculates "old" regression coefficients using cross-validation and the
1 standard error (S.E.) rule: the smallest number of components whose CV
error lies within one S.E. of the minimum CV error is selected, then the
model with that many components is refitted on the whole dataset.

### Arguments
- `X` - Data matrix of predictors
- `y` - Responses
- `cvfolds` - Fold label per observation; each unique value is one CV segment
- `ncomps` - Maximum number of components to evaluate
- `regFunction` - `PLS` or `PCR`; assumed to return a coefficient matrix
  whose first row is the intercept (i.e. fitted with centring) and whose
  column `k` is the model with `k` components

### Returns
- `bold` - Coefficient vector (intercept first) of the selected model
- `regCoeffsInds` - Selected number of components
- `cverror` - `ncomps × 1` matrix of cross-validated errors
"""
function oldRegCoeffs(X, y, cvfolds, ncomps, regFunction=PLS)
# One accumulated CV error per candidate model size (ncomps × 1 matrix).
cverror = zeros(ncomps,1);
# Iterating through each cv segment
for i=1:length(unique(cvfolds))
# Split: fold i is the test set, all other folds are the training set.
# NOTE(review): assumes fold labels are exactly 1..nfolds — confirm with callers.
Xtrain = X[vec(i .!= cvfolds), :];
ytrain = y[vec(i .!= cvfolds), :];
Xtest = X[vec(i .== cvfolds), :];
ytest = y[vec(i .== cvfolds), :];
betas, _ = regFunction(Xtrain, ytrain, ncomps)
# Squared prediction error per model size; betas[1,:] is the intercept row,
# betas[2:end,:] the slope coefficients (one column per component count).
cverror += sum((ytest .- Xtest*betas[2:end,:] .- betas[1,:]').^2, dims=1)';
end
# Select components using 1 S.E. rule
cverror = cverror ./ size(X,1);
# findfirst on a matrix returns a CartesianIndex; [1] extracts the row,
# i.e. the smallest model size within one S.E. of the minimum CV error.
regCoeffsInds = findfirst(cverror .< (minimum(cverror) + std(cverror)/sqrt(length(unique(cvfolds)))))[1]
# Find model for whole dataset with selected number of components
betas, _ = regFunction(X, y, ncomps)
bold = betas[:, regCoeffsInds]
return bold, regCoeffsInds, cverror
end
"""
    B, U, s, V = PCR(X, y, kmax, centre=true)

Principal Component Regression (PCR).

Column `k` of `B` holds the regression coefficients of the model built from
the first `k` principal components. When `centre == true` (default) the
predictors are column-centred and the first row of `B` is the intercept.

### Arguments
- `X` - Data matrix of predictors
- `y` - Vector of responses
- `kmax` - Maximum number of principal components
- `centre` - Centre `X` and prepend an intercept row to `B` (default `true`)

### Returns
- `B` - Coefficient matrix (one column per component count 1..`kmax`)
- `U`, `s`, `V` - Thin SVD factors of the (centred) predictor matrix

# Examples
    B, _ = PCR(X, y, 10);
"""
function PCR(X, y, kmax, centre=true)
    # Column means, kept for the intercept computation below.
    mX = mean(X, dims=1)
    my = mean(y, dims=1)
    # The response is always centred; the intercept row restores it below.
    y = y .- my
    # Bug fix: `centre` was referenced here but never defined — it is now a
    # parameter with default `true`, matching the sibling PLS signature.
    if centre
        X = X .- mX
    end
    # Thin SVD: X = U * Diagonal(s) * V'.
    U, s, V = svd(X, full=false)
    # Regression coefficients in score space, one per component.
    q = s[1:kmax].^(-1) .* (U[:,1:kmax]'y)
    # Accumulate so that column k combines components 1:k.
    B = cumsum(V[:,1:kmax] .* q', dims=2)
    if centre
        # Intercept for each model size, prepended as the first row.
        b0 = my .- mX * B
        B = [b0; B]
    end
    return B, U, s, V
end
"""
    beta, W, B, T = PLS(X, y, A, centre=true)

Partial Least Squares regression via Golub–Kahan-style bidiagonalization.

Julia version of the `bidiag2` MATLAB function in Björck and Indahl (2017).

### Arguments
- `X` - Data matrix of predictors (n × p)
- `y` - Vector of responses
- `A` - Number of components
- `centre` - Column-centre `X` and prepend an intercept row to `beta`
  (default `true`)

### Returns
- `beta` - Coefficient matrix; column a is the model with a components
  (first row is the constant term when `centre == true`)
- `W`, `B`, `T` - Matrices such that X ≈ T*B*W', where `B` is upper bidiagonal
"""
function PLS(X, y, A, centre=true)
n, p = size(X);
# Preallocate loading/score storage.
w = zeros(p, 1);
W = zeros(p, A);
t = zeros(n, 1);
T = zeros(n, A);
# NOTE(review): P is allocated but never written or returned — apparently unused.
P = zeros(p, A);
beta = zeros(p, A);
# B holds the bidiagonal entries column-wise: B[:,1] diagonal, B[:,2] superdiagonal.
B = zeros(A,2);
mX = mean(X, dims=1);
my = mean(y, dims=1);
# The response is always centred; the intercept row restores it below.
y = y .- my;
if centre
X = X .- mX;
end
# First component: weight vector is the normalized covariance direction.
w = X'y;
w = w / norm(w);
W[:,1] = w;
# First score vector and its norm (first diagonal element of B).
t = X*w;
rho = norm(t);
t = t / rho;
T[:,1] = t;
B[1,1] = rho;
# d accumulates the coefficient update direction; beta column 1 is the
# one-component regression.
d = w / rho;
beta[:,1] = (t'y) .* d;
for a in 2:A
# Bidiagonalization recurrence for the next weight vector...
w = X'*t - rho * w;
# ...followed by full reorthogonalization against previous W columns
# (order matters for numerical stability — do not reorder).
w = w - W*(W'*w);
theta = norm(w);
w = w / theta;
W[:,a] = w;
# Same recurrence + reorthogonalization for the next score vector.
t = X * w - theta * t;
t = t - T*(T'*t);
rho = norm(t);
t = t / rho;
T[:,a] = t;
# Store superdiagonal (theta) and diagonal (rho) bidiagonal entries.
B[a-1, 2] = theta;
B[a, 1] = rho;
# Update coefficient direction and accumulate the a-component solution.
d = (w - theta*d) / rho;
beta[:,a] = beta[:,a-1] + (t'y) .* d;
end
if centre
# Intercept for each model size, prepended as the first row.
b0 = my .- mX * beta
beta = [b0; beta];
end
# Pack the stored entries into an upper Bidiagonal matrix.
B = Bidiagonal(B[:,1], B[1:end-1,2], :U);
returnvals = beta, W, B, T
return returnvals
end