165 lines
No EOL
3.3 KiB
Julia
165 lines
No EOL
3.3 KiB
Julia
"""
    rmse, ypred = calculateRMSE(X, y, beta)

Compute the root mean squared error (RMSE) of a linear regression model,
with or without an intercept term.

If an intercept is included, it must be the first element of `beta`.

Returns the tuple `(rmse, ypred)`.
"""
function calculateRMSE(X, y, beta)

    if length(beta) == size(X,2)
        # No intercept: plain linear combination.
        ypred = X * beta;
    elseif length(beta) == (size(X,2) + 1)
        # First element of beta is the intercept.
        ypred = beta[1] .+ X * beta[2:end]
    else
        error("length(beta) must equal size(X,2) or size(X,2)+1")
    end

    # RMSE: square the residuals BEFORE averaging.
    # (Previously this computed sqrt(mean(y - ypred).^2) = |mean residual|
    #  and returned an undefined variable `rmse` instead of `rmsep`.)
    rmse = sqrt(mean((y .- ypred).^2))

    return rmse, ypred
end
|
|
|
|
|
|
|
|
|
|
"""
    oldRegCoeffs(X, y, cvfolds, ncomps, regFunction=PLS)

Calculates "old" regression coefficients using cross-validation and the
one-standard-error rule.

`regFunction` = PLS or PCR.

Returns `(bold, regCoeffsInds, cverror)`.
"""
function oldRegCoeffs(X, y, cvfolds, ncomps, regFunction=PLS)

    cverror = zeros(ncomps, 1);
    nfolds = length(unique(cvfolds));

    # Accumulate squared prediction errors over every CV segment.
    for fold in 1:nfolds
        testmask  = vec(fold .== cvfolds);
        trainmask = .!testmask;

        Xtrain = X[trainmask, :];
        ytrain = y[trainmask, :];
        Xtest  = X[testmask, :];
        ytest  = y[testmask, :];

        betas, _ = regFunction(Xtrain, ytrain, ncomps)
        # First row of betas holds the intercepts; the rest are coefficients.
        residuals = ytest .- Xtest * betas[2:end, :] .- betas[1, :]';
        cverror += sum(residuals.^2, dims=1)';
    end

    # Mean squared CV error for each number of components.
    cverror = cverror ./ size(X,1);

    # One-standard-error rule: pick the smallest model whose CV error lies
    # within one standard error of the minimum.
    threshold = minimum(cverror) + std(cverror) / sqrt(nfolds);
    regCoeffsInds = findfirst(cverror .< threshold)[1]

    # Refit on the full data set and keep the selected coefficient column.
    betas, _ = regFunction(X, y, ncomps)
    bold = betas[:, regCoeffsInds]

    return bold, regCoeffsInds, cverror
end
|
|
|
|
"""
    PCR(X, y, kmax, centre=true)

Principal Component Regression (PCR).

# Arguments
- `X` - data matrix of predictors
- `y` - vector of responses
- `kmax` - maximum number of principal components
- `centre` - if `true` (default), column-centre `X` and prepend an
  intercept row to the coefficient matrix

# Returns
- `B` - regression coefficients; column k is the model with k components
  (first row is the intercept when `centre == true`)
- `U`, `s`, `V` - thin SVD of the (centred) `X`

    B, \\_ = PCR(X, y, 10);
"""
function PCR(X, y, kmax, centre=true)
    # NOTE(review): `centre` was read here but never declared as a parameter,
    # so the function errored unless a global `centre` existed. It is now a
    # keyword-style positional default, matching PLS(X, y, A, centre=true).

    stdX = std(X, dims=1);   # kept for the disabled standardize option below
    mX = mean(X, dims=1);
    my = mean(y, dims=1);
    y = y .- my;

    if centre
        X = X .- mX;
    end

    #if standardize
    #    X = X ./ stdX;
    #end

    U, s, V = svd(X, full=false);

    # Per-component regression coefficients in the principal-component basis...
    q = s[1:kmax].^(-1) .*(U[:,1:kmax]'y);
    # ...accumulated back into the original variable space, one column per
    # model size.
    B = cumsum(V[:,1:kmax] .* q', dims=2);

    if centre
        # Intercept for each model size.
        b0 = my .- mX * B
        B = [b0; B];
    end

    return B, U, s, V
end
|
|
|
|
|
|
|
|
|
|
|
|
"""
    PLS(X, y, A, centre=true)

Partial Least Squares regression. Julia version of the bidiag2 MATLAB
function in Björck and Indahl (2017).

# Arguments
- `X` - data matrix of predictors
- `y` - vector of responses
- `A` - number of components
- `centre` - if `true` (default), column-centre `X` and prepend an
  intercept row to `beta`

# Returns
- `beta` - matrix with regression coefficients, one column per number of
  components (first row is the constant term if `centre == true`)
- `W`, `B`, `T` - matrices such that X \\approx T*B*W'.
"""
function PLS(X, y, A, centre=true)

    n, p = size(X);

    # Preallocate only what is actually used (the original also allocated
    # P, and w/t placeholders that were immediately overwritten).
    W = zeros(p, A);
    T = zeros(n, A);
    beta = zeros(p, A);
    B = zeros(A,2);   # column 1: diagonal (rho); column 2: superdiagonal (theta)

    mX = mean(X, dims=1);
    my = mean(y, dims=1);
    y = y .- my;      # y is always centred; intercept restored below if centre

    if centre
        X = X .- mX;
    end

    # First component.
    w = X'y;
    w = w / norm(w);
    W[:,1] = w;
    t = X*w;
    rho = norm(t);
    t = t / rho;
    T[:,1] = t;

    B[1,1] = rho;
    d = w / rho;
    beta[:,1] = (t'y) .* d;

    # Remaining components of the bidiagonalization recursion, with explicit
    # reorthogonalization against the accumulated W and T.
    for a in 2:A
        w = X'*t - rho * w;
        w = w - W*(W'*w);      # reorthogonalize against previous w's
        theta = norm(w);
        w = w / theta;
        W[:,a] = w;
        t = X * w - theta * t;
        t = t - T*(T'*t);      # reorthogonalize against previous t's
        rho = norm(t);
        t = t / rho;
        T[:,a] = t;
        B[a-1, 2] = theta;
        B[a, 1] = rho;
        d = (w - theta*d) / rho;
        # Coefficients accumulate: column a extends the (a-1)-component model.
        beta[:,a] = beta[:,a-1] + (t'y) .* d;
    end

    if centre
        # Intercept row for each model size.
        b0 = my .- mX * beta
        beta = [b0; beta];
    end

    # Pack the recurrence coefficients into an upper-bidiagonal matrix.
    B = Bidiagonal(B[:,1], B[1:end-1,2], :U);

    returnvals = beta, W, B, T
    return returnvals
end