Added lots of stuff
This commit is contained in:
parent
c9644c0f6d
commit
4ad7d0c8d6
4 changed files with 178 additions and 3 deletions
|
|
@ -28,6 +28,7 @@ export createDataSplitBinaryStratified
|
||||||
export importData
|
export importData
|
||||||
|
|
||||||
export PCR
|
export PCR
|
||||||
|
export bidiag2
|
||||||
|
|
||||||
|
|
||||||
include("preprocessing.jl")
|
include("preprocessing.jl")
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,103 @@ using Random
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
function predRegression(X, beta, y)
|
||||||
|
function predRegression(X::Vector{Float64}, beta, y)
|
||||||
|
|
||||||
|
Prediction for linear model of form y = X * beta [+ b0]
|
||||||
|
Returns ypred, rmsep
|
||||||
|
"""
|
||||||
|
function predRegression(X, beta, y)
|
||||||
|
|
||||||
|
if length(beta) == size(X,2)
|
||||||
|
ypred = X * beta;
|
||||||
|
elseif length(beta) == size(X,2) + 1
|
||||||
|
ypred = beta[1] .+ X * beta[2:end]
|
||||||
|
end
|
||||||
|
|
||||||
|
rmsep = sqrt(mean(y - ypred).^2)
|
||||||
|
|
||||||
|
return ypred, rmsep
|
||||||
|
end
|
||||||
|
|
||||||
|
function predRegression(X::Vector{Float64}, beta, y)
|
||||||
|
|
||||||
|
if length(beta) == length(X)
|
||||||
|
ypred = X' * beta;
|
||||||
|
elseif length(beta) == length(X) + 1
|
||||||
|
ypred = beta[1] .+ X' * beta[2:end];
|
||||||
|
end
|
||||||
|
|
||||||
|
rmsep = sqrt(mean(y - ypred).^2)
|
||||||
|
|
||||||
|
return ypred, rmsep
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
function calculateRMSE(X, y, regfunction, funcargs, nsplits=1, props=[6/10, 2/10, 2/10], rngseed=42)
|
||||||
|
function calculateRMSE(X, y, splits, nTrainValTest, regfunction, funcargs)
|
||||||
|
|
||||||
|
Calculates rmse for train/val/test sets for regfunction with args funcargs.
|
||||||
|
Assume regfunction takes arguments of form "regfunction(X, y, funcargs...)" and first return argument
|
||||||
|
is regression coefficients. Second function uses the splits given as input.
|
||||||
|
|
||||||
|
"""
|
||||||
|
function calculateRMSE(X, y, regfunction, funcargs, nsplits=1, props=[6/10, 2/10, 2/10], rngseed=42, emscpreproc=false, emscdegree=6)
|
||||||
|
|
||||||
|
splits, nTrainValTest = createDataSplitInds(X, nsplits, props, rngseed);
|
||||||
|
meanrmse, rmse = calculateRMSE(X, y, splits, nTrainValTest, regfunction, funcargs, emscpreproc, emscdegree);
|
||||||
|
|
||||||
|
return meanrmse, rmse
|
||||||
|
end
|
||||||
|
|
||||||
|
function calculateRMSE(X, y, splits, nTrainValTest, regfunction, funcargs, emscpreproc=false, emscdegree=6)
|
||||||
|
|
||||||
|
nsplits = size(splits,2);
|
||||||
|
B, _ = regfunction(X, y, funcargs...); # <- Slow, but works in general. Maybe add some special cases for known functions?
|
||||||
|
kmax = size(B, 2);
|
||||||
|
rmse = zeros(3, kmax, nsplits);
|
||||||
|
|
||||||
|
for i in 1:nsplits
|
||||||
|
split = createDataSplit(splits, nTrainValTest, i, X, y);
|
||||||
|
XTrain = split["XTrain"];
|
||||||
|
XVal = split["XVal"];
|
||||||
|
XTest = split["XTest"];
|
||||||
|
yTrain = split["YTrain"];
|
||||||
|
yVal = split["YVal"];
|
||||||
|
yTest = split["YTest"];
|
||||||
|
|
||||||
|
if emscpreproc
|
||||||
|
XTrain, output = EMSC(XTrain, emscdegree, "svd", 1, -1, 0); # nRef, baseDeg, intF
|
||||||
|
XVal, _ = EMSC(XVal, output["model"]);
|
||||||
|
XTest, _ = EMSC(XTest, output["model"]);
|
||||||
|
end
|
||||||
|
|
||||||
|
B, _ = regfunction(XTrain, yTrain, funcargs...);
|
||||||
|
|
||||||
|
for j=1:kmax
|
||||||
|
_, rmse[1, j, i] = predRegression(XTrain, B[:,j], yTrain);
|
||||||
|
_, rmse[2, j, i] = predRegression(XVal, B[:,j], yVal);
|
||||||
|
_, rmse[3, j, i] = predRegression(XTest, B[:,j], yTest);
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
meanrmse = dropdims(mean(rmse, dims=3), dims=3);
|
||||||
|
|
||||||
|
return meanrmse, rmse
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
createDataSplitInds(X, nSplits, props=[6/10, 2/10, 2/10], rngseed=42)
|
createDataSplitInds(X, nSplits, props=[6/10, 2/10, 2/10], rngseed=42)
|
||||||
createDataSplitInds(X::Int64, nSplits, props=[6/10, 2/10, 2/10], rngseed=42)
|
createDataSplitInds(X::Int64, nSplits, props=[6/10, 2/10, 2/10], rngseed=42)
|
||||||
|
|
|
||||||
|
|
@ -135,6 +135,7 @@ refSpec = mean(X, dims=1)';
|
||||||
basis = [refSpec P];
|
basis = [refSpec P];
|
||||||
|
|
||||||
return_values = EMSCTraditional(X, basis);
|
return_values = EMSCTraditional(X, basis);
|
||||||
|
|
||||||
return return_values
|
return return_values
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
@ -151,8 +152,7 @@ function EMSCTraditional(X, basis::Matrix{Float64})
|
||||||
coeffs = basis \ X';
|
coeffs = basis \ X';
|
||||||
X_Cor = (X - coeffs[2:end,:]' * basis[:,2:end]') ./ coeffs[1,:];
|
X_Cor = (X - coeffs[2:end,:]' * basis[:,2:end]') ./ coeffs[1,:];
|
||||||
|
|
||||||
return_values = Dict([("X_Cor", X_Cor), ("basis", basis), ("coeffs", coeffs)]);
|
return X_Cor, Dict([("basis", basis), ("coeffs", coeffs)]);
|
||||||
return return_values
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -32,3 +32,80 @@ B = [b0; B];
|
||||||
|
|
||||||
return B, U, s, V
|
return B, U, s, V
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
bidiag2(X, y, A)
|
||||||
|
|
||||||
|
Julia version of the bidiag2 MATLAB function in Björck and Indahl (2017)
|
||||||
|
|
||||||
|
### Arguments
|
||||||
|
- 'X' - Data matrix of predictors
|
||||||
|
- 'y' - Vector of responses
|
||||||
|
- 'A' - Number of components
|
||||||
|
|
||||||
|
### Returns:
|
||||||
|
- beta - Matrix with regression coefficients (including constant term if centre==true)
|
||||||
|
- W, T, B - Matrices such that X \approx T*B*W'.
|
||||||
|
B in compact form, use following to create full: B2 = zeros(A,A); B2[diagind(B2)] = B[:,1]; B2[diagind(B2,1)] = B[2:end,2];
|
||||||
|
"""
|
||||||
|
function bidiag2(X, y, A, centre=true)
|
||||||
|
|
||||||
|
n, p = size(X);
|
||||||
|
|
||||||
|
w = zeros(p, 1);
|
||||||
|
W = zeros(p, A);
|
||||||
|
t = zeros(n, 1);
|
||||||
|
T = zeros(n, A);
|
||||||
|
P = zeros(p, A);
|
||||||
|
beta = zeros(p, A);
|
||||||
|
B = zeros(A,2);
|
||||||
|
|
||||||
|
mX = mean(X, dims=1);
|
||||||
|
my = mean(y, dims=1);
|
||||||
|
y = y .- my;
|
||||||
|
|
||||||
|
if centre
|
||||||
|
X = X .- mX;
|
||||||
|
end
|
||||||
|
|
||||||
|
w = X'y;
|
||||||
|
w = w / norm(w);
|
||||||
|
W[:,1] = w;
|
||||||
|
t = X*w;
|
||||||
|
rho = norm(t);
|
||||||
|
t = t / rho;
|
||||||
|
T[:,1] = t;
|
||||||
|
|
||||||
|
B[1,1] = rho;
|
||||||
|
d = w / rho;
|
||||||
|
beta[:,1] = (t'y) .* d;
|
||||||
|
|
||||||
|
for a in 2:A
|
||||||
|
w = X'*t - rho * w;
|
||||||
|
w = w - W*(W'*w);
|
||||||
|
theta = norm(w);
|
||||||
|
w = w / theta;
|
||||||
|
W[:,a] = w;
|
||||||
|
t = X * w - theta * t;
|
||||||
|
t = t - T*(T'*t);
|
||||||
|
rho = norm(t);
|
||||||
|
t = t / rho;
|
||||||
|
T[:,a] = t;
|
||||||
|
B[a-1, 2] = theta;
|
||||||
|
B[a, 1] = rho;
|
||||||
|
d = (w - theta*d) / rho;
|
||||||
|
beta[:,a] = beta[:,a-1] + (t'y) .* d;
|
||||||
|
end
|
||||||
|
|
||||||
|
if centre
|
||||||
|
b0 = my .- mX * beta
|
||||||
|
beta = [b0; beta];
|
||||||
|
end
|
||||||
|
|
||||||
|
returnvals = beta, W, T, B
|
||||||
|
return returnvals
|
||||||
|
end
|
||||||
Loading…
Add table
Reference in a new issue