From 06d990278de25dbe5b8d499dba6c5f5d3c1cdbd7 Mon Sep 17 00:00:00 2001 From: Joakim Skogholt Date: Wed, 15 May 2024 09:48:57 +0200 Subject: [PATCH] Added pcr and pls (bidiag2) --- src/Ting.jl | 5 ++ src/variousRegressionFunctions.jl | 123 ++++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+) create mode 100644 src/variousRegressionFunctions.jl diff --git a/src/Ting.jl b/src/Ting.jl index 59d88d5..6d05dbe 100644 --- a/src/Ting.jl +++ b/src/Ting.jl @@ -30,7 +30,12 @@ export TRSegCVUpdateFair export TRSegCVNaive export TRSegCVFair +# From "variousRegressionFunctions.jl" +export PCR +export bidiag2 + include("convenience.jl") include("TR.jl") +include("variousRegressionFunctions") end \ No newline at end of file diff --git a/src/variousRegressionFunctions.jl b/src/variousRegressionFunctions.jl new file mode 100644 index 0000000..29032a8 --- /dev/null +++ b/src/variousRegressionFunctions.jl @@ -0,0 +1,123 @@ + + + + + + +""" + function PCR(X, y, kmax, centre=true)#, standardize=true) + +Principal Component Regression (PCR). +Inputs: Data matrix, response vector, maximum number of components. +A constant term is included in the modeling if centre==true. +Outputs: B (matrix of size (p+1) x kmax), U, s, V +ADD STANDARDIZATION?? (NEED TO THINK THROUGH PREDICTION WITH NEW DATA) + +X, y = importData("Beer"); +B, \\_ = PCR(X, y, 10, true, false); +""" +function PCR(X, y, kmax, centre::Bool=true)#, standardize=true) + +stdX = std(X, dims=1); +mX = mean(X, dims=1); +my = mean(y, dims=1); +y = y .- my; + +if centre + X = X .- mX; +end + +#if standardize +# X = X ./ stdX; +#end + +U, s, V = svd(X, full=false); + +q = s[1:kmax].^(-1) .*(U[:,1:kmax]'y); +B = cumsum(V[:,1:kmax] .* q', dims=2); + +if centre + b0 = my .- mX * B + B = [b0; B]; +end + +return B, U, s, V +end + + + + + +""" + bidiag2(X, y, A) + +Julia version of the bidiag2 MATLAB function in Björck and Indahl (2017) + +### Arguments + - 'X' - Data matrix of predictors + - 'y' - Vector of responses + - 'A' - Number of components + +### Returns: + - beta - Matrix with regression coefficients (including constant term if centre==true) + - W, B, T - Matrices such that X \approx T*B*W'. +""" +function bidiag2(X, y, A, centre=true) + +n, p = size(X); + +w = zeros(p, 1); +W = zeros(p, A); +t = zeros(n, 1); +T = zeros(n, A); +P = zeros(p, A); +beta = zeros(p, A); +B = zeros(A,2); + +mX = mean(X, dims=1); +my = mean(y, dims=1); +y = y .- my; + +if centre + X = X .- mX; +end + +w = X'y; +w = w / norm(w); +W[:,1] = w; +t = X*w; +rho = norm(t); +t = t / rho; +T[:,1] = t; + +B[1,1] = rho; +d = w / rho; +beta[:,1] = (t'y) .* d; + +for a in 2:A + w = X'*t - rho * w; + w = w - W*(W'*w); + theta = norm(w); + w = w / theta; + W[:,a] = w; + t = X * w - theta * t; + t = t - T*(T'*t); + rho = norm(t); + t = t / rho; + T[:,a] = t; + B[a-1, 2] = theta; + B[a, 1] = rho; + d = (w - theta*d) / rho; + beta[:,a] = beta[:,a-1] + (t'y) .* d; +end + +if centre + b0 = my .- mX * beta + beta = [b0; beta]; +end + +B = Bidiagonal(B[:,1], B[1:end-1,2], :U); + +returnvals = beta, W, B, T +return returnvals +end \ No newline at end of file