diff --git a/Project.toml b/Project.toml
index 6f6cff8..cfb61bb 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "MinPakke"
 uuid = "be803360-cecc-4859-8120-03d0223bb960"
 authors = ["Joakim"]
-version = "1.0.0-DEV"
+version = "1.0.1-DEV"
 
 [deps]
 LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
@@ -12,6 +12,7 @@ OptimizationBBO = "3e6eede4-6085-4f62-9a71-46d9bc1eb92b"
 OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 
 [compat]
 julia = "1"
diff --git a/src/MinPakke.jl b/src/MinPakke.jl
index 5d89504..f8c4ab0 100644
--- a/src/MinPakke.jl
+++ b/src/MinPakke.jl
@@ -8,6 +8,7 @@ using Optimization
 using OptimizationOptimJL
 using OptimizationBBO
 using LaTeXStrings
+using Random
 
 export plegendre
 export SNV
@@ -26,8 +27,12 @@ export createDataSplit
 export createDataSplitBinaryStratified
 export importData
 
+export PCR
+
+
 include("preprocessing.jl")
 include("convenience.jl")
 include("conveniencePlots.jl")
+include("variousRegressionFunctions.jl")
 
 end
\ No newline at end of file
diff --git a/src/preprocessing.jl b/src/preprocessing.jl
index a1a6b6a..521db69 100644
--- a/src/preprocessing.jl
+++ b/src/preprocessing.jl
@@ -37,7 +37,6 @@ Outputs:
 - Q : (d+1) x p matrix with basis
 - R : matrix from QR-factorisation
 """
-
 function plegendre(d, p)
 
     P = ones(p, d+1);
diff --git a/src/variousRegressionFunctions.jl b/src/variousRegressionFunctions.jl
new file mode 100644
index 0000000..2e4b17b
--- /dev/null
+++ b/src/variousRegressionFunctions.jl
@@ -0,0 +1,56 @@
+"""
+    PCR(X, y, kmax, centre=true, standardize=true)
+
+Principal Component Regression (PCR) of `y` on `X` via the thin SVD of the
+preprocessed data matrix. A constant term is included in the model.
+
+# Arguments
+- `X`: data matrix with one sample per row and one variable per column.
+- `y`: response vector with one entry per row of `X`.
+- `kmax`: maximum number of principal components.
+- `centre`: if `true`, subtract the column means of `X` before the SVD.
+- `standardize`: if `true`, divide each column of `X` by its standard
+  deviation before the SVD; constant columns are left unscaled.
+
+# Returns
+- `B`: `(p + 1) x kmax` coefficient matrix. Column `k` holds the intercept
+  (first row) and the coefficients of the model using `k` components. The
+  coefficients refer to the original, unscaled `X`, so predictions are
+  `[ones(n) X] * B`.
+- `U`, `s`, `V`: factors of the thin SVD of the preprocessed `X`.
+
+# Examples
+```julia
+X, y = importData("Beer");
+B, _ = PCR(X, y, 10, true, false);
+```
+"""
+function PCR(X, y, kmax, centre=true, standardize=true)
+    mX = mean(X, dims=1)
+    my = mean(y, dims=1)
+
+    Xp = centre ? X .- mX : X
+
+    if standardize
+        stdX = std(X, dims=1)
+        # Guard against division by zero for constant columns.
+        scale = map(v -> iszero(v) ? one(v) : v, stdX)
+        Xp = Xp ./ scale
+    end
+
+    U, s, V = svd(Xp, full=false)
+
+    # Regress y on the first kmax score vectors; cumulative sums over the
+    # loadings give the coefficients for models with 1, 2, ..., kmax components.
+    q = (U[:, 1:kmax]' * y) ./ s[1:kmax]
+    B = cumsum(V[:, 1:kmax] .* q', dims=2)
+
+    if standardize
+        # Map the coefficients back to the scale of the original X so that
+        # they match the intercept computed from the unscaled column means.
+        B = B ./ scale'
+    end
+
+    b0 = my .- mX * B
+    return [b0; B], U, s, V
+end
\ No newline at end of file