diff --git a/src/TR.jl b/src/TR.jl index e58c21a..fdbd07c 100644 --- a/src/TR.jl +++ b/src/TR.jl @@ -1,286 +1,28 @@ """ -Solves the model update problem explicitly as a least squares problem with stacked matrices. -In practice the most naive way of approaching the update problem -""" -function TRLooCVUpdateNaive(X, y, lambdasu, bOld) - -n, p = size(X); -rmsecv = zeros(length(lambdasu)); - -for i = 1:n - inds = setdiff(1:n, i); - Xdata = X[inds,:]; - ydata = y[inds]; - - mX = mean(Xdata, dims=1); - my = mean(ydata); - Xs = Xdata .- mX; - ys = ydata .- my; - p2 = size(Xdata, 2); - - for j = 1:length(lambdasu) - betas = [Xs; sqrt(lambdasu[j]) * I(p2)] \ [ys ; sqrt(lambdasu[j]) * bOld]; - rmsecv[j] += (y[i] - (((X[i,:]' .- mX) * betas)[1] + my))^2; - end -end - -rmsecv = sqrt.(1/n .* rmsecv); - -return rmsecv -end - -""" -Uses the 'svd-trick' for efficient calculation of regression coefficients, but does not use leverage corrections. -Hence regression coefficients are calculated for all lambda values -""" -function TRLooCVUpdateFair(X, y, lambdasu, bOld) - -n, p = size(X); -rmsecv = zeros(length(lambdasu)) - -for i = 1:n - inds = setdiff(1:n, i); - Xdata = X[inds,:]; - ydata = y[inds]; - - mX = mean(Xdata, dims=1); - my = mean(ydata); - Xs = Xdata .- mX; - ys = ydata .- my; - U, s, V = svd(Xs, full=false); - - denom = broadcast(.+, broadcast(./, lambdasu, s'), s')'; - denom2 = broadcast(.+, ones(n-1), broadcast(./, lambdasu', s.^2)) - - # Calculating regression coefficients and residual - bcoeffs = V * broadcast(./, (U' * ys), denom) .+ bOld .- V * broadcast(./, V' * bOld, denom2); - rmsecv += ((y[i] .- ((X[i,:]' .- mX) * bcoeffs .+ my)).^2)'; -end - -rmsecv = sqrt.(1/n .* rmsecv); - -return rmsecv -end - -""" -Fast k-fold cv for updating regression coefficients -""" -function TRSegCVUpdate(X, y, lambdas, cv, bOld, regType="L2", derOrder=0) - -n, p = size(X); - -# Finding appropriate regularisation matrix -if regType == "bc" - regMat = [I(p); zeros(derOrder,p)]; - for i = 1:derOrder regMat = diff(regMat, dims = 1); end -elseif regType == "legendre" - regMat = [I(p); zeros(derOrder,p)]; - for i = 1:derOrder regMat = diff(regMat, dims = 1); end - P, _ = plegendre(derOrder-1, p); - epsilon = 1e-14; - regMat[end-derOrder+1:end,:] = sqrt(epsilon) * P; -elseif regType == "L2" - regMat = I(p); -elseif regType == "std" - regMat = Diagonal(vec(std(X, dims=1))); -elseif regType == "GL" # GL fractional derivative regulariztion - C = ones(p)*1.0; - for k in 2:p - C[k] = (1-(derOrder+1)/(k-1)) * C[k-1]; - end - - regMat = zeros(p,p); - - for i in 1:p - regMat[i:end, i] = C[1:end-i+1]; - end -end - -# Preliminary calculations -mX = mean(X, dims=1); -X = X .- mX; -my = mean(y); -y = y .- my; -X = X / regMat; -U, s, V = svd(X, full=false); -n_seg = maximum(cv); -n_lambdas = length(lambdas); - -# Finding residuals -denom = broadcast(.+, broadcast(./, lambdas, s'), s')'; -denom2 = broadcast(.+, ones(n), broadcast(./, lambdas', s.^2)) -yhat = broadcast(./, s .* (U'*y), denom) -yhat += s .* broadcast(.-, 1, broadcast(./, 1, denom2)) .* (V' * bOld) -resid = broadcast(.-, y, U * yhat) - -# Finding cross-validated residuals -rescv = zeros(n, n_lambdas); -sdenom = sqrt.(broadcast(./, s, denom))'; - -for seg in 1:n_seg - - Useg = U[vec(cv .== seg),:]; - Id = 1.0 * I(size(Useg,1)) .- 1/n; - - for k in 1:n_lambdas - Uk = Useg .* sdenom[k,:]'; - rescv[vec(cv .== seg), k] = (Id - Uk * Uk') \ resid[vec(cv .== seg), k]; - end -end - -# Calculating rmsecv and regression coefficients -press = sum(rescv.^2, dims=1)'; -rmsecv = sqrt.(1/n .* press); -bcoeffs = V * broadcast(./, (U' * y), denom) .+ bOld .- V * broadcast(./, V' * bOld, denom2); -bcoeffs = regMat \ bcoeffs; - -# Creating regression coefficients for uncentred data -if my != 0 - bcoeffs = [my .- mX*bcoeffs; bcoeffs]; -end - -# Finding rmsecv-minimal lambda value and associated regression coefficients -lambda_min, lambda_min_ind = findmin(rmsecv) -lambda_min_ind = lambda_min_ind[1] -b_lambda_min = bcoeffs[:,lambda_min_ind] - -return b_lambda_min, rmsecv, lambda_min, lambda_min_ind, bcoeffs -end - -""" -Updates regression coefficient by solving the augmented TR problem [Xs; sqrt(lambda)*I] * beta = [ys; sqrt(lambda)*b_old] -Note that many regularization types are supported but the regularization is on the difference between new and old reg. coeffs. -and so most regularization types are probably not meaningful. -""" -function TRLooCVUpdate(X, y, lambdas, bOld, regType="L2", derOrder=0) - -n, p = size(X); - -# Finding appropriate regularisation matrix -if regType == "bc" - regMat = [I(p); zeros(derOrder,p)]; - for i = 1:derOrder regMat = diff(regMat, dims = 1); end -elseif regType == "legendre" - regMat = [I(p); zeros(derOrder,p)]; - for i = 1:derOrder regMat = diff(regMat, dims = 1); end - P, _ = plegendre(derOrder-1, p); - epsilon = 1e-14; - regMat[end-derOrder+1:end,:] = sqrt(epsilon) * P; -elseif regType == "L2" - regMat = I(p); -elseif regType == "std" - regMat = Diagonal(vec(std(X, dims=1))); -elseif regType == "GL" # GL fractional derivative regulariztion - C = ones(p)*1.0; - for k in 2:p - C[k] = (1-(derOrder+1)/(k-1)) * C[k-1]; - end - regMat = zeros(p,p); - for i in 1:p - regMat[i:end, i] = C[1:end-i+1]; - end -end - - -# Preliminary calculations -mX = mean(X, dims=1); -X = X .- mX; -my = mean(y); -y = y .- my; -X = X / regMat; -U, s, V = svd(X, full=false); - -# Main calculations -denom = broadcast(.+, broadcast(./, lambdas, s'), s')'; -denom2 = broadcast(.+, ones(n), broadcast(./, lambdas', s.^2)) -yhat = broadcast(./, s .* (U'*y), denom); -yhat += s .* broadcast(.-, 1, broadcast(./, 1, denom2)) .* (V' * bOld) -resid = broadcast(.-, y, U * yhat) -H = broadcast(.+, U.^2 * broadcast(./, s, denom), 1/n); -rescv = broadcast(./, resid, broadcast(.-, 1, H)); -press = vec(sum(rescv.^2, dims=1)); -rmsecv = sqrt.(1/n .* press); -gcv = vec(broadcast(./, sum(resid.^2, dims=1), mean(broadcast(.-, 1, H), dims=1).^2)); - -# Finding lambda that minimises rmsecv and GCV -idminrmsecv = findmin(press)[2][1]; -idmingcv = findmin(gcv)[2][1]; -lambdarmsecv = lambdas[idminrmsecv]; -lambdagcv = lambdas[idmingcv]; - -# Calculating regression coefficients -bcoeffs = V * broadcast(./, (U' * y), denom) .+ bOld .- V * broadcast(./, V' * bOld, denom2); -bcoeffs = regMat \ bcoeffs; - -if my != 0 - bcoeffs = [my .- mX*bcoeffs; bcoeffs]; -end - -brmsecv = bcoeffs[:, idminrmsecv]; -bgcv = bcoeffs[:, idmingcv]; - - -return brmsecv, bgcv, rmsecv, gcv, idminrmsecv, lambdarmsecv, idmingcv, lambdagcv, bcoeffs -end - - -""" -### TO DO: ADD FRACTIONAL DERIVATIVE REGULARIZATION <-- Check that it is correctly added:) ### - - regularizationMatrix(X; regType="L2", regParam1=0, regParam2=1e-14) - regularizationMatrix(p::Int64; regType="L2", regParam1=0, regParam2=1e-14) - -Calculates and returns square regularization matrix. + function plegendre(d, p) +Calculates orthonormal Legendre polynomials using a QR factorisation. Inputs: - - X/p : Size of regularization matrix or data matrix (size of reg. mat. will then be size(X,2) - - regType : "L2" (returns identity matrix) - "bc" (boundary condition, forces zero on right endpoint for derivative regularization) or - "legendre" (no boundary condition, but fills out reg. mat. with lower order polynomial trends to get square matrix) - "std" (standardization, FILL OUT WHEN DONE) - - regParam1 : Int64, Indicates degree of derivative regularization (0 gives L\\_2) - - regParam2 : For regType=="plegendre" added polynomials are multiplied by sqrt(regParam2) + - d : polynomial degree + - p : size of vector -Output: Square regularization matrix +Outputs: + - Q : (d+1) x p matrix with basis + - R : matrix from QR-factorisation """ -function regularizationMatrix(X; regType="L2", regParam1=0, regParam2=1e-14) - -if regType == "std" - regParam2 = Diagonal(vec(std(X, dims=1))); +function plegendre(d, p) + +P = ones(p, d+1); +x = (-1:2/(p-1):1)'; +for k in 1:d + P[:,k+1] = x.^k; end -regMat = regularizationMatrix(size(X,2); regType, regParam1, regParam2) +factorisation = qr(P); +Q = Matrix(factorisation.Q)'; +R = Matrix(factorisation.R); -return regMat -end - -function regularizationMatrix(p::Int64; regType="L2", regParam1=0, regParam2=1e-14) - -if regType == "bc" # Discrete derivative with boundary conditions - regMat = [I(p); zeros(regParam1,p)]; for i = 1:regParam1 regMat = diff(regMat, dims = 1); end -elseif regType == "legendre" # Fill in polynomials in bottom row(s) to get square matrix - regMat = [I(p); zeros(regParam1,p)]; for i = 1:regParam1 regMat = diff(regMat, dims = 1); end - P, _ = plegendre(regParam1-1, p); - regMat[end-regParam1+1:end,:] = sqrt(regParam2) * P; -elseif regType == "L2" - regMat = I(p); -elseif regType == "std" # Standardization - regMat = regParam2; -elseif regType == "GL" # Grünwald-Letnikov fractional derivative regulariztion - # regParam1 is alpha (order of fractional derivative) - C = ones(p)*1.0; - for k in 2:p - C[k] = (1-(regParam1+1)/(k-1)) * C[k-1]; - end - - regMat = zeros(p,p); - - for i in 1:p - regMat[i:end, i] = C[1:end-i+1]; - end -end - -return regMat +return Q, R end """ @@ -436,32 +178,6 @@ b_lambda_min = bcoeffs[:,lambda_min_ind] return b_lambda_min, rmsecv, lambda_min, lambda_min_ind, bcoeffs end -""" - function plegendre(d, p) - -Calculates orthonormal Legendre polynomials using a QR factorisation. -Inputs: - - d : polynomial degree - - p : size of vector - -Outputs: - - Q : (d+1) x p matrix with basis - - R : matrix from QR-factorisation -""" -function plegendre(d, p) - -P = ones(p, d+1); -x = (-1:2/(p-1):1)'; -for k in 1:d - P[:,k+1] = x.^k; -end - -factorisation = qr(P); -Q = Matrix(factorisation.Q)'; -R = Matrix(factorisation.R); - -return Q, R -end """ "Manual" k-fold cv for solving the Ridge regression problem. @@ -494,73 +210,6 @@ rmsecv = sqrt.(1/n .* rmsecv); return rmsecv end -""" -"Manual" k-fold cv for solving the Ridge regression problem update. -The LS problem is solved explicitly and no shortcuts are used. -""" -function TRSegCVUpdateNaive(X, y, lambdas, cvfolds, bOld) - -n, p = size(X); -rmsecv = zeros(length(lambdas)); -nfolds = length(unique(cvfolds)); - -for j = 1:length(lambdas) - for i = 1:nfolds - inds = (cvfolds .== i); - Xdata = X[vec(.!inds),:]; - ydata = y[vec(.!inds)]; - - mX = mean(Xdata, dims=1); - my = mean(ydata); - Xs = Xdata .- mX; - ys = ydata .- my; - - betas = [Xs; sqrt(lambdas[j]) * I(p)] \ [ys; sqrt(lambdas[j]) * bOld]; - rmsecv[j] += sum((y[vec(inds)] - ((X[vec(inds),:] .- mX) * betas .+ my)).^2); - end -end - -rmsecv = sqrt.(1/n .* rmsecv); - -return rmsecv -end - - -""" -K-fold CV for the Ridge regression update problem, using the 'SVD-trick' for calculating the regression coefficients. -""" -function TRSegCVUpdateFair(X, y, lambdas, cv, bOld) - -n, p = size(X); -rmsecv = zeros(length(lambdas)); -nfolds = length(unique(cv)); - -for i = 1:nfolds - inds = (cv .== i); - Xdata = X[vec(.!inds),:]; - ydata = y[vec(.!inds)]; - - mX = mean(Xdata, dims=1); - my = mean(ydata); - Xs = Xdata .- mX; - ys = ydata .- my; - - U, s, V = svd(Xs, full=false); - - denom = broadcast(.+, broadcast(./, lambdas, s'), s')'; - denom2 = broadcast(.+, ones(n-sum(inds)), broadcast(./, lambdas', s.^2)); - - # Calculating regression coefficients - bcoeffs = V * broadcast(./, (U' * ys), denom) .+ bOld .- V * broadcast(./, V' * bOld, denom2); - rmsecv += sum((y[vec(inds)] .- ((X[vec(inds),:] .- mX) * bcoeffs .+ my)).^2, dims=1)'; - -end - -rmsecv = sqrt.(1/n .* rmsecv); - - return rmsecv -end - """ K-fold CV for the Ridge regression problem, using the 'SVD-trick' for calculating the regression coefficients. """ diff --git a/src/TRLooUpdate.jl b/src/TRLooUpdate.jl new file mode 100644 index 0000000..555d7eb --- /dev/null +++ b/src/TRLooUpdate.jl @@ -0,0 +1,213 @@ +""" +Solves the model update problem explicitly as a least squares problem with stacked matrices. +In practice the most naive way of approaching the update problem +""" +function TRLooCVUpdateNaive(X, y, lambdasu, bOld) + +n, p = size(X); +rmsecv = zeros(length(lambdasu)); + +for i = 1:n + inds = setdiff(1:n, i); + Xdata = X[inds,:]; + ydata = y[inds]; + + mX = mean(Xdata, dims=1); + my = mean(ydata); + Xs = Xdata .- mX; + ys = ydata .- my; + p2 = size(Xdata, 2); + + for j = 1:length(lambdasu) + betas = [Xs; sqrt(lambdasu[j]) * I(p2)] \ [ys ; sqrt(lambdasu[j]) * bOld]; + rmsecv[j] += (y[i] - (((X[i,:]' .- mX) * betas)[1] + my))^2; + end +end + +rmsecv = sqrt.(1/n .* rmsecv); + +return rmsecv +end + +""" +Uses the 'svd-trick' for efficient calculation of regression coefficients, but does not use leverage corrections. +Hence regression coefficients are calculated for all lambda values +""" +function TRLooCVUpdateFair(X, y, lambdasu, bOld) + +n, p = size(X); +rmsecv = zeros(length(lambdasu)) + +for i = 1:n + inds = setdiff(1:n, i); + Xdata = X[inds,:]; + ydata = y[inds]; + + mX = mean(Xdata, dims=1); + my = mean(ydata); + Xs = Xdata .- mX; + ys = ydata .- my; + U, s, V = svd(Xs, full=false); + + denom = broadcast(.+, broadcast(./, lambdasu, s'), s')'; + denom2 = broadcast(.+, ones(n-1), broadcast(./, lambdasu', s.^2)) + + # Calculating regression coefficients and residual + bcoeffs = V * broadcast(./, (U' * ys), denom) .+ bOld .- V * broadcast(./, V' * bOld, denom2); + rmsecv += ((y[i] .- ((X[i,:]' .- mX) * bcoeffs .+ my)).^2)'; +end + +rmsecv = sqrt.(1/n .* rmsecv); + +return rmsecv +end + +""" +Updates regression coefficient by solving the augmented TR problem [Xs; sqrt(lambda)*I] * beta = [ys; sqrt(lambda)*b_old] +Note that many regularization types are supported but the regularization is on the difference between new and old reg. coeffs. +and so most regularization types are probably not meaningful. +""" +function TRLooCVUpdate(X, y, lambdas, bOld, regType="L2", derOrder=0) + +n, p = size(X); + +# Finding appropriate regularisation matrix +if regType == "bc" + regMat = [I(p); zeros(derOrder,p)]; + for i = 1:derOrder regMat = diff(regMat, dims = 1); end +elseif regType == "legendre" + regMat = [I(p); zeros(derOrder,p)]; + for i = 1:derOrder regMat = diff(regMat, dims = 1); end + P, _ = plegendre(derOrder-1, p); + epsilon = 1e-14; + regMat[end-derOrder+1:end,:] = sqrt(epsilon) * P; +elseif regType == "L2" + regMat = I(p); +elseif regType == "std" + regMat = Diagonal(vec(std(X, dims=1))); +elseif regType == "GL" # GL fractional derivative regulariztion + C = ones(p)*1.0; + for k in 2:p + C[k] = (1-(derOrder+1)/(k-1)) * C[k-1]; + end + regMat = zeros(p,p); + for i in 1:p + regMat[i:end, i] = C[1:end-i+1]; + end +end + + +# Preliminary calculations +mX = mean(X, dims=1); +X = X .- mX; +my = mean(y); +y = y .- my; +X = X / regMat; +U, s, V = svd(X, full=false); + +# Main calculations +denom = broadcast(.+, broadcast(./, lambdas, s'), s')'; +denom2 = broadcast(.+, ones(n), broadcast(./, lambdas', s.^2)) +yhat = broadcast(./, s .* (U'*y), denom); +yhat += s .* broadcast(.-, 1, broadcast(./, 1, denom2)) .* (V' * bOld) +resid = broadcast(.-, y, U * yhat) +H = broadcast(.+, U.^2 * broadcast(./, s, denom), 1/n); +rescv = broadcast(./, resid, broadcast(.-, 1, H)); +press = vec(sum(rescv.^2, dims=1)); +rmsecv = sqrt.(1/n .* press); +gcv = vec(broadcast(./, sum(resid.^2, dims=1), mean(broadcast(.-, 1, H), dims=1).^2)); + +# Finding lambda that minimises rmsecv and GCV +idminrmsecv = findmin(press)[2][1]; +idmingcv = findmin(gcv)[2][1]; +lambdarmsecv = lambdas[idminrmsecv]; +lambdagcv = lambdas[idmingcv]; + +# Calculating regression coefficients +bcoeffs = V * broadcast(./, (U' * y), denom) .+ bOld .- V * broadcast(./, V' * bOld, denom2); +bcoeffs = regMat \ bcoeffs; + +if my != 0 + bcoeffs = [my .- mX*bcoeffs; bcoeffs]; +end + +brmsecv = bcoeffs[:, idminrmsecv]; +bgcv = bcoeffs[:, idmingcv]; + + +return brmsecv, bgcv, rmsecv, gcv, idminrmsecv, lambdarmsecv, idmingcv, lambdagcv, bcoeffs +end + +""" +# I denne gis X- og y-snitt som input til funksjonen istedenfor å regne ut fra input-dataene. +""" +function TRLooCVUpdateExperimental(X, y, lambdas, bOld, mX, my, regType="L2", derOrder=0) + +n, p = size(X); + +# Finding appropriate regularisation matrix +if regType == "bc" + regMat = [I(p); zeros(derOrder,p)]; + for i = 1:derOrder regMat = diff(regMat, dims = 1); end +elseif regType == "legendre" + regMat = [I(p); zeros(derOrder,p)]; + for i = 1:derOrder regMat = diff(regMat, dims = 1); end + P, _ = plegendre(derOrder-1, p); + epsilon = 1e-14; + regMat[end-derOrder+1:end,:] = sqrt(epsilon) * P; +elseif regType == "L2" + regMat = I(p); +elseif regType == "std" + regMat = Diagonal(vec(std(X, dims=1))); +elseif regType == "GL" # GL fractional derivative regulariztion + C = ones(p)*1.0; + for k in 2:p + C[k] = (1-(derOrder+1)/(k-1)) * C[k-1]; + end + regMat = zeros(p,p); + for i in 1:p + regMat[i:end, i] = C[1:end-i+1]; + end +end + + +# Preliminary calculations +#mX = mean(X, dims=1); +X = X .- mX; +#my = mean(y); +y = y .- my; +X = X / regMat; +U, s, V = svd(X, full=false); + +# Main calculations +denom = broadcast(.+, broadcast(./, lambdas, s'), s')'; +denom2 = broadcast(.+, ones(n), broadcast(./, lambdas', s.^2)) +yhat = broadcast(./, s .* (U'*y), denom); +yhat += s .* broadcast(.-, 1, broadcast(./, 1, denom2)) .* (V' * bOld) +resid = broadcast(.-, y, U * yhat) +H = broadcast(.+, U.^2 * broadcast(./, s, denom), 1/n); +rescv = broadcast(./, resid, broadcast(.-, 1, H)); +press = vec(sum(rescv.^2, dims=1)); +rmsecv = sqrt.(1/n .* press); +gcv = vec(broadcast(./, sum(resid.^2, dims=1), mean(broadcast(.-, 1, H), dims=1).^2)); + +# Finding lambda that minimises rmsecv and GCV +idminrmsecv = findmin(press)[2][1]; +idmingcv = findmin(gcv)[2][1]; +lambdarmsecv = lambdas[idminrmsecv]; +lambdagcv = lambdas[idmingcv]; + +# Calculating regression coefficients +bcoeffs = V * broadcast(./, (U' * y), denom) .+ bOld .- V * broadcast(./, V' * bOld, denom2); +bcoeffs = regMat \ bcoeffs; + +if my != 0 + bcoeffs = [my .- mX*bcoeffs; bcoeffs]; +end + +brmsecv = bcoeffs[:, idminrmsecv]; +bgcv = bcoeffs[:, idmingcv]; + + +return brmsecv, bgcv, rmsecv, gcv, idminrmsecv, lambdarmsecv, idmingcv, lambdagcv, bcoeffs +end \ No newline at end of file diff --git a/src/TRSegCVUpdate.jl b/src/TRSegCVUpdate.jl new file mode 100644 index 0000000..797f75e --- /dev/null +++ b/src/TRSegCVUpdate.jl @@ -0,0 +1,187 @@ +""" +Fast k-fold cv for updating regression coefficients +""" +function TRSegCVUpdate(X, y, lambdas, cv, bOld, regType="L2", derOrder=0) + +n, p = size(X); + +# Finding appropriate regularisation matrix +if regType == "bc" + regMat = [I(p); zeros(derOrder,p)]; + for i = 1:derOrder regMat = diff(regMat, dims = 1); end +elseif regType == "legendre" + regMat = [I(p); zeros(derOrder,p)]; + for i = 1:derOrder regMat = diff(regMat, dims = 1); end + P, _ = plegendre(derOrder-1, p); + epsilon = 1e-14; + regMat[end-derOrder+1:end,:] = sqrt(epsilon) * P; +elseif regType == "L2" + regMat = I(p); +elseif regType == "std" + regMat = Diagonal(vec(std(X, dims=1))); +elseif regType == "GL" # GL fractional derivative regulariztion + C = ones(p)*1.0; + for k in 2:p + C[k] = (1-(derOrder+1)/(k-1)) * C[k-1]; + end + + regMat = zeros(p,p); + + for i in 1:p + regMat[i:end, i] = C[1:end-i+1]; + end +end + +# Preliminary calculations +mX = mean(X, dims=1); +X = X .- mX; +my = mean(y); +y = y .- my; +X = X / regMat; +U, s, V = svd(X, full=false); +n_seg = maximum(cv); +n_lambdas = length(lambdas); + +# Finding residuals +denom = broadcast(.+, broadcast(./, lambdas, s'), s')'; +denom2 = broadcast(.+, ones(n), broadcast(./, lambdas', s.^2)) +yhat = broadcast(./, s .* (U'*y), denom) +yhat += s .* broadcast(.-, 1, broadcast(./, 1, denom2)) .* (V' * bOld) +resid = broadcast(.-, y, U * yhat) + +# Finding cross-validated residuals +rescv = zeros(n, n_lambdas); +sdenom = sqrt.(broadcast(./, s, denom))'; + +for seg in 1:n_seg + + Useg = U[vec(cv .== seg),:]; + Id = 1.0 * I(size(Useg,1)) .- 1/n; + + for k in 1:n_lambdas + Uk = Useg .* sdenom[k,:]'; + rescv[vec(cv .== seg), k] = (Id - Uk * Uk') \ resid[vec(cv .== seg), k]; + end +end + +# Calculating rmsecv and regression coefficients +press = sum(rescv.^2, dims=1)'; +rmsecv = sqrt.(1/n .* press); +bcoeffs = V * broadcast(./, (U' * y), denom) .+ bOld .- V * broadcast(./, V' * bOld, denom2); +bcoeffs = regMat \ bcoeffs; + +# Creating regression coefficients for uncentred data +if my != 0 + bcoeffs = [my .- mX*bcoeffs; bcoeffs]; +end + +# Finding rmsecv-minimal lambda value and associated regression coefficients +lambda_min, lambda_min_ind = findmin(rmsecv) +lambda_min_ind = lambda_min_ind[1] +b_lambda_min = bcoeffs[:,lambda_min_ind] + +return b_lambda_min, rmsecv, lambda_min, lambda_min_ind, bcoeffs +end + + +""" +"Manual" k-fold cv for solving the Ridge regression problem update. +The LS problem is solved explicitly and no shortcuts are used. +""" +function TRSegCVUpdateNaive(X, y, lambdas, cvfolds, bOld) + +n, p = size(X); +rmsecv = zeros(length(lambdas)); +nfolds = length(unique(cvfolds)); + +for j = 1:length(lambdas) + for i = 1:nfolds + inds = (cvfolds .== i); + Xdata = X[vec(.!inds),:]; + ydata = y[vec(.!inds)]; + + mX = mean(Xdata, dims=1); + my = mean(ydata); + Xs = Xdata .- mX; + ys = ydata .- my; + + betas = [Xs; sqrt(lambdas[j]) * I(p)] \ [ys; sqrt(lambdas[j]) * bOld]; + rmsecv[j] += sum((y[vec(inds)] - ((X[vec(inds),:] .- mX) * betas .+ my)).^2); + end +end + +rmsecv = sqrt.(1/n .* rmsecv); + +return rmsecv +end + + +""" +K-fold CV for the Ridge regression update problem, using the 'SVD-trick' for calculating the regression coefficients. +""" +function TRSegCVUpdateFair(X, y, lambdas, cv, bOld) + +n, p = size(X); +rmsecv = zeros(length(lambdas)); +nfolds = length(unique(cv)); + +for i = 1:nfolds + inds = (cv .== i); + Xdata = X[vec(.!inds),:]; + ydata = y[vec(.!inds)]; + + mX = mean(Xdata, dims=1); + my = mean(ydata); + Xs = Xdata .- mX; + ys = ydata .- my; + + U, s, V = svd(Xs, full=false); + + denom = broadcast(.+, broadcast(./, lambdas, s'), s')'; + denom2 = broadcast(.+, ones(n-sum(inds)), broadcast(./, lambdas', s.^2)); + + # Calculating regression coefficients + bcoeffs = V * broadcast(./, (U' * ys), denom) .+ bOld .- V * broadcast(./, V' * bOld, denom2); + rmsecv += sum((y[vec(inds)] .- ((X[vec(inds),:] .- mX) * bcoeffs .+ my)).^2, dims=1)'; + +end + +rmsecv = sqrt.(1/n .* rmsecv); + + return rmsecv +end + +""" +K-fold CV for the Ridge regression problem, using the 'SVD-trick' for calculating the regression coefficients. +""" +function TRSegCVFair(X, y, lambdas, cv) + +n, p = size(X); +rmsecv = zeros(length(lambdas)); +nfolds = length(unique(cv)); + +for i = 1:nfolds + inds = (cv .== i); + Xdata = X[vec(.!inds),:]; + ydata = y[vec(.!inds)]; + + mX = mean(Xdata, dims=1); + my = mean(ydata); + Xs = Xdata .- mX; + ys = ydata .- my; + + U, s, V = svd(Xs, full=false); + + denom = broadcast(.+, broadcast(./, lambdas, s'), s')'; + denom2 = broadcast(.+, ones(n-sum(inds)), broadcast(./, lambdas', s.^2)); + + # Calculating regression coefficients + bcoeffs = V * broadcast(./, (U' * ys), denom); + rmsecv += sum((y[vec(inds)] .- ((X[vec(inds),:] .- mX) * bcoeffs .+ my)).^2, dims=1)'; + +end + +rmsecv = sqrt.(1/n .* rmsecv); + + return rmsecv +end \ No newline at end of file diff --git a/src/Ting.jl b/src/Ting.jl index daff2c0..31f7471 100644 --- a/src/Ting.jl +++ b/src/Ting.jl @@ -11,32 +11,32 @@ using OptimizationBBO using LaTeXStrings using Random - -# From "convenience.jl" -export predRegression -export importData - # From "TR.jl" +export plegendre export TRLooCV export TRSegCV -export regularizationMatrix -export TRLooCVUpdate -export TRSegCVUpdate -export plegendre -export TRLooCVUpdateFair -export TRLooCVUpdateNaive -export TRSegCVUpdateNaive -export TRSegCVUpdateFair export TRSegCVNaive export TRSegCVFair +# From "TRSegCVUpdate.jl" +export TRSegCVUpdate +export TRSegCVUpdateNaive +export TRSegCVUpdateFair + +# From "TRLooUpdate.jl" +export TRLooCVUpdate +export TRLooCVUpdateFair +export TRLooCVUpdateNaive +export TRLooCVUpdateExperimental + # From "variousRegressionFunctions.jl" export oldRegCoeffs export PCR -export bidiag2 +export PLS -include("convenience.jl") include("TR.jl") +include("TRSegCVUpdate.jl") +include("TRLooUpdate.jl") include("variousRegressionFunctions.jl") end \ No newline at end of file diff --git a/src/convenience.jl b/src/convenience.jl deleted file mode 100644 index a30abfd..0000000 --- a/src/convenience.jl +++ /dev/null @@ -1,162 +0,0 @@ -# List of functions -#ypred, rmsep = predRegression(X, beta, y) -#ypred = predRegression(X, beta) -#ypred = predRegression(X::Vector{Float64}, beta) -#X, y, waves = importData("Beer"); - - - -################################################################################ - -# predRegression calculates predicted values from a matrix or vector of predictors -# for a linear regression model by X*beta. If there is a constant term it is assumed -# to be the first element. If true output is also provided rmsep is returned as well. - -function predRegression(X, beta, y) - -ypred = predRegression(X, beta) - -rmsep = sqrt(mean((y - ypred).^2)) - -return ypred, rmsep -end - -function predRegression(X, beta) - - if length(beta) == size(X,2) - ypred = X * beta; - elseif length(beta) == size(X,2) + 1 - ypred = beta[1] .+ X * beta[2:end] - end - - return ypred -end - -function predRegression(X::Vector{Float64}, beta) - - if length(beta) == length(X) - ypred = X' * beta; - elseif length(beta) == length(X) + 1 - ypred = beta[1] .+ X' * beta[2:end]; - end - - return ypred -end - -################################################################################ - -""" - importData(datasetName, datasetPath="/home/jovyan/Datasets/") - -Example: X, y, waves = importData("Beer"); -Returns X, Y/G, [waves], +more for some datasets -Valid values for datasetName: 9tumors, bacteria\\_K4\\_K5\\_K6, Beer, Dough, FTIR\\_AMW\\_tidied, FTIR\\_FTIR\\_FTIR\\_AFLP\\_SakacinP, HPLCforweb, -leukemia1, MALDI-TOF\\_melk\\_rep\\_6179\\_corr\\_exact, NIR\\_Raman\\_PUFA\\_[NIR/Raman], NMR\\_WineData, onion\\_NMR, OVALung, Raman\\_Adipose\\_fat\\_pig\\_original, -Ramanmilk, RAMANPorkFat, spectra, Sugar, tumor11 -""" -function importData(datasetName, datasetPath="/home/jovyan/Datasets/") - -#datasetName = "Beer" -#datasetPath = "/home/jovyan/Datasets/" -vars = matread(string(datasetPath,datasetName,".mat")); - -if datasetName == "9tumors" - X = vars["X"]; - G = vars["G"]; -elseif datasetName == "bacteria_K4_K5_K6" - # Causes crash for some reason -elseif datasetName == "Beer" - X = vars["X"]; - Y = vars["Y"]; # G for some datasets - waves = 400:2:2250; - return X, Y, waves -elseif datasetName == "Dough" - X = [vars["Xtrain"]; vars["Xtest"]]; - Y = [vars["Ytrain"]; vars["Ytest"]]; - #if response != "all" - # Y = Y[:,response]; - # Y = dropdims(Y;dims=2); - #end - return X, Y -elseif datasetName == "FTIR_AMW_tidied" - X = vars["X"]; - Y = vars["Y"]; -elseif datasetName == "FTIR_FTIR_FTIR_AFLP_SakacinP" - -elseif datasetName == "HPLCforweb" - X = vars["HPLCforweb"]["data"] -elseif datasetName == "leukemia1" - X = vars["X"]; - G = vars["G"]; -elseif datasetName == "MALDI-TOF_melk_rep_6179_corr_exact" - -elseif datasetName == "NIR_Raman_PUFA" # ONLY RAMAN FOR NOW! - X = vars["Ramandata"]; - Y = vars["PUFAdata"]; - waves = vars["Wavelengths_Raman"]; - return X, Y, waves -elseif datasetName == "NMR_WineData" - X = vars["X"] - Y = vars["Y"]; - ppm = vars["ppm"]; - return X, Y, ppm -elseif datasetName == "onion_NMR" - X = vars["x"] - Y = vars["onion"]; - ppm = vars["ppm"]; - return X, Y, ppm -elseif datasetName == "OVALung" - X = vars["X"]; - G = vars["y"]; - return X, G -elseif datasetName == "Raman_Adipose_fat_pig_original" - X = vars["spectra"] - Y = vars["fat"] - baseline = vars["baseline"] - waves = parse.(Float64,vars["wavelength"]) - return X, Y, waves, baseline -elseif datasetName == "Ramanmilk" - # There are replicates that need to be handled - #X = vars["SpectraR"] - #Y = vars["CLA"] -elseif datasetName == "RAMANPorkFat" - X = vars["X"]["data"] - Y = vars["Y"]["data"] - return X, Y -elseif datasetName == "spectra" - X = vars["NIR"]; - Y = vars["octane"]; - waves = 900:2:1700; - return X, Y, waves -elseif datasetName == "Sugar" - X = [vars["Xtrain"]; vars["Xtest"]]; Y = [vars["Ytrain"]; vars["Ytest"]]; - waves = vars["wave"]; - return X, Y, waves -elseif datasetName == "tumor11" - X = vars["X"]; - G = vars["G"]; -elseif datasetName == "corn" - X = vars["m5spec"]["data"] - X2 = vars["mp5spec"]["data"] - X3 = vars["mp6spec"]["data"] - Y = vars["m5nbs"]["data"] - Y2 = vars["mp5nbs"]["data"] - Y3 = vars["mp6nbs"]["data"] - - waves = 1100:2:2498 - return X, Y, waves, X2, Y2, X3, Y3 - -elseif datasetName == "nir_shootout_2002" - Xtrain = vars["calibrate_1"]["data"] - Xtrain2 = vars["calibrate_2"]["data"] - Xval = vars["validate_1"]["data"] - Xval2 = vars["validate_2"]["data"] - Xtest = vars["test_1"]["data"] - Xtest2 = vars["test_2"]["data"] - Ytrain = vars["calibrate_Y"]["data"] - Yval = vars["validate_Y"]["data"] - Ytest = vars["test_Y"]["data"] - - return Xtrain, Ytrain, Xval, Yval, Xtest, Ytest, Xtrain2, Xval2, Xtest2 -end -end \ No newline at end of file diff --git a/src/variousRegressionFunctions.jl b/src/variousRegressionFunctions.jl index d796dc7..c500a19 100644 --- a/src/variousRegressionFunctions.jl +++ b/src/variousRegressionFunctions.jl @@ -3,13 +3,15 @@ """ - function oldRegCoeffs(X, y, cvfolds, ncomps, regFunction=bidiag2) + function oldRegCoeffs(X, y, cvfolds, ncomps, regFunction=PLS) Calculates "old" regression coefficients using CV and the 1 S.E. rule. +regFunction = PLS or PCR """ -function oldRegCoeffs(X, y, cvfolds, ncomps, regFunction=bidiag2) # bidiag2 OR PCR +function oldRegCoeffs(X, y, cvfolds, ncomps, regFunction=PLS) cverror = zeros(ncomps,1); + # Iterating through each cv segment for i=1:length(unique(cvfolds)) Xtrain = X[vec(i .!= cvfolds), :]; ytrain = y[vec(i .!= cvfolds), :]; @@ -20,11 +22,11 @@ function oldRegCoeffs(X, y, cvfolds, ncomps, regFunction=bidiag2) # bidiag2 OR P cverror += sum((ytest .- Xtest*betas[2:end,:] .- betas[1,:]').^2, dims=1)'; end - cverror = cverror ./ size(X,1); - #rmsecv = sqrt.(rmsecv ./ size(X, 1)) - #minInd = argmin(rmsecv)[1] - regCoeffsInds = findfirst(cverror .< (minimum(cverror) + std(cverror)/sqrt(length(unique(cvfolds)))))[1] # 1 S.E. rule + # Select components using 1 S.E. rule + cverror = cverror ./ size(X,1); + regCoeffsInds = findfirst(cverror .< (minimum(cverror) + std(cverror)/sqrt(length(unique(cvfolds)))))[1] + # Find model for whole dataset with selected number of components betas, _ = regFunction(X, y, ncomps) bold = betas[:, regCoeffsInds] @@ -32,18 +34,12 @@ function oldRegCoeffs(X, y, cvfolds, ncomps, regFunction=bidiag2) # bidiag2 OR P end """ - function PCR(X, y, kmax, centre=true)#, standardize=true) + function PCR(X, y, kmax) Principal Component Regression (PCR). -Inputs: Data matrix, response vector, maximum number of components. -A constant term is included in the modeling if centre==true. -Outputs: B (matrix of size (p+1) x kmax), U, s, V -ADD STANDARDIZATION?? (NEED TO THINK THROUGH PREDICTION WITH NEW DATA) - -X, y = importData("Beer"); -B, \\_ = PCR(X, y, 10, true, false); +B, \\_ = PCR(X, y, 10); """ -function PCR(X, y, kmax, centre::Bool=true)#, standardize=true) +function PCR(X, y, kmax) stdX = std(X, dims=1); mX = mean(X, dims=1); @@ -76,7 +72,7 @@ end """ - bidiag2(X, y, A) + PLS(X, y, A) Julia version of the bidiag2 MATLAB function in Björck and Indahl (2017) @@ -89,7 +85,7 @@ Julia version of the bidiag2 MATLAB function in Björck and Indahl (2017) - beta - Matrix with regression coefficients (including constant term if centre==true) - W, B, T - Matrices such that X \approx T*B*W'. """ -function bidiag2(X, y, A, centre=true) +function PLS(X, y, A, centre=true) n, p = size(X);