Compare commits

...

10 commits

Author SHA1 Message Date
Joakim Skogholt
6953c9bf9e Added basic simulation of spectroscopic data 2023-05-20 08:55:37 +02:00
Joakim Skogholt
1c71badbef Fixed another error in GL derivative 2023-05-20 08:36:15 +02:00
Joakim Skogholt
c2e265b407 Fixed Error in GL derivative 2023-05-20 08:02:27 +02:00
Joakim Skogholt
13d640527c Added GL-derivative to RegularizationMatrix function 2023-05-19 19:57:17 +02:00
Joakim Skogholt
2b47088921 Fixed many small mistakes 2023-05-18 13:43:18 +02:00
Joakim Skogholt
e0d9a447df And TRSegmentOrthThingy 2023-05-18 13:35:34 +02:00
Joakim Skogholt
705154513f Exported TRVirCV and TRSegCV 2023-05-18 13:34:45 +02:00
Joakim Skogholt
aea04b8b62 Added TRVirSV and TRSegCV + various fixes 2023-05-18 13:26:55 +02:00
Joakim Skogholt
ef4c5ab060 Exported bidiagdecomp function 2023-05-14 08:51:23 +02:00
Joakim Skogholt
46a6c0c238 Added bidiag struct + decomp function 2023-05-14 08:49:00 +02:00
3 changed files with 210 additions and 14 deletions

View file

@ -44,6 +44,11 @@ export TRLooCV
export PlotTRLooCV export PlotTRLooCV
export TRLooCVNum export TRLooCVNum
export TRGCVNum export TRGCVNum
export TRSegCV
export TRVirCV
export TRBidiagDecomp
export simulateSpectrum
include("preprocessing.jl") include("preprocessing.jl")
@ -51,5 +56,6 @@ include("convenience.jl")
include("conveniencePlots.jl") include("conveniencePlots.jl")
include("variousRegressionFunctions.jl") include("variousRegressionFunctions.jl")
include("TR.jl") include("TR.jl")
include("simulateSpectroscopicData.jl")
end end

175
src/TR.jl
View file

@ -1,13 +1,6 @@
using Optimization # For numerical minimization of PRESS statistic
using OptimizationOptimJL # For numerical minimization of PRESS statistic
using Optimization
using OptimizationOptimJL
struct TRSVD struct TRSVD
U::Matrix{Float64} U::Matrix{Float64}
@ -22,12 +15,26 @@ struct TRSVD
end end
struct TRBidiag
W::Matrix{Float64}
B::Bidiagonal{Float64, Vector{Float64}}
T::Matrix{Float64}
y::Vector{Float64}
mX::Matrix{Float64}
my::Float64
regType::String
regParam1::Float64
regMat::Matrix{Float64}
n::Int64
p::Int64
end
""" """
### TO DO: ADD FRACTIONAL DERIVATIVE REGULARIZATION ### ### TO DO: ADD FRACTIONAL DERIVATIVE REGULARIZATION ###
regularizationMatrix(X; regType="legendre", regParam1=0, regParam2=1e-14) regularizationMatrix(X; regType="L2", regParam1=0, regParam2=1e-14)
regularizationMatrix(p::Int64; regType="legendre", regParam1=0, regParam2=1e-14) regularizationMatrix(p::Int64; regType="L2", regParam1=0, regParam2=1e-14)
Calculates and returns square regularization matrix. Calculates and returns square regularization matrix.
@ -55,22 +62,56 @@ end
function regularizationMatrix(p::Int64; regType="L2", regParam1=0, regParam2=1e-14) function regularizationMatrix(p::Int64; regType="L2", regParam1=0, regParam2=1e-14)
if regType == "bc" if regType == "bc" # Discrete derivative with boundary conditions
regMat = [I(p); zeros(regParam1,p)]; for i = 1:regParam1 regMat = diff(regMat, dims = 1); end regMat = [I(p); zeros(regParam1,p)]; for i = 1:regParam1 regMat = diff(regMat, dims = 1); end
elseif regType == "legendre" elseif regType == "legendre" # Fill in polynomials in bottom row(s) to get square matrix
regMat = [I(p); zeros(regParam1,p)]; for i = 1:regParam1 regMat = diff(regMat, dims = 1); end regMat = [I(p); zeros(regParam1,p)]; for i = 1:regParam1 regMat = diff(regMat, dims = 1); end
P, _ = plegendre(regParam1-1, p); P, _ = plegendre(regParam1-1, p);
regMat[end-regParam1+1:end,:] = sqrt(regParam2) * P; regMat[end-regParam1+1:end,:] = sqrt(regParam2) * P;
elseif regType == "L2" elseif regType == "L2"
regMat = I(p); regMat = I(p);
elseif regType == "std" elseif regType == "std" # Standardization
regMat = regParam2; regMat = regParam2;
elseif regType == "GL" # Grünwald-Letnikov fractional derivative regulariztion
# regParam1 is alpha (order of fractional derivative)
C = ones(p)*1.0;
for k in 2:p
C[k] = (1-(regParam1+1)/(k-1)) * C[k-1];
end
regMat = zeros(p,p);
for i in 1:p
regMat[i:end, i] = C[1:end-i+1];
end
end end
return regMat return regMat
end end
"""
function TRBidiagDecomp(X, y, A=(minimum(size(X))-1), regType="L2", regParam1=0, regParam2=1e-14)
Calculates regularization matrix (using function "RegularizationMatrix"),
and centres and transforms data matrix according to "X / regMat".
Output is an object of type "TRBidiag" and is used as input to other TR functions.
"""
function TRBidiagDecomp(X, y, A=(minimum(size(X))-1), regType="L2", regParam1=0, regParam2=1e-14)
n, p = size(X);
mX = mean(X, dims=1);
X = X .- mX;
my = mean(y);
y = vec(y .- my);
regMat = regularizationMatrix(X; regType, regParam1, regParam2);
X = X / regMat;
_, W, B, T = bidiag2(X, y, A);
TRObj = TRBidiag(W, B, T, y, mX, my, regType, regParam1, regMat, n, p);
return TRObj
end
""" """
function TRSVDDecomp(X, regType="L2", regParam1=0, regParam2=1e-14) function TRSVDDecomp(X, regType="L2", regParam1=0, regParam2=1e-14)
@ -92,6 +133,112 @@ TRObj = TRSVD(U, s, V, mX, regType, regParam1, regMat, n, p);
return TRObj return TRObj
end end
function TRSegmentOrth(X, segments)
n = size(X,1);
n_segments = maximum(segments);
U = zeros(n,n);
for seg in 1:n_segments
inds = vec(seg .== segments)
U[inds, inds], _, _ = svd(X[inds,:], full=false);
end
return U
end
"""
function TRVirCV(X, y, segments, lambdas, regType="L2", regParam1=0, regParam2=1e-14)
Segmented virtual cross-validation (VirCV) for TR models.
Outputs: b, press, lambda_min, lambda_min_ind, GCV
b are (virtual) press-minimal regression coefficients.
"""
function TRVirCV(X, y, segments, lambdas, regType="L2", regParam1=0, regParam2=1e-14)
U_segments = TRSegmentOrth(X, segments);
bs = vec(sum(U_segments, dims=1).^2);
n, p = size(X);
mX = mean(X, dims=1);
X = X .- mX;
my = mean(y);
y = vec(y .- my);
X = U_segments' * X;
y = U_segments' * y;
regMat = regularizationMatrix(p; regType, regParam1, regParam2);
X = X / regMat;
U, s, V = svd(X, full=false);
denom = broadcast(.+, broadcast(./, lambdas, s'), s')';
H = broadcast(.+, U.^2 * broadcast(./, s, denom), bs./n);
resid = broadcast(.-, y, U * broadcast(./, s .* (U'*y), denom));
rescv = broadcast(./, resid, broadcast(.-, 1, H));
press = vec(sum(rescv.^2, dims=1));
#rmsecv = sqrt.(1/n .* press);
GCV = vec(broadcast(./, sum(resid.^2, dims=1), mean(broadcast(.-, 1, H), dims=1).^2));
lambda_min, lambda_min_ind = findmin(press);
lambda_min_ind = lambda_min_ind[1];
denom2 = broadcast(.+, lambda_min ./ s', s')';
b = V * broadcast(./, (U' * y), denom2);
b = regMat \ b;
b = [my .- mX*b; b];
return b, press, lambda_min, lambda_min_ind, GCV
end
"""
function TRSegCV(X, y, lambdas, cv, regType="L2", regParam1=0, regParam2=1e-14)
Segmented cross-validation based on the Sherman-Morrison-Woodbury updating formula.
Inputs:
- X : Data matrix
- y : Response vector
- lambdas : Vector of regularization parameter values
- cv : Vector of length n indicating segment membership for each sample
- regType, regParam1, regParam2 : Inputs to regularizationMatrix function
Outputs: rmsecv, b, lambda_min, lambda_min_ind.
b are regression coefficients corresponding to the lambda value minimising the CV-error.
"""
function TRSegCV(X, y, lambdas, cv, regType="L2", regParam1=0, regParam2=1e-14)
TR = TRSVDDecomp(X, regType, regParam1, regParam2);
n_seg = maximum(cv);
n_lambdas = length(lambdas);
my = mean(y);
y = y .- my;
denom = broadcast(.+, broadcast(./, lambdas, TR.s'), TR.s')';
resid = broadcast(.-, y, TR.U * broadcast(./, TR.s .* (TR.U'*y), denom));
rescv = zeros(TR.n, n_lambdas);
sdenom = sqrt.(broadcast(./, TR.s, denom))';
for seg in 1:n_seg
Useg = TR.U[vec(cv .== seg),:];
Id = 1.0 * I(size(Useg,1)) .- 1/TR.n;
for k in 1:n_lambdas
Uk = Useg .* sdenom[k,:]';
rescv[vec(cv .== seg), k] = (Id - Uk * Uk') \ resid[vec(cv .== seg), k];
end
end
press = sum(rescv.^2, dims=1)';
rmsecv = sqrt.(1/TR.n .* press);
lambda_min, lambda_min_ind = findmin(rmsecv)
lambda_min_ind = lambda_min_ind[1]
b = TRRegCoeffs(TR, y, lambda_min, my)
return b, rmsecv, lambda_min, lambda_min_ind
end
""" """

View file

@ -0,0 +1,43 @@
Lk(waves, c, gamma) = @. gamma / (pi*(waves-c)^2+gamma^2);
Gk(waves, c, gamma) = @. 1/(sqrt(2*pi)*gamma) * exp(-(waves-c)^2 / (2*gamma^2));
function pseudoVoigtPeak(waves, c, gamma, eta, alpha)
fk = zeros(length(waves));
for i in 1:length(c)
fk += alpha[i] .* pseudoVoigtPeak(waves, c[i], gamma[i], eta[i]);
end
return fk
end
function pseudoVoigtPeak(waves, c, gamma::Float64, eta::Float64)
fk = eta * Lk(waves, c, gamma) + (1-eta) * Gk(waves, c, gamma);
return fk
end
function simulateBaseline(waves, a)
vm = zeros(length(waves), length(a));
for i in 0:(length(a)-1)
vm[:, i+1] = waves.^i ./ norm(waves.^i);
end
b = vm * a;
return b
end
function simulateSpectrum(waves, c, gamma, eta, alpha, a, sigma=0.0)
pure_spec = pseudoVoigtPeak(waves, c, gamma, eta, alpha);
b = simulateBaseline(waves, a);
noise = randn(length(waves)) .* sigma;
spec = pure_spec + b + noise;
return spec, pure_spec, b, noise
end