Changing output format for preprocessing functions + some minor fixes

This commit is contained in:
Joakim Skogholt 2023-05-10 21:23:05 +02:00
parent 6074e56de2
commit 350c3d9af4
2 changed files with 16 additions and 38 deletions

View file

@ -65,21 +65,6 @@ nTrainValTest = [nTrain, nVal, nTest];
return splits, nTrainValTest return splits, nTrainValTest
end end
"""
createDataSplitInds(X, nSplits, props=[6/10, 2/10, 2/10], rngseed=42)
Creates training/validation/test split for dataset
### Arguments
- 'X' : Int, number of samples
- 'props' : 3x1 Vector of proportions (train/val/test), must sum to 1
- 'nSplits' : Int, number of data splits
- 'rngseed' : Int, for reproducibility
### Returns
- 'splits' : size(X,1) x nSplits matrix with indices
- 'nTrainValTest' : 3x1 vector indicating number of samples in training/validation/testing
"""
function createDataSplitInds(X::Int64, nSplits, props=[6/10, 2/10, 2/10], rngseed=42) function createDataSplitInds(X::Int64, nSplits, props=[6/10, 2/10, 2/10], rngseed=42)
n = X; n = X;
@ -122,9 +107,7 @@ Creates training/validation/test split for dataset
- 'splitInd' : Index of split to use - 'splitInd' : Index of split to use
- 'X' and 'Y' : Data matrix and response vector/matrix - 'X' and 'Y' : Data matrix and response vector/matrix
### Returns ### Returns dictionary with keys XTrain, XVal, XTest, YTrain, YVal, YTest
- 'splits' : size(X,1) x nSplits matrix with indices
- 'nTrainValTest' : 3x1 vector indicating number of samples in training/validation/testing
""" """
function createDataSplit(splits, nTrainValTest, splitInd, X, Y) function createDataSplit(splits, nTrainValTest, splitInd, X, Y)

View file

@ -67,8 +67,7 @@ means = mean(X, dims=2);
stds = std(X, dims=2); stds = std(X, dims=2);
X_SNV = @. (X - means) / stds; X_SNV = @. (X - means) / stds;
return_values = Dict([("X_Cor", X_SNV), ("means", means), ("stds", stds)]); return X_SNV, Dict([("means", means), ("stds", stds)]);
return return_values
end end
@ -81,7 +80,7 @@ end
MSC preprocessing (subtract constant trend, scale based on projection onto ref. spectrum) MSC preprocessing (subtract constant trend, scale based on projection onto ref. spectrum)
Second argument is mean (default) or svd (first right singular vector), or a vector to be used as reference spectrum Second argument is mean (default) or svd (first right singular vector), or a vector to be used as reference spectrum
Returns dictionary with keys X\\_Cor, X\\_Ref, coeffs Returns X\\_Cor and dictionary with keys X\\_Ref, coeffs
""" """
function MSC(X, ref::String="mean") function MSC(X, ref::String="mean")
@ -99,8 +98,8 @@ B = [ones(size(X,2), 1) X_Ref];
coeffs = B \ X'; coeffs = B \ X';
X_MSC = @. (X - coeffs[1,:]) / coeffs[2,:]; X_MSC = @. (X - coeffs[1,:]) / coeffs[2,:];
return_values = Dict([("X_Cor", X_MSC), ("X_Ref", X_Ref), ("coeffs", coeffs)]) return_values = Dict([("X_Ref", X_Ref), ("coeffs", coeffs)])
return return_values return X_MSC, return_values
end end
@ -111,7 +110,7 @@ B = [ones(size(X,2), 1) X_Ref];
coeffs = B \ X'; coeffs = B \ X';
X_MSC = @. (X - coeffs[1,:]) / coeffs[2,:]; X_MSC = @. (X - coeffs[1,:]) / coeffs[2,:];
return_values = Dict([("X_Cor", X_MSC), ("X_Ref", X_Ref), ("coeffs", coeffs)]) return_values = X_MSC, Dict([("X_Ref", X_Ref), ("coeffs", coeffs)])
return return_values return return_values
end end
@ -125,7 +124,7 @@ EMSC correction with mean spectrum as reference and polynomial trends of form Li
First argument is spectra to be corrected, second argument is either degree for polynomial regression First argument is spectra to be corrected, second argument is either degree for polynomial regression
or a basis to be used for correction (basis used should be output of this function, the reference or a basis to be used for correction (basis used should be output of this function, the reference
spectrum is assumed to be the first basis vector). spectrum is assumed to be the first basis vector).
Returns dictionary with keys X\\_Cor, basis, coeffs Returns X\\_Cor and dictionary with keys basis, coeffs
""" """
function EMSCTraditional(X, polDeg::Int64=2) function EMSCTraditional(X, polDeg::Int64=2)
@ -172,7 +171,7 @@ Inputs:
der_order - derivative order der_order - derivative order
keep_endpoints - boolean, truncates spectra if false. NOTE: For derivatives MUST USE FALSE keep_endpoints - boolean, truncates spectra if false. NOTE: For derivatives MUST USE FALSE
Output: Dictionary with keys X\\_Cor, der\\_order, degree, window\\_size, filter\\_coeffs, keep\\_endpoints Output: X\\_Cor, Dictionary with keys der\\_order, degree, window\\_size, filter\\_coeffs, keep\\_endpoints
""" """
function savitzkyGolay(X, d=2, w=2, der_order=0, keep_endpoints=false) function savitzkyGolay(X, d=2, w=2, der_order=0, keep_endpoints=false)
@ -198,7 +197,7 @@ if !keep_endpoints
X_Cor = X_Cor[:, 1:end-w-1]; X_Cor = X_Cor[:, 1:end-w-1];
end end
return Dict([("X_Cor", X_Cor), ("der_order", der_order), ("degree", d), ("window_size", w), ("filter_coeffs", a), ("keep_endpoints", keep_endpoints)]); return X_Cor, Dict([("der_order", der_order), ("degree", d), ("window_size", w), ("filter_coeffs", a), ("keep_endpoints", keep_endpoints)]);
end end
@ -272,8 +271,7 @@ for i=1:n
X_Cor[i,:] = X[i,:] - baseline[i,:]; X_Cor[i,:] = X[i,:] - baseline[i,:];
end end
return_values = Dict([("baseline", baseline), ("X_Cor", X_Cor)]); return X_Cor, baseline;
return return_values
end end
@ -287,10 +285,13 @@ end
""" """
function EMSC(X, polDeg=2, refType="svd", nRef=1, baseDeg=-1, intF=0) function EMSC(X, polDeg=2, refType="svd", nRef=1, baseDeg=-1, intF=0)
function EMSC(X, model::EMSCModel)
Implementation of EMSC pre-processing based on Skogholt et al. (2018). Implementation of EMSC pre-processing based on Skogholt et al. (2018).
intF is 0 (no interferent), vector, or matrix with interferents as rows. intF is 0 (no interferent), vector, or matrix with interferents as rows.
Returns dictionary with keys X\\_Cor, model, coeffs Returns X\\_Cor and dictionary with keys model, coeffs
Second function performs EMSC correction on X with given model.
Returns X\\_Cor and dictionary with keys model, coeffs
""" """
function EMSC(X, polDeg=2, refType="svd", nRef=1, baseDeg=-1, intF=0) function EMSC(X, polDeg=2, refType="svd", nRef=1, baseDeg=-1, intF=0)
@ -360,13 +361,7 @@ end
""" function EMSC(X, model::EMSCModel)
- EMSCCorrection(X, model::EMSCModel)
Performs EMSC correction on X with given model.
Returns dictionary with keys X\\_Cor, model, coeffs
"""
function EMSCCorrection(X, model::EMSCModel)
n, _ = size(X); n, _ = size(X);
@ -389,6 +384,6 @@ X_Cor = X_Cor ./ mult;
[X_Cor[i,:] = X_Cor[i,:] - model.baseline for i in 1:n]; # Surely this should be possible with broadcasting instead... [X_Cor[i,:] = X_Cor[i,:] - model.baseline for i in 1:n]; # Surely this should be possible with broadcasting instead...
# Thought it would be + baseline above, but this works... # Thought it would be + baseline above, but this works...
return_values = Dict([("X_Cor", X_Cor), ("model", model), ("coeffs", coeffs)]); return_values = X_Cor, Dict([("model", model), ("coeffs", coeffs)]);
return return_values return return_values
end end