Changing output format for preprocessing functions + some minor fixes
This commit is contained in:
parent
6074e56de2
commit
350c3d9af4
2 changed files with 16 additions and 38 deletions
|
|
@ -65,21 +65,6 @@ nTrainValTest = [nTrain, nVal, nTest];
|
||||||
return splits, nTrainValTest
|
return splits, nTrainValTest
|
||||||
end
|
end
|
||||||
|
|
||||||
"""
|
|
||||||
createDataSplitInds(X, nSplits, props=[6/10, 2/10, 2/10], rngseed=42)
|
|
||||||
|
|
||||||
Creates training/validation/test split for dataset
|
|
||||||
|
|
||||||
### Arguments
|
|
||||||
- 'X' : Int, number of samples
|
|
||||||
- 'props' : 3x1 Vector of proportions (train/val/test), must sum to 1
|
|
||||||
- 'nSplits' : Int, number of data splits
|
|
||||||
- 'rngseed' : Int, for reproducibility
|
|
||||||
|
|
||||||
### Returns
|
|
||||||
- 'splits' : size(X,1) x nSplits matrix with indices
|
|
||||||
- 'nTrainValTest' : 3x1 vector indicating number of samples in training/validation/testing
|
|
||||||
"""
|
|
||||||
function createDataSplitInds(X::Int64, nSplits, props=[6/10, 2/10, 2/10], rngseed=42)
|
function createDataSplitInds(X::Int64, nSplits, props=[6/10, 2/10, 2/10], rngseed=42)
|
||||||
|
|
||||||
n = X;
|
n = X;
|
||||||
|
|
@ -122,9 +107,7 @@ Creates training/validation/test split for dataset
|
||||||
- 'splitInd' : Index of split to use
|
- 'splitInd' : Index of split to use
|
||||||
- 'X' and 'Y' : Data matrix and response vector/matrix
|
- 'X' and 'Y' : Data matrix and response vector/matrix
|
||||||
|
|
||||||
### Returns
|
### Returns dictionary with keys XTrain, XVal, XTest, YTrain, YVal, YTest
|
||||||
- 'splits' : size(X,1) x nSplits matrix with indices
|
|
||||||
- 'nTrainValTest' : 3x1 vector indicating number of samples in training/validation/testing
|
|
||||||
"""
|
"""
|
||||||
function createDataSplit(splits, nTrainValTest, splitInd, X, Y)
|
function createDataSplit(splits, nTrainValTest, splitInd, X, Y)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -67,8 +67,7 @@ means = mean(X, dims=2);
|
||||||
stds = std(X, dims=2);
|
stds = std(X, dims=2);
|
||||||
X_SNV = @. (X - means) / stds;
|
X_SNV = @. (X - means) / stds;
|
||||||
|
|
||||||
return_values = Dict([("X_Cor", X_SNV), ("means", means), ("stds", stds)]);
|
return X_SNV, Dict([("means", means), ("stds", stds)]);
|
||||||
return return_values
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -81,7 +80,7 @@ end
|
||||||
|
|
||||||
MSC preprocessing (subtract constant trend, scale based on projection onto ref. spectrum)
|
MSC preprocessing (subtract constant trend, scale based on projection onto ref. spectrum)
|
||||||
Second argument is mean (default) or svd (first right singular vector), or a vector to be used as reference spectrum
|
Second argument is mean (default) or svd (first right singular vector), or a vector to be used as reference spectrum
|
||||||
Returns dictionary with keys X\\_Cor, X\\_Ref, coeffs
|
Returns X\\_Cor and dictionary with keys X\\_Ref, coeffs
|
||||||
"""
|
"""
|
||||||
function MSC(X, ref::String="mean")
|
function MSC(X, ref::String="mean")
|
||||||
|
|
||||||
|
|
@ -99,8 +98,8 @@ B = [ones(size(X,2), 1) X_Ref];
|
||||||
coeffs = B \ X';
|
coeffs = B \ X';
|
||||||
X_MSC = @. (X - coeffs[1,:]) / coeffs[2,:];
|
X_MSC = @. (X - coeffs[1,:]) / coeffs[2,:];
|
||||||
|
|
||||||
return_values = Dict([("X_Cor", X_MSC), ("X_Ref", X_Ref), ("coeffs", coeffs)])
|
return_values = Dict([("X_Ref", X_Ref), ("coeffs", coeffs)])
|
||||||
return return_values
|
return X_MSC, return_values
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -111,7 +110,7 @@ B = [ones(size(X,2), 1) X_Ref];
|
||||||
coeffs = B \ X';
|
coeffs = B \ X';
|
||||||
X_MSC = @. (X - coeffs[1,:]) / coeffs[2,:];
|
X_MSC = @. (X - coeffs[1,:]) / coeffs[2,:];
|
||||||
|
|
||||||
return_values = Dict([("X_Cor", X_MSC), ("X_Ref", X_Ref), ("coeffs", coeffs)])
|
return_values = X_MSC, Dict([("X_Ref", X_Ref), ("coeffs", coeffs)])
|
||||||
return return_values
|
return return_values
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
@ -125,7 +124,7 @@ EMSC correction with mean spectrum as reference and polynomial trends of form Li
|
||||||
First argument is spectra to be corrected, second argument is either degree for polynomial regression
|
First argument is spectra to be corrected, second argument is either degree for polynomial regression
|
||||||
or a basis to be used for correction (basis used should be output of this function, the reference
|
or a basis to be used for correction (basis used should be output of this function, the reference
|
||||||
spectrum is assumed to be the first basis vector).
|
spectrum is assumed to be the first basis vector).
|
||||||
Returns dictionary with keys X\\_Cor, basis, coeffs
|
Returns X\\_Cor and dictionary with keys basis, coeffs
|
||||||
"""
|
"""
|
||||||
function EMSCTraditional(X, polDeg::Int64=2)
|
function EMSCTraditional(X, polDeg::Int64=2)
|
||||||
|
|
||||||
|
|
@ -172,7 +171,7 @@ Inputs:
|
||||||
der_order - derivative order
|
der_order - derivative order
|
||||||
keep_endpoints - boolean, truncates spectra if false. NOTE: For derivatives MUST USE FALSE
|
keep_endpoints - boolean, truncates spectra if false. NOTE: For derivatives MUST USE FALSE
|
||||||
|
|
||||||
Output: Dictionary with keys X\\_Cor, der\\_order, degree, window\\_size, filter\\_coeffs, keep\\_endpoints
|
Output: X\\_Cor, Dictionary with keys der\\_order, degree, window\\_size, filter\\_coeffs, keep\\_endpoints
|
||||||
"""
|
"""
|
||||||
function savitzkyGolay(X, d=2, w=2, der_order=0, keep_endpoints=false)
|
function savitzkyGolay(X, d=2, w=2, der_order=0, keep_endpoints=false)
|
||||||
|
|
||||||
|
|
@ -198,7 +197,7 @@ if !keep_endpoints
|
||||||
X_Cor = X_Cor[:, 1:end-w-1];
|
X_Cor = X_Cor[:, 1:end-w-1];
|
||||||
end
|
end
|
||||||
|
|
||||||
return Dict([("X_Cor", X_Cor), ("der_order", der_order), ("degree", d), ("window_size", w), ("filter_coeffs", a), ("keep_endpoints", keep_endpoints)]);
|
return X_Cor, Dict([("der_order", der_order), ("degree", d), ("window_size", w), ("filter_coeffs", a), ("keep_endpoints", keep_endpoints)]);
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -272,8 +271,7 @@ for i=1:n
|
||||||
X_Cor[i,:] = X[i,:] - baseline[i,:];
|
X_Cor[i,:] = X[i,:] - baseline[i,:];
|
||||||
end
|
end
|
||||||
|
|
||||||
return_values = Dict([("baseline", baseline), ("X_Cor", X_Cor)]);
|
return X_Cor, baseline;
|
||||||
return return_values
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -287,10 +285,13 @@ end
|
||||||
|
|
||||||
"""
|
"""
|
||||||
function EMSC(X, polDeg=2, refType="svd", nRef=1, baseDeg=-1, intF=0)
|
function EMSC(X, polDeg=2, refType="svd", nRef=1, baseDeg=-1, intF=0)
|
||||||
|
function EMSC(X, model::EMSCModel)
|
||||||
|
|
||||||
Implementation of EMSC pre-processing based on Skogholt et al. (2018).
|
Implementation of EMSC pre-processing based on Skogholt et al. (2018).
|
||||||
intF is 0 (no interferent), vector, or matrix with interferents as rows.
|
intF is 0 (no interferent), vector, or matrix with interferents as rows.
|
||||||
Returns dictionary with keys X\\_Cor, model, coeffs
|
Returns X\\_Cor and dictionary with keys model, coeffs
|
||||||
|
Second function performs EMSC correction on X with given model.
|
||||||
|
Returns X\\_Cor and dictionary with keys model, coeffs
|
||||||
"""
|
"""
|
||||||
function EMSC(X, polDeg=2, refType="svd", nRef=1, baseDeg=-1, intF=0)
|
function EMSC(X, polDeg=2, refType="svd", nRef=1, baseDeg=-1, intF=0)
|
||||||
|
|
||||||
|
|
@ -360,13 +361,7 @@ end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
function EMSC(X, model::EMSCModel)
|
||||||
- EMSCCorrection(X, model::EMSCModel)
|
|
||||||
|
|
||||||
Performs EMSC correction on X with given model.
|
|
||||||
Returns dictionary with keys X\\_Cor, model, coeffs
|
|
||||||
"""
|
|
||||||
function EMSCCorrection(X, model::EMSCModel)
|
|
||||||
|
|
||||||
n, _ = size(X);
|
n, _ = size(X);
|
||||||
|
|
||||||
|
|
@ -389,6 +384,6 @@ X_Cor = X_Cor ./ mult;
|
||||||
[X_Cor[i,:] = X_Cor[i,:] - model.baseline for i in 1:n]; # Surely this should be possible with broadcasting instead...
|
[X_Cor[i,:] = X_Cor[i,:] - model.baseline for i in 1:n]; # Surely this should be possible with broadcasting instead...
|
||||||
# Thought it would be + baseline above, but this works...
|
# Thought it would be + baseline above, but this works...
|
||||||
|
|
||||||
return_values = Dict([("X_Cor", X_Cor), ("model", model), ("coeffs", coeffs)]);
|
return_values = X_Cor, Dict([("model", model), ("coeffs", coeffs)]);
|
||||||
return return_values
|
return return_values
|
||||||
end
|
end
|
||||||
Loading…
Add table
Reference in a new issue