diff --git a/src/convenience.jl b/src/convenience.jl index e231a36..4712aa0 100644 --- a/src/convenience.jl +++ b/src/convenience.jl @@ -65,21 +65,6 @@ nTrainValTest = [nTrain, nVal, nTest]; return splits, nTrainValTest end -""" - createDataSplitInds(X, nSplits, props=[6/10, 2/10, 2/10], rngseed=42) - -Creates training/validation/test split for dataset - -### Arguments - - 'X' : Int, number of samples - - 'props' : 3x1 Vector of proportions (train/val/test), must sum to 1 - - 'nSplits' : Int, number of data splits - - 'rngseed' : Int, for reproducibility - -### Returns - - 'splits' : size(X,1) x nSplits matrix with indices - - 'nTrainValTest' : 3x1 vector indicating number of samples in training/validation/testing -""" function createDataSplitInds(X::Int64, nSplits, props=[6/10, 2/10, 2/10], rngseed=42) n = X; @@ -122,9 +107,7 @@ Creates training/validation/test split for dataset - 'splitInd' : Index of split to use - 'X' and 'Y' : Data matrix and response vector/matrix -### Returns - - 'splits' : size(X,1) x nSplits matrix with indices - - 'nTrainValTest' : 3x1 vector indicating number of samples in training/validation/testing +### Returns dictionary with keys XTrain, XVal, XTest, YTrain, YVal, YTest """ function createDataSplit(splits, nTrainValTest, splitInd, X, Y) diff --git a/src/preprocessing.jl b/src/preprocessing.jl index 521db69..52d1fe6 100644 --- a/src/preprocessing.jl +++ b/src/preprocessing.jl @@ -67,8 +67,7 @@ means = mean(X, dims=2); stds = std(X, dims=2); X_SNV = @. (X - means) / stds; -return_values = Dict([("X_Cor", X_SNV), ("means", means), ("stds", stds)]); -return return_values +return X_SNV, Dict([("means", means), ("stds", stds)]); end @@ -81,7 +80,7 @@ end MSC preprocessing (subtract constant trend, scale based on projection onto ref. 
spectrum) Second argument is mean (default) or svd (first right singular vector), or a vector to be used as reference spectrum -Returns dictionary with keys X\\_Cor, X\\_Ref, coeffs +Returns X\\_Cor and dictionary with keys X\\_Ref, coeffs """ function MSC(X, ref::String="mean") @@ -99,8 +98,8 @@ B = [ones(size(X,2), 1) X_Ref]; coeffs = B \ X'; X_MSC = @. (X - coeffs[1,:]) / coeffs[2,:]; -return_values = Dict([("X_Cor", X_MSC), ("X_Ref", X_Ref), ("coeffs", coeffs)]) -return return_values +return_values = Dict([("X_Ref", X_Ref), ("coeffs", coeffs)]) +return X_MSC, return_values end @@ -111,7 +110,7 @@ B = [ones(size(X,2), 1) X_Ref]; coeffs = B \ X'; X_MSC = @. (X - coeffs[1,:]) / coeffs[2,:]; -return_values = Dict([("X_Cor", X_MSC), ("X_Ref", X_Ref), ("coeffs", coeffs)]) +return_values = X_MSC, Dict([("X_Ref", X_Ref), ("coeffs", coeffs)]) return return_values end @@ -125,7 +124,7 @@ EMSC correction with mean spectrum as reference and polynomial trends of form Li First argument is spectra to be corrected, second argument is either degree for polynomial regression or a basis to be used for correction (basis used should be output of this function, the reference spectrum is assumed to be the first basis vector). -Returns dictionary with keys X\\_Cor, basis, coeffs +Returns X\\_Cor and dictionary with keys basis, coeffs """ function EMSCTraditional(X, polDeg::Int64=2) @@ -172,7 +171,7 @@ Inputs: der_order - derivative order keep_endpoints - boolean, truncates spectra if false. 
NOTE: For derivatives MUST USE FALSE -Output: Dictionary with keys X\\_Cor, der\\_order, degree, window\\_size, filter\\_coeffs, keep\\_endpoints +Output: X\\_Cor, Dictionary with keys der\\_order, degree, window\\_size, filter\\_coeffs, keep\\_endpoints """ function savitzkyGolay(X, d=2, w=2, der_order=0, keep_endpoints=false) @@ -198,7 +197,7 @@ if !keep_endpoints X_Cor = X_Cor[:, 1:end-w-1]; end -return Dict([("X_Cor", X_Cor), ("der_order", der_order), ("degree", d), ("window_size", w), ("filter_coeffs", a), ("keep_endpoints", keep_endpoints)]); +return X_Cor, Dict([("der_order", der_order), ("degree", d), ("window_size", w), ("filter_coeffs", a), ("keep_endpoints", keep_endpoints)]); end @@ -272,8 +271,7 @@ for i=1:n X_Cor[i,:] = X[i,:] - baseline[i,:]; end -return_values = Dict([("baseline", baseline), ("X_Cor", X_Cor)]); -return return_values +return X_Cor, baseline; end @@ -287,10 +285,13 @@ end """ function EMSC(X, polDeg=2, refType="svd", nRef=1, baseDeg=-1, intF=0) + function EMSC(X, model::EMSCModel) Implementation of EMSC pre-processing based on Skogholt et al. (2018). intF is 0 (no interferent), vector, or matrix with intereferents as rows. -Returns dictionary with keys X\\_Cor, model, coeffs +Returns X\\_Cor and dictionary with keys model, coeffs +Second function performs EMSC correction on X with given model. +Returns X\\_Cor and dictionary with keys model, coeffs """ function EMSC(X, polDeg=2, refType="svd", nRef=1, baseDeg=-1, intF=0) @@ -360,13 +361,7 @@ end -""" - - EMSCCorrection(X, model::EMSCModel) - -Performs EMSC correction on X with given model. -Returns dictionary with keys X\\_Cor, model, coeffs -""" -function EMSCCorrection(X, model::EMSCModel) +function EMSC(X, model::EMSCModel) n, _ = size(X); @@ -389,6 +384,6 @@ X_Cor = X_Cor ./ mult; [X_Cor[i,:] = X_Cor[i,:] - model.baseline for i in 1:n]; # Surely this should be possible with broadcasting instead... # Thought it would be + baseline above, but this works... 
-return_values = Dict([("X_Cor", X_Cor), ("model", model), ("coeffs", coeffs)]); +return_values = X_Cor, Dict([("model", model), ("coeffs", coeffs)]); return return_values end \ No newline at end of file