Lagt til noe

This commit is contained in:
Joakim Skogholt 2024-02-03 11:55:21 +01:00
parent d08df77283
commit 005a969653
3 changed files with 232 additions and 2 deletions

71
src/TR.jl Normal file
View file

@ -0,0 +1,71 @@
"""
function TRLooCV
bpress, bgcv, press, GCV, idminPRESS, lambdaPRESS, idminGCV, lambdaGCV = TRLooCV(X, y, lambdas, regType="L2", regParam1=1, regParam2=1)
regType: 'bc', 'legendre', 'L2', 'std', 'GL'
"""
function TRLooCV(X, y, lambdas, regType="L2", regParam1=1, regParam2=1)
n, p = size(X);
mX = mean(X, dims=1);
X = X .- mX;
my = mean(y);
y = y .- my;
if regType == "bc"
regMat = [I(p); zeros(regParam1,p)]; for i = 1:regParam1 regMat = diff(regMat, dims = 1); end
elseif regType == "legendre"
regMat = [I(p); zeros(regParam1,p)]; for i = 1:regParam1 regMat = diff(regMat, dims = 1); end
P, _ = plegendre(regParam-1, p);
regMat[end-regParam1+1:end,:] = sqrt(regParam2) * P;
elseif regType == "L2"
regMat = I(p);
elseif regType == "std"
regMat = Diagonal(vec(std(X, dims=1)));
elseif regType == "GL" # Grünwald-Letnikov fractional derivative regulariztion
# regParam1 is alpha (order of fractional derivative)
C = ones(p)*1.0;
for k in 2:p
C[k] = (1-(regParam1+1)/(k-1)) * C[k-1];
end
regMat = zeros(p,p);
for i in 1:p
regMat[i:end, i] = C[1:end-i+1];
end
end
X = X / regMat;
U, s, V = svd(X, full=false)
denom = broadcast(.+, broadcast(./, lambdas, s'), s')';
H = broadcast(.+, U.^2 * broadcast(./, s, denom), 1/n);
resid = broadcast(.-, y, U * broadcast(./, s .* (U'*y), denom));
rescv = broadcast(./, resid, broadcast(.-, 1, H));
press = vec(sum(rescv.^2, dims=1));
GCV = vec(broadcast(./, sum(resid.^2, dims=1), mean(broadcast(.-, 1, H), dims=1).^2));
idminPRESS = findmin(press)[2][1]; # First index selects coordinates, second selects '1st coordinate'
idminGCV = findmin(GCV)[2][1]; # First index selects coordinates, second selects '1st coordinate'
lambdaPRESS = lambdas[idminPRESS];
lambdaGCV = lambdas[idminGCV];
bcoeffs = V * broadcast(./, (U' * y), denom);
bcoeffs = regMat \ bcoeffs;
if my != 0
bcoeffs = [my .- mX*bcoeffs; bcoeffs];
end
bpress = bcoeffs[:, idminPRESS]
bgcv = bcoeffs[:, idminGCV]
return bpress, bgcv, press, GCV, idminPRESS, lambdaPRESS, idminGCV, lambdaGCV
end

View file

@ -1,5 +1,25 @@
module Ting module Ting
# Write your package code here. # Dependencies
using LinearAlgebra
using Statistics
using Plots
using MAT
using Optimization
using OptimizationOptimJL
using OptimizationBBO
using LaTeXStrings
using Random
# From "convenience.jl"
export predRegression
export importData
# From "TR.jl"
export TRLooCV
include("convenience.jl")
include("TR.jl")
end end

139
src/convenience.jl Normal file
View file

@ -0,0 +1,139 @@
# List of functions
ypred, rmsep = predRegression(X, beta, y)
ypred = predRegression(X, beta)
ypred = predRegression(X::Vector{Float64}, beta)
X, y, waves = importData("Beer");
################################################################################
# predRegression calculates predicted values from a matrix or vector of predictors
# for a linear regression model by X*beta. If there is a constant term it is assumed
# to be the first element. If true output is also provided rmsep is returned as well.
function predRegression(X, beta, y)
ypred = predRegression(X, beta)
rmsep = sqrt(mean((y - ypred).^2))
return ypred, rmsep
end
function predRegression(X, beta)
if length(beta) == size(X,2)
ypred = X * beta;
elseif length(beta) == size(X,2) + 1
ypred = beta[1] .+ X * beta[2:end]
end
return ypred
end
function predRegression(X::Vector{Float64}, beta)
if length(beta) == length(X)
ypred = X' * beta;
elseif length(beta) == length(X) + 1
ypred = beta[1] .+ X' * beta[2:end];
end
return ypred
end
################################################################################
"""
importData(datasetName, datasetPath="/home/jovyan/Datasets/")
Example: X, y, waves = importData("Beer");
Returns X, Y/G, [waves], +more for some datasets
Valid values for datasetName: 9tumors, bacteria\\_K4\\_K5\\_K6, Beer, Dough, FTIR\\_AMW\\_tidied, FTIR\\_FTIR\\_FTIR\\_AFLP\\_SakacinP, HPLCforweb,
leukemia1, MALDI-TOF\\_melk\\_rep\\_6179\\_corr\\_exact, NIR\\_Raman\\_PUFA\\_[NIR/Raman], NMR\\_WineData, onion\\_NMR, OVALung, Raman\\_Adipose\\_fat\\_pig\\_original,
Ramanmilk, RAMANPorkFat, spectra, Sugar, tumor11
"""
function importData(datasetName, datasetPath="/home/jovyan/Datasets/")
#datasetName = "Beer"
#datasetPath = "/home/jovyan/Datasets/"
vars = matread(string(datasetPath,datasetName,".mat"));
if datasetName == "9tumors"
X = vars["X"];
G = vars["G"];
elseif datasetName == "bacteria_K4_K5_K6"
# Causes crash for some reason
elseif datasetName == "Beer"
X = vars["X"];
Y = vars["Y"]; # G for some datasets
waves = 400:2:2250;
return X, Y, waves
elseif datasetName == "Dough"
X = [vars["Xtrain"]; vars["Xtest"]];
Y = [vars["Ytrain"]; vars["Ytest"]];
#if response != "all"
# Y = Y[:,response];
# Y = dropdims(Y;dims=2);
#end
return X, Y
elseif datasetName == "FTIR_AMW_tidied"
X = vars["X"];
Y = vars["Y"];
elseif datasetName == "FTIR_FTIR_FTIR_AFLP_SakacinP"
elseif datasetName == "HPLCforweb"
X = vars["HPLCforweb"]["data"]
elseif datasetName == "leukemia1"
X = vars["X"];
G = vars["G"];
elseif datasetName == "MALDI-TOF_melk_rep_6179_corr_exact"
elseif datasetName == "NIR_Raman_PUFA" # ONLY RAMAN FOR NOW!
X = vars["Ramandata"];
Y = vars["PUFAdata"];
waves = vars["Wavelengths_Raman"];
return X, Y, waves
elseif datasetName == "NMR_WineData"
X = vars["X"]
Y = vars["Y"];
ppm = vars["ppm"];
return X, Y, ppm
elseif datasetName == "onion_NMR"
X = vars["x"]
Y = vars["onion"];
ppm = vars["ppm"];
return X, Y, ppm
elseif datasetName == "OVALung"
X = vars["X"];
G = vars["y"];
return X, G
elseif datasetName == "Raman_Adipose_fat_pig_original"
X = vars["spectra"]
Y = vars["fat"]
baseline = vars["baseline"]
waves = parse.(Float64,vars["wavelength"])
return X, Y, waves, baseline
elseif datasetName == "Ramanmilk"
# There are replicates that need to be handled
#X = vars["SpectraR"]
#Y = vars["CLA"]
elseif datasetName == "RAMANPorkFat"
X = vars["X"]["data"]
Y = vars["Y"]["data"]
return X, Y
elseif datasetName == "spectra"
X = vars["NIR"];
Y = vars["octane"];
waves = 900:2:1700;
return X, Y, waves
elseif datasetName == "Sugar"
X = [vars["Xtrain"]; vars["Xtest"]]; Y = [vars["Ytrain"]; vars["Ytest"]];
waves = vars["wave"];
return X, Y, waves
elseif datasetName == "tumor11"
X = vars["X"];
G = vars["G"];
end
end