From 005a969653885b8b9020a9a59935f77cd9151dfa Mon Sep 17 00:00:00 2001 From: Joakim Skogholt Date: Sat, 3 Feb 2024 11:55:21 +0100 Subject: [PATCH] Lagt til noe --- src/TR.jl | 71 +++++++++++++++++++++++ src/Ting.jl | 24 +++++++- src/convenience.jl | 139 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 232 insertions(+), 2 deletions(-) create mode 100644 src/TR.jl create mode 100644 src/convenience.jl diff --git a/src/TR.jl b/src/TR.jl new file mode 100644 index 0000000..7d75b51 --- /dev/null +++ b/src/TR.jl @@ -0,0 +1,71 @@ + + + + +""" + function TRLooCV + +bpress, bgcv, press, GCV, idminPRESS, lambdaPRESS, idminGCV, lambdaGCV = TRLooCV(X, y, lambdas, regType="L2", regParam1=1, regParam2=1) + +regType: 'bc', 'legendre', 'L2', 'std', 'GL' +""" +function TRLooCV(X, y, lambdas, regType="L2", regParam1=1, regParam2=1) + +n, p = size(X); +mX = mean(X, dims=1); +X = X .- mX; +my = mean(y); +y = y .- my; + +if regType == "bc" + regMat = [I(p); zeros(regParam1,p)]; for i = 1:regParam1 regMat = diff(regMat, dims = 1); end +elseif regType == "legendre" + regMat = [I(p); zeros(regParam1,p)]; for i = 1:regParam1 regMat = diff(regMat, dims = 1); end + P, _ = plegendre(regParam-1, p); + regMat[end-regParam1+1:end,:] = sqrt(regParam2) * P; +elseif regType == "L2" + regMat = I(p); +elseif regType == "std" + regMat = Diagonal(vec(std(X, dims=1))); +elseif regType == "GL" # Grünwald-Letnikov fractional derivative regulariztion + # regParam1 is alpha (order of fractional derivative) + C = ones(p)*1.0; + for k in 2:p + C[k] = (1-(regParam1+1)/(k-1)) * C[k-1]; + end + + regMat = zeros(p,p); + + for i in 1:p + regMat[i:end, i] = C[1:end-i+1]; + end +end + + +X = X / regMat; +U, s, V = svd(X, full=false) + +denom = broadcast(.+, broadcast(./, lambdas, s'), s')'; +H = broadcast(.+, U.^2 * broadcast(./, s, denom), 1/n); +resid = broadcast(.-, y, U * broadcast(./, s .* (U'*y), denom)); +rescv = broadcast(./, resid, broadcast(.-, 1, H)); +press = vec(sum(rescv.^2, dims=1)); +GCV = vec(broadcast(./, sum(resid.^2, dims=1), mean(broadcast(.-, 1, H), dims=1).^2)); + +idminPRESS = findmin(press)[2][1]; # First index selects coordinates, second selects '1st coordinate' +idminGCV = findmin(GCV)[2][1]; # First index selects coordinates, second selects '1st coordinate' +lambdaPRESS = lambdas[idminPRESS]; +lambdaGCV = lambdas[idminGCV]; + +bcoeffs = V * broadcast(./, (U' * y), denom); +bcoeffs = regMat \ bcoeffs; + +if my != 0 + bcoeffs = [my .- mX*bcoeffs; bcoeffs]; +end + +bpress = bcoeffs[:, idminPRESS] +bgcv = bcoeffs[:, idminGCV] + +return bpress, bgcv, press, GCV, idminPRESS, lambdaPRESS, idminGCV, lambdaGCV +end \ No newline at end of file diff --git a/src/Ting.jl b/src/Ting.jl index ddc74e5..ec47504 100644 --- a/src/Ting.jl +++ b/src/Ting.jl @@ -1,5 +1,25 @@ module Ting -# Write your package code here. +# Dependencies +using LinearAlgebra +using Statistics +using Plots +using MAT +using Optimization +using OptimizationOptimJL +using OptimizationBBO +using LaTeXStrings +using Random -end + +# From "convenience.jl" +export predRegression +export importData + +# From "TR.jl" +export TRLooCV + +include("convenience.jl") +include("TR.jl") + +end \ No newline at end of file diff --git a/src/convenience.jl b/src/convenience.jl new file mode 100644 index 0000000..6a3c4d3 --- /dev/null +++ b/src/convenience.jl @@ -0,0 +1,139 @@ +# List of functions +ypred, rmsep = predRegression(X, beta, y) +ypred = predRegression(X, beta) +ypred = predRegression(X::Vector{Float64}, beta) +X, y, waves = importData("Beer"); + + + +################################################################################ + +# predRegression calculates predicted values from a matrix or vector of predictors +# for a linear regression model by X*beta. If there is a constant term it is assumed +# to be the first element. If true output is also provided rmsep is returned as well. + +function predRegression(X, beta, y) + +ypred = predRegression(X, beta) + +rmsep = sqrt(mean((y - ypred).^2)) + +return ypred, rmsep +end + +function predRegression(X, beta) + + if length(beta) == size(X,2) + ypred = X * beta; + elseif length(beta) == size(X,2) + 1 + ypred = beta[1] .+ X * beta[2:end] + end + + return ypred +end + +function predRegression(X::Vector{Float64}, beta) + + if length(beta) == length(X) + ypred = X' * beta; + elseif length(beta) == length(X) + 1 + ypred = beta[1] .+ X' * beta[2:end]; + end + + return ypred +end + +################################################################################ + +""" + importData(datasetName, datasetPath="/home/jovyan/Datasets/") + +Example: X, y, waves = importData("Beer"); +Returns X, Y/G, [waves], +more for some datasets +Valid values for datasetName: 9tumors, bacteria\\_K4\\_K5\\_K6, Beer, Dough, FTIR\\_AMW\\_tidied, FTIR\\_FTIR\\_FTIR\\_AFLP\\_SakacinP, HPLCforweb, +leukemia1, MALDI-TOF\\_melk\\_rep\\_6179\\_corr\\_exact, NIR\\_Raman\\_PUFA\\_[NIR/Raman], NMR\\_WineData, onion\\_NMR, OVALung, Raman\\_Adipose\\_fat\\_pig\\_original, +Ramanmilk, RAMANPorkFat, spectra, Sugar, tumor11 +""" +function importData(datasetName, datasetPath="/home/jovyan/Datasets/") + +#datasetName = "Beer" +#datasetPath = "/home/jovyan/Datasets/" +vars = matread(string(datasetPath,datasetName,".mat")); + +if datasetName == "9tumors" + X = vars["X"]; + G = vars["G"]; +elseif datasetName == "bacteria_K4_K5_K6" + # Causes crash for some reason +elseif datasetName == "Beer" + X = vars["X"]; + Y = vars["Y"]; # G for some datasets + waves = 400:2:2250; + return X, Y, waves +elseif datasetName == "Dough" + X = [vars["Xtrain"]; vars["Xtest"]]; + Y = [vars["Ytrain"]; vars["Ytest"]]; + #if response != "all" + # Y = Y[:,response]; + # Y = dropdims(Y;dims=2); + #end + return X, Y +elseif datasetName == "FTIR_AMW_tidied" + X = vars["X"]; + Y = vars["Y"]; +elseif datasetName == "FTIR_FTIR_FTIR_AFLP_SakacinP" + +elseif datasetName == "HPLCforweb" + X = vars["HPLCforweb"]["data"] +elseif datasetName == "leukemia1" + X = vars["X"]; + G = vars["G"]; +elseif datasetName == "MALDI-TOF_melk_rep_6179_corr_exact" + +elseif datasetName == "NIR_Raman_PUFA" # ONLY RAMAN FOR NOW! + X = vars["Ramandata"]; + Y = vars["PUFAdata"]; + waves = vars["Wavelengths_Raman"]; + return X, Y, waves +elseif datasetName == "NMR_WineData" + X = vars["X"] + Y = vars["Y"]; + ppm = vars["ppm"]; + return X, Y, ppm +elseif datasetName == "onion_NMR" + X = vars["x"] + Y = vars["onion"]; + ppm = vars["ppm"]; + return X, Y, ppm +elseif datasetName == "OVALung" + X = vars["X"]; + G = vars["y"]; + return X, G +elseif datasetName == "Raman_Adipose_fat_pig_original" + X = vars["spectra"] + Y = vars["fat"] + baseline = vars["baseline"] + waves = parse.(Float64,vars["wavelength"]) + return X, Y, waves, baseline +elseif datasetName == "Ramanmilk" + # There are replicates that need to be handled + #X = vars["SpectraR"] + #Y = vars["CLA"] +elseif datasetName == "RAMANPorkFat" + X = vars["X"]["data"] + Y = vars["Y"]["data"] + return X, Y +elseif datasetName == "spectra" + X = vars["NIR"]; + Y = vars["octane"]; + waves = 900:2:1700; + return X, Y, waves +elseif datasetName == "Sugar" + X = [vars["Xtrain"]; vars["Xtest"]]; Y = [vars["Ytrain"]; vars["Ytest"]]; + waves = vars["wave"]; + return X, Y, waves +elseif datasetName == "tumor11" + X = vars["X"]; + G = vars["G"]; +end +end \ No newline at end of file