##### Get started #####

#install required packages prior to loading if needed
library('e1071')
library('caret')
library('dplyr')


#### Import and transform data ####

# read in phenotype and log-transformed NIRS data
# NIRS spectra are truncated to 450-2450 nm and log-transformed (Log10(1/X))
nirs.pheno.part.log <- read.csv("NIRS_Classify_Sage_Phenotypes.csv")

# convert the categorical phenotype columns to factors
nirs.pheno.part.log$Site <- as.factor(nirs.pheno.part.log$Site)
nirs.pheno.part.log$PatchType <- as.factor(nirs.pheno.part.log$PatchType)
nirs.pheno.part.log$ForageType <- as.factor(nirs.pheno.part.log$ForageType)
nirs.pheno.part.log$Species <- as.factor(nirs.pheno.part.log$Species)
nirs.pheno.part.log$Season <- as.factor(nirs.pheno.part.log$Season)

# split data into train and test sets
# NOTE: this is simple random sampling without replacement (not stratified):
# a quarter of the rows (rounded down) is held out as the test set and the
# remaining rows form the training set. The seed makes the split reproducible.
set.seed(1)
# seq_len() is safe for a zero-row table, where 1:nrow() would yield c(1, 0)
nirs.pheno.part.log.index <- seq_len(nrow(nirs.pheno.part.log))
nirs.pheno.part.log.index.test <- sample(
  nirs.pheno.part.log.index,
  trunc(length(nirs.pheno.part.log.index) / 4)
)
# select the training rows positively: negative indexing with an empty test
# index (fewer than 4 rows) would return zero rows instead of all rows
nirs.pheno.part.log.index.train <- setdiff(
  nirs.pheno.part.log.index,
  nirs.pheno.part.log.index.test
)
nirs.pheno.part.log.index.test.set <- nirs.pheno.part.log[nirs.pheno.part.log.index.test, ]
nirs.pheno.part.log.index.train.set <- nirs.pheno.part.log[nirs.pheno.part.log.index.train, ]

# rename train and test sets for cleaner use
train.set <- nirs.pheno.part.log.index.train.set
test.set <- nirs.pheno.part.log.index.test.set


##### Classify populations (wyomingensis) #####

# build a site factor in which the two Magic collections (JNMagic, KGMagic)
# are merged into a single "Magic" level, and prepend it as the first column
site.combined <- nirs.pheno.part.log$Site
site.combined <- recode_factor(site.combined, JNMagic="Magic", KGMagic="Magic")
nirs.pheno.part.log.sitecombined <- data.frame(site.combined, nirs.pheno.part.log)

# split data into train and test sets
# NOTE: this is simple random sampling without replacement (not stratified):
# a quarter of the rows (rounded down) is held out as the test set.
set.seed(1)
# seq_len() is safe for a zero-row table, where 1:nrow() would yield c(1, 0)
nirs.pheno.part.log.sitecombined.index <- seq_len(nrow(nirs.pheno.part.log.sitecombined))
nirs.pheno.part.log.sitecombined.index.test <- sample(
  nirs.pheno.part.log.sitecombined.index,
  trunc(length(nirs.pheno.part.log.sitecombined.index) / 4)
)
# select the training rows positively: negative indexing with an empty test
# index (fewer than 4 rows) would return zero rows instead of all rows
nirs.pheno.part.log.sitecombined.index.train <- setdiff(
  nirs.pheno.part.log.sitecombined.index,
  nirs.pheno.part.log.sitecombined.index.test
)
nirs.pheno.part.log.sitecombined.index.test.set <-
  nirs.pheno.part.log.sitecombined[nirs.pheno.part.log.sitecombined.index.test, ]
nirs.pheno.part.log.sitecombined.index.train.set <-
  nirs.pheno.part.log.sitecombined[nirs.pheno.part.log.sitecombined.index.train, ]

# rename train and test sets for cleaner use
train.set.sitecombined <- nirs.pheno.part.log.sitecombined.index.train.set
test.set.sitecombined <- nirs.pheno.part.log.sitecombined.index.test.set

# ensure Site is a factor (it was already converted after import, so this is
# a harmless safeguard rather than a real type change)
train.set.sitecombined$Site <- as.factor(train.set.sitecombined$Site)
test.set.sitecombined$Site <- as.factor(test.set.sitecombined$Site)

# produce train and test sets for phenotype
# dropping columns 2-8 leaves site.combined as the first column, followed by
# the remaining columns of the table
train.site.combined <- train.set.sitecombined[, -c(2:8)]
test.site.combined <- test.set.sitecombined[, -c(2:8)]

# produce filtered train and test sets for phenotype
## wyoming species only: keep the wyoming rows, then drop columns 2-8 so that
## site.combined stays as the first column (the response used below)
train.site.combined.filter <- filter(train.set.sitecombined, Species %in% c("wyoming"))[, -c(2:8)]
test.site.combined.filter <- filter(test.set.sitecombined, Species %in% c("wyoming"))[, -c(2:8)]

# run svm
# linear-kernel SVM with site.combined as response and all other columns as
# predictors; the response (first column) is removed before predicting
svm.model.site.combined.filter <- svm(site.combined ~ ., data = train.site.combined.filter, kernel = "linear")
svm.pred.site.combined.filter <- predict(svm.model.site.combined.filter, test.site.combined.filter[-1])

# compute svm confusion matrix
# caret expects the predictions as `data` and the observed classes as
# `reference`; passing them the other way round transposes the table and
# mislabels its Prediction/Reference margins
# (the object keeps its original name `matrix`, which shadows base::matrix)
matrix <- confusionMatrix(
  data = svm.pred.site.combined.filter,
  reference = test.site.combined.filter$site.combined
)
# explicit print() calls so the values also appear when the file is source()d
print("Accuracy")
print(matrix$overall[["Accuracy"]])
print("Kappa index")
print(matrix$overall[["Kappa"]])
print(matrix$table)


##### Classify Magic species #####

# produce train and test sets for phenotype
# keep the rows of the three species at the two Magic sites, then drop
# columns 1-5 and 7 so that Species is the first remaining column (the
# response used below)
train.species.Magic <- filter(
  train.set,
  Species %in% c("wyoming","dwarf","3-tip"),
  Site %in% c("KGMagic","JNMagic")
)[, -c(1:5,7)]
test.species.Magic <- filter(
  test.set,
  Species %in% c("wyoming","dwarf","3-tip"),
  Site %in% c("KGMagic","JNMagic")
)[, -c(1:5,7)]

# run svm
# linear-kernel SVM with Species as response; the response (first column) is
# removed before predicting
svm.model.species.Magic <- svm(Species ~ ., data = train.species.Magic, kernel = "linear")
svm.pred.species.Magic <- predict(svm.model.species.Magic, test.species.Magic[-1])

# compute svm confusion matrix
# caret expects the predictions as `data` and the observed classes as
# `reference`; passing them the other way round transposes the table and
# mislabels its Prediction/Reference margins
# (the object keeps its original name `matrix`, which shadows base::matrix)
matrix <- confusionMatrix(
  data = svm.pred.species.Magic,
  reference = test.species.Magic$Species
)
# explicit print() calls so the values also appear when the file is source()d
print("Accuracy")
print(matrix$overall[["Accuracy"]])
print("Kappa index")
print(matrix$overall[["Kappa"]])
print(matrix$table)


##### Classify Cedar Gulch species #####

# produce train and test sets for phenotype
# keep the rows of the three species at Cedar Gulch, then drop columns 1-5
# and 7 so that Species is the first remaining column (the response used below)
train.species.Cedar <- filter(
  train.set,
  Species %in% c("wyoming","dwarf","nova"),
  Site %in% c("Cedar Gulch")
)[, -c(1:5,7)]
test.species.Cedar <- filter(
  test.set,
  Species %in% c("wyoming","dwarf","nova"),
  Site %in% c("Cedar Gulch")
)[, -c(1:5,7)]

# run svm
# linear-kernel SVM with Species as response; the response (first column) is
# removed before predicting
svm.model.species.Cedar <- svm(Species ~ ., data = train.species.Cedar, kernel = "linear")
svm.pred.species.Cedar <- predict(svm.model.species.Cedar, test.species.Cedar[-1])

# compute svm confusion matrix
# caret expects the predictions as `data` and the observed classes as
# `reference`; passing them the other way round transposes the table and
# mislabels its Prediction/Reference margins
# (the object keeps its original name `matrix`, which shadows base::matrix)
matrix <- confusionMatrix(
  data = svm.pred.species.Cedar,
  reference = test.species.Cedar$Species
)
# explicit print() calls so the values also appear when the file is source()d
print("Accuracy")
print(matrix$overall[["Accuracy"]])
print("Kappa index")
print(matrix$overall[["Kappa"]])
print(matrix$table)


##### Classify Craters species #####

# produce train and test sets for phenotype
# keep the rows of the two species at Craters, then drop columns 1-5 and 7
# so that Species is the first remaining column (the response used below)
train.species.Craters <- filter(
  train.set,
  Species %in% c("wyoming","3-tip"),
  Site %in% c("Craters")
)[, -c(1:5,7)]
test.species.Craters <- filter(
  test.set,
  Species %in% c("wyoming","3-tip"),
  Site %in% c("Craters")
)[, -c(1:5,7)]

# run svm
# linear-kernel SVM with Species as response; the response (first column) is
# removed before predicting
svm.model.species.Craters <- svm(Species ~ ., data = train.species.Craters, kernel = "linear")
svm.pred.species.Craters <- predict(svm.model.species.Craters, test.species.Craters[-1])

# compute svm confusion matrix
# caret expects the predictions as `data` and the observed classes as
# `reference`; passing them the other way round transposes the table and
# mislabels its Prediction/Reference margins
# (the object keeps its original name `matrix`, which shadows base::matrix)
matrix <- confusionMatrix(
  data = svm.pred.species.Craters,
  reference = test.species.Craters$Species
)
# explicit print() calls so the values also appear when the file is source()d
print("Accuracy")
print(matrix$overall[["Accuracy"]])
print("Kappa index")
print(matrix$overall[["Kappa"]])
print(matrix$table)


##### Classify Raft River species #####

# produce train and test sets for phenotype
# keep the rows of the two species at Raft River, then drop columns 1-5 and 7
# so that Species is the first remaining column (the response used below)
train.species.Raft <- filter(
  train.set,
  Species %in% c("wyoming","arbuscula"),
  Site %in% c("Raft River")
)[, -c(1:5,7)]
test.species.Raft <- filter(
  test.set,
  Species %in% c("wyoming","arbuscula"),
  Site %in% c("Raft River")
)[, -c(1:5,7)]

# run svm
# linear-kernel SVM with Species as response; the response (first column) is
# removed before predicting
svm.model.species.Raft <- svm(Species ~ ., data = train.species.Raft, kernel = "linear")
svm.pred.species.Raft <- predict(svm.model.species.Raft, test.species.Raft[-1])

# compute svm confusion matrix
# caret expects the predictions as `data` and the observed classes as
# `reference`; passing them the other way round transposes the table and
# mislabels its Prediction/Reference margins
# (the object keeps its original name `matrix`, which shadows base::matrix)
matrix <- confusionMatrix(
  data = svm.pred.species.Raft,
  reference = test.species.Raft$Species
)
# explicit print() calls so the values also appear when the file is source()d
print("Accuracy")
print(matrix$overall[["Accuracy"]])
print("Kappa index")
print(matrix$overall[["Kappa"]])
print(matrix$table)


##### Classify Magic (JNMagic) season (wyomingensis) #####

# produce train and test sets for phenotype
# keep the summer/winter rows of wyoming plants at site JNMagic only, then
# drop columns 1-6 so that Season is the first remaining column (the response
# used below)
train.season.JNMagic.filter <- filter(
  train.set,
  Season %in% c("summer","winter"),
  Site %in% c("JNMagic"),
  Species %in% c("wyoming")
)[, -c(1:6)]
test.season.JNMagic.filter <- filter(
  test.set,
  Season %in% c("summer","winter"),
  Site %in% c("JNMagic"),
  Species %in% c("wyoming")
)[, -c(1:6)]

# run svm
# linear-kernel SVM with Season as response; the response (first column) is
# removed before predicting
svm.model.season.JNMagic.filter <- svm(Season ~ ., data = train.season.JNMagic.filter, kernel = "linear")
svm.pred.season.JNMagic.filter <- predict(svm.model.season.JNMagic.filter, test.season.JNMagic.filter[-1])

# compute svm confusion matrix
# caret expects the predictions as `data` and the observed classes as
# `reference`; passing them the other way round transposes the table and
# mislabels its Prediction/Reference margins
# (the object keeps its original name `matrix`, which shadows base::matrix)
matrix <- confusionMatrix(
  data = svm.pred.season.JNMagic.filter,
  reference = test.season.JNMagic.filter$Season
)
# explicit print() calls so the values also appear when the file is source()d
print("Accuracy")
print(matrix$overall[["Accuracy"]])
print("Kappa index")
print(matrix$overall[["Kappa"]])
print(matrix$table)


##### Classify Cedar Gulch season (wyomingensis) #####

# produce train and test sets for phenotype
# keep the summer/winter rows of wyoming plants at Cedar Gulch, then drop
# columns 1-6 so that Season is the first remaining column (the response
# used below)
train.season.Cedar.filter <- filter(
  train.set,
  Season %in% c("summer","winter"),
  Site %in% c("Cedar Gulch"),
  Species %in% c("wyoming")
)[, -c(1:6)]
test.season.Cedar.filter <- filter(
  test.set,
  Season %in% c("summer","winter"),
  Site %in% c("Cedar Gulch"),
  Species %in% c("wyoming")
)[, -c(1:6)]

# run svm
# linear-kernel SVM with Season as response; the response (first column) is
# removed before predicting
svm.model.season.Cedar.filter <- svm(Season ~ ., data = train.season.Cedar.filter, kernel = "linear")
svm.pred.season.Cedar.filter <- predict(svm.model.season.Cedar.filter, test.season.Cedar.filter[-1])

# compute svm confusion matrix
# caret expects the predictions as `data` and the observed classes as
# `reference`; passing them the other way round transposes the table and
# mislabels its Prediction/Reference margins
# (the object keeps its original name `matrix`, which shadows base::matrix)
matrix <- confusionMatrix(
  data = svm.pred.season.Cedar.filter,
  reference = test.season.Cedar.filter$Season
)
# explicit print() calls so the values also appear when the file is source()d
print("Accuracy")
print(matrix$overall[["Accuracy"]])
print("Kappa index")
print(matrix$overall[["Kappa"]])
print(matrix$table)


##### Classify Magic year (wyomingensis) #####

# produce train and test sets for phenotype
# keep the wyoming rows from the two Magic collections, then drop columns
# 1-2 and 4-7 so that Site is the first remaining column (the response used
# below); Site stands in for sampling year here
train.year.Magic.filter <- filter(
  train.set,
  Site %in% c("KGMagic","JNMagic"),
  Species %in% c("wyoming")
)[, -c(1:2,4:7)]
test.year.Magic.filter <- filter(
  test.set,
  Site %in% c("KGMagic","JNMagic"),
  Species %in% c("wyoming")
)[, -c(1:2,4:7)]

# run svm
# linear-kernel SVM with Site as response; the response (first column) is
# removed before predicting
svm.model.year.Magic.filter <- svm(Site ~ ., data = train.year.Magic.filter, kernel = "linear")
svm.pred.year.Magic.filter <- predict(svm.model.year.Magic.filter, test.year.Magic.filter[-1])

# compute svm confusion matrix
# caret expects the predictions as `data` and the observed classes as
# `reference`; passing them the other way round transposes the table and
# mislabels its Prediction/Reference margins
# (the object keeps its original name `matrix`, which shadows base::matrix)
matrix <- confusionMatrix(
  data = svm.pred.year.Magic.filter,
  reference = test.year.Magic.filter$Site
)
# explicit print() calls so the values also appear when the file is source()d
print("Accuracy")
print(matrix$overall[["Accuracy"]])
print("Kappa index")
print(matrix$overall[["Kappa"]])
print(matrix$table) #JNMagic = 2015, KGMagic = 2013
