|
|
|
|
|
|
|
|
|
|
|
# Packages this script attaches, in the order they are attached.
required_packages <- c(
  "ggplot2", "data.table", "caret", "doParallel", "mltools"
)

# Install whatever is missing, then attach. requireNamespace() only probes
# for availability: unlike require(), the probe itself neither attaches the
# package nor warns when the package is absent. library() then fails loudly
# if a package still cannot be loaded after the install attempt.
invisible(lapply(required_packages, function(pkg) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}))
|
|
|
|
|
# ---- Input tables (paths are relative to the working directory) ----

# Feature table; rows are looked up by `protid` further down.
featuretable <- data.table::fread("featuretable4github_revision.txt")

# Variant table that is filtered below and supplies the class labels.
varall <- data.table::fread(
  "SupplementaryTable_S1_pathvariantsusedintraining_revision2.txt"
)

# Lookup from `feature_name` to `feature_name4plot`, used for plot labels.
prettynames <- data.table::fread("pretty_featurenames2.txt")

# Alignment table, read as a plain data.frame rather than a data.table.
famcacscn <- data.table::fread(
  "scncacaa_familyalignedCACNA1Acantranscript.txt",
  data.table = FALSE
)

# ---- Project helpers ----
# NOTE(review): predictgof() and modelperformance() are not defined in this
# script; presumably they come from this file - confirm.
source("R_functions4predicting_goflof_CACNA1SCN.R")
|
|
|
|
|
|
|
|
|
|
|
# Keep only variants flagged with used_in_functional_prediction == 1 whose
# revised mechanism label is "lof" or "gof". %in% is used so that NA values
# are dropped instead of propagating.
varall <- varall[
  used_in_functional_prediction %in% 1 &
    prd_mech_revised %in% c("lof", "gof")
]

# One row per (gene, altAA, pos) combination; the first occurrence is kept.
varall <- unique(varall, by = c("gene", "altAA", "pos"))
|
|
|
|
|
|
|
|
|
# Remove columns by reference (data.table `:=`), in three passes.

# 1) A fixed set of named columns.
featuretable[
  ,
  c("chr", "genomic_pos", "USED_REF", "STRAND", "Feature", "inpp2") := NULL
]

# 2) Every column whose name contains "dens" (selected by position).
featuretable[, (grep("dens", names(featuretable))) := NULL]

# 3) Three further named columns.
featuretable[, c("H", "caccon", "SF_DEKA") := NULL]

# Collapse rows that are identical now that those columns are gone.
featuretable <- unique(featuretable)
|
|
|
|
|
# Pull one feature row per variant by matching on `protid`. Variants without
# a match produce an all-NA row here, which the completeness filter removes.
feat <- featuretable[match(varall$protid, featuretable$protid)]

# Attach the class label row by row (feat is aligned with varall).
feat[, Class := varall$prd_mech_revised]

# Keep only rows without any missing value.
feat <- feat[complete.cases(feat)]

# predictgof() below is handed a plain data.frame.
varallmod <- as.data.frame(feat)
|
|
|
|
|
|
|
# Run the prediction helper with a "gbm" model type.
# NOTE(review): predictgof() is defined outside this script; its return value
# is only known here by how it is used - element 1 is written out as the
# predictions, element 2 is the model summarised further down.
outi <- predictgof(
  varallmod = varallmod,
  modeltype = "gbm",
  featuretable = featuretable,
  alignmentfile = famcacscn
)

out <- outi[[1]]
model1 <- outi[[2]]

# Save the predictions to the working directory.
utils::write.csv(out, file = "fuNCion.predictions.csv")

# Report model performance on the predictions (helper defined elsewhere).
modelperformance(out)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Feature-importance table of the fitted model, without drawing the default
# plot. NOTE(review): assumes model1 dispatches to gbm's summary method
# (modeltype = "gbm" above), whose argument is `plotit`; the previous
# `plot = F` only reached it through partial argument matching.
importance_matrix <- base::summary(model1, plotit = FALSE)
colnames(importance_matrix) <- c("Feature", "Importance")

# Strip backticks from the feature names.
importance_matrix$Feature <- gsub("`", "", importance_matrix$Feature)

# Keep only features whose importance exceeds 0.05.
importance_matrix <- importance_matrix[importance_matrix$Importance > 0.05, ]

# Swap in the plot labels from `prettynames`. A feature without an entry
# there keeps its raw name instead of turning into an NA label.
importance_matrix$Feature <- local({
  raw_names <- as.character(importance_matrix$Feature)
  labels <- prettynames$feature_name4plot[
    match(raw_names, prettynames$feature_name)
  ]
  ifelse(is.na(labels), raw_names, labels)
})

# Drop the ", DSSP" suffix from the labels.
importance_matrix$Feature <- gsub(", DSSP", "", importance_matrix$Feature)
|
|
|
# Horizontal bar chart of feature importance, in the row order of
# importance_matrix (first row at the top after the flip).
featimpxgb <- ggplot(
  importance_matrix,
  aes(
    # unique() guards against repeated labels, which would otherwise make
    # factor() fail with "factor level is duplicated".
    x = factor(Feature, levels = rev(unique(Feature))),
    y = Importance
  )
) +
  # Bar width is a constant, so it is set on the layer rather than in aes().
  geom_bar(
    fill = "#00000088",
    stat = "identity",
    position = "identity",
    width = 0.3
  ) +
  ggplot2::coord_flip() +
  xlab("Features") +
  ylab("Relative Influence") +
  ggtitle("Feature Importance") +
  # theme_bw() is a complete theme and replaces every theme setting added
  # before it, so it must come first for the tweaks below to take effect.
  theme_bw() +
  theme(
    plot.title = element_text(lineheight = 0.9, face = "bold"),
    panel.grid.major.y = element_blank()
  )

featimpxgb
|
|
|
|