|
|
|
|
|
|
|
|
|
|
|
# Install any package this script needs that is not yet available.
# requireNamespace() only checks availability: unlike require(), it does not
# attach the package and does not emit a warning when the package is missing.
invisible(lapply(
  c("ggplot2", "data.table", "caret", "doParallel", "mltools"),
  function(pkg) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      install.packages(pkg)
    }
  }
))

# Attach each package exactly once. library() stops with an error if a
# package is still unavailable, so a failed install cannot go unnoticed.
library(ggplot2)
library(data.table)
library(caret)
library(doParallel)
library(mltools)
|
|
|
|
|
# ---- Input tables -----------------------------------------------------------
# All paths are relative to the current working directory.

# Per-variant feature table.
featuretable <- data.table::fread(
  input = "featuretable4github_revision.txt"
)

# Supplementary table S1: pathogenic variants.
varall <- data.table::fread(
  input = "SupplementaryTable_S1_pathvariantsusedintraining_revision.txt"
)

# Lookup from raw feature names to the labels used in the importance plot.
prettynames <- data.table::fread(
  input = "pretty_featurenames2.txt"
)

# Supplementary table S2: neutral variants (note the SupplTables/ subfolder).
gnomadnoneur2 <- data.table::fread(
  input = "SupplTables/S2_neutralvariantsusedintraining3.txt"
)

# ---- Helper functions -------------------------------------------------------
# predictpath() and modelperformance() are called further down but are not
# defined in this script; presumably they come from this file -- confirm.
source("R_functions4predicting_path_GOFLOF_CACNA1SCN.r")
|
|
|
|
|
|
|
|
|
# Pathogenic variants: keep the rows flagged for pathogenicity prediction and,
# among those, only the first row of every (gene, altAA, pos) combination.
varall <- unique(
  varall[used_in_pathogenicity_prediction %in% 1],
  by = c("gene", "altAA", "pos")
)

# Neutral variants: keep the rows flagged for pathogenicity prediction.
gnomadnoneur <- gnomadnoneur2[used_in_pathogenicity_prediction == 1]
|
|
|
|
|
|
|
|
|
|
|
|
|
# Remove three feature columns from the table (by reference).
featuretable[, c("H", "caccon", "SF_DEKA") := NULL]

# Remove the genomic / per-substitution annotation columns (by reference).
# NOTE(review): presumably these are dropped so that the unique() call below
# can collapse rows that differ only in these columns -- confirm.
featuretable[
  ,
  c(
    "chr", "genomic_pos", "USED_REF", "alt", "pos",
    "refAA", "altAA", "STRAND", "Feature", "inpp2"
  ) := NULL
]

# Collapse rows that are now exact duplicates of each other.
featuretable <- unique(featuretable)
|
|
|
|
|
|
|
featgn <- featuretable[match(gnomadnoneur$protid, protid)] |
|
featgn$Class <- "neutral" |
|
|
|
feat <- featuretable[match(varall$protid, protid)] |
|
feat$Class <- "pathogenic" |
|
|
|
|
|
featgn <- featgn[gene%in%feat$gene,] |
|
|
|
feat <- rbind(feat, featgn) |
|
feat <- feat[complete.cases(feat),] |
|
varallmod <- copy(feat) |
|
|
|
|
|
|
|
# Fit the pathogenicity model (gradient boosting) on the assembled table.
# predictpath() is not defined in this script; it returns a list.
outi <- predictpath(modeltype = "gbm", varallmod = varallmod)

# First element: passed to modelperformance() and later row-bound with a
# table of per-variant predictions. Second element: the fitted model that is
# later used with predict().
out <- outi[[1]]
model1 <- outi[[2]]

# Performance on the genes that contributed both classes to training.
modelperformance(out)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Keep the results obtained so far before `out` is reused below.
outbalancgenes <- out

# Score the neutral variants of genes that contributed no rows to the
# training table (varallmod).
featgn <- featuretable[match(gnomadnoneur$protid, protid)]
# match() gives NA for protids that are absent from featuretable, which turns
# into all-NA rows. They carry no features and no identifiers, so they are
# removed instead of being scored and counted as neutral observations.
featgn <- featgn[!is.na(protid)]
testing <- featgn[!gene %in% varallmod$gene]

# Class probabilities are computed once and reused for both columns.
testing_prob <- predict(model1, newdata = testing, type = "prob")

out <- data.frame(
  # rep() keeps data.frame() valid even when `testing` has zero rows.
  obs = rep("neutral", nrow(testing)),
  neutral = testing_prob[, "neutral"],
  pathogenic = testing_prob[, "pathogenic"],
  pred = predict(model1, newdata = testing),
  gene = testing$gene,
  protid = testing$protid
)

# Give `obs` the same level set as `pred`. Taking the levels from
# unique(out$pred) only includes classes that were actually predicted: if no
# variant was predicted "neutral", every observation would become NA, and the
# level order would otherwise depend on row order.
# Assumes `pred` is a factor, as caret returns for classification models.
out$obs <- factor(out$obs, levels = levels(out$pred))
outneutgeens <- out

# Combine both result sets and show the confusion table.
vartable <- rbind(outbalancgenes, outneutgeens)
table(vartable[, c("obs", "pred")])
|
|
|
|
|
|
|
|
|
|
|
# Performance over all variants in the combined table.
modelperformance(vartable)

# Performance restricted to genes whose name contains "SCN".
modelperformance(vartable[grepl("SCN", vartable$gene), ])

# Performance restricted to genes whose name contains "CAC".
modelperformance(vartable[grepl("CAC", vartable$gene), ])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Per-feature importance table of the fitted model, without the default plot.
# `plotit = FALSE` is spelled out: the previous `plot = F` relied on partial
# argument matching and on `F`, which can be reassigned.
# Assumes model1 wraps a gbm fit (modeltype = "gbm" was requested) -- confirm.
importance_matrix <- base::summary(model1, plotit = FALSE)
colnames(importance_matrix) <- c("Feature", "Importance")

# Strip backticks from the feature names and keep features above the cutoff.
importance_matrix$Feature <- gsub("`", "", importance_matrix$Feature)
importance_matrix <- importance_matrix[importance_matrix$Importance > 0.05, ]

# Swap in the plot labels. A feature without an entry in `prettynames` keeps
# its raw name instead of turning into NA and vanishing from the axis.
pretty_label <- prettynames$feature_name4plot[
  match(importance_matrix$Feature, prettynames$feature_name)
]
importance_matrix$Feature <- ifelse(
  is.na(pretty_label),
  importance_matrix$Feature,
  pretty_label
)
importance_matrix$Feature <- gsub(", DSSP", "", importance_matrix$Feature)

# Horizontal bar chart; reversing the levels keeps the first table row on top
# after coord_flip(). unique() prevents a duplicated-level error if two
# features end up with the same label.
featimpxgb <- ggplot(
  importance_matrix,
  aes(
    x = factor(Feature, levels = rev(unique(Feature))),
    y = Importance
  )
) +
  # The bar width is a constant, so it is a geom parameter, not an aesthetic.
  geom_bar(
    fill = "#00000088",
    stat = "identity",
    position = "identity",
    width = 0.3
  ) +
  ggplot2::coord_flip() +
  xlab("Features") +
  ylab("Relative Influence") +
  ggtitle("Feature Importance") +
  # theme_bw() is a complete theme and replaces every earlier theme setting,
  # so it has to come before the theme() tweaks rather than after them.
  theme_bw() +
  theme(
    plot.title = element_text(lineheight = 0.9, face = "bold"),
    panel.grid.major.y = element_blank()
  )

featimpxgb
|
|
|
|