library(Biostrings)
library(tidyverse)
library(readxl)
library(MicrobeR)
library(philr)
library(ggtree)
library(ape)
library(randomForest)
library(ROCR)
knitr::opts_chunk$set(echo = TRUE, message=FALSE, warning=FALSE, tidy=FALSE, cache=FALSE)
HFDcolor="#E69F00"
LFDcolor="#0072B2"
sessionInfo()
## R version 3.5.3 (2019-03-11)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.4
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] ROCR_1.0-7 gplots_3.0.1 randomForest_4.6-14
## [4] ape_5.2 ggtree_1.12.0 philr_1.6.0
## [7] MicrobeR_0.3.2 readxl_1.1.0 forcats_0.3.0
## [10] stringr_1.3.1 dplyr_0.7.5 purrr_0.2.5
## [13] readr_1.1.1 tidyr_0.8.1 tibble_1.4.2
## [16] ggplot2_3.0.0 tidyverse_1.2.1 Biostrings_2.48.0
## [19] XVector_0.20.0 IRanges_2.14.10 S4Vectors_0.18.3
## [22] BiocGenerics_0.26.0
##
## loaded via a namespace (and not attached):
## [1] colorspace_1.3-2 rprojroot_1.3-2 rstudioapi_0.7
## [4] DT_0.4 bit64_0.9-7 lubridate_1.7.4
## [7] xml2_1.2.0 codetools_0.2-16 splines_3.5.3
## [10] mnormt_1.5-5 knitr_1.20 ade4_1.7-11
## [13] jsonlite_1.5 phyloseq_1.24.0 broom_0.4.4
## [16] cluster_2.0.7-1 compiler_3.5.3 httr_1.3.1
## [19] rvcheck_0.1.0 backports_1.1.2 assertthat_0.2.0
## [22] Matrix_1.2-15 lazyeval_0.2.1 cli_1.0.0
## [25] htmltools_0.3.6 tools_3.5.3 bindrcpp_0.2.2
## [28] igraph_1.2.1 gtable_0.2.0 glue_1.2.0
## [31] reshape2_1.4.3 fastmatch_1.1-0 Rcpp_0.12.19
## [34] Biobase_2.40.0 cellranger_1.1.0 zCompositions_1.1.1
## [37] multtest_2.36.0 gdata_2.18.0 nlme_3.1-137
## [40] DECIPHER_2.8.1 iterators_1.0.9 psych_1.8.4
## [43] rvest_0.3.2 phangorn_2.4.0 gtools_3.5.0
## [46] zlibbioc_1.26.0 MASS_7.3-51.1 scales_0.5.0
## [49] rtk_0.2.5.7 hms_0.4.2 biomformat_1.10.1
## [52] rhdf5_2.24.0 yaml_2.2.0 memoise_1.1.0
## [55] NADA_1.6-1 stringi_1.2.3 RSQLite_2.1.1
## [58] foreach_1.4.4 tidytree_0.1.9 permute_0.9-4
## [61] caTools_1.17.1 truncnorm_1.0-8 bitops_1.0-6
## [64] rlang_0.2.1 pkgconfig_2.0.1 evaluate_0.10.1
## [67] lattice_0.20-38 Rhdf5lib_1.2.1 bindr_0.1.1
## [70] treeio_1.4.1 htmlwidgets_1.2 bit_1.1-14
## [73] tidyselect_0.2.4 plyr_1.8.4 magrittr_1.5
## [76] R6_2.2.2 picante_1.7 DBI_1.0.0
## [79] pillar_1.2.3 haven_1.1.1 foreign_0.8-71
## [82] withr_2.1.2 mgcv_1.8-27 survival_2.43-3
## [85] modelr_0.1.2 crayon_1.3.4 KernSmooth_2.23-15
## [88] plotly_4.7.1 rmarkdown_1.10 grid_3.5.3
## [91] data.table_1.11.4 blob_1.1.1 vegan_2.5-2
## [94] digest_0.6.15 munsell_0.5.0 viridisLite_0.3.0
## [97] quadprog_1.5-5
dir.create("figures")
dir.create("RDS")
Reusing data normalized from HFD_features.html and HFD_features_nol.html
RFdata<-readRDS("../Features/RDS/RFdata.RDS")
names(RFdata)<-paste0("l_", names(RFdata))
RFdata_nol<-readRDS("../Features_nol/RDS/RFdata.RDS")
names(RFdata_nol)<-paste0("nol_", names(RFdata_nol))
RFdata<-c(RFdata, RFdata_nol)
rm(RFdata_nol)
metadata<-readRDS("../Features/RDS/RFmetadata.RDS") %>% mutate(Task=if_else(grepl("Murine", Task), "Murine", Task))
mousestudies<-metadata %>% filter(Colonization %in% c("SPF","MUSD")) %>% group_by(StudyID) %>% summarize(Nsamp=n()) %>% arrange((Nsamp))
Predictions<-tibble()
AUCs<-tibble()
for(VSTUDY in mousestudies$StudyID){
tmpmeta<-metadata %>% mutate(Task=if_else(StudyID==VSTUDY, "LODO", Task))
for(x in names(RFdata)){
message(date(), " Running->", VSTUDY," ", x)
fit<-randomForest(
x=t(RFdata[[x]][,subset(tmpmeta, Task=="Murine")$SampleID]),
y=subset(tmpmeta, Task=="Murine")$Diet_Classification,
importance=TRUE
)
for(i in unique(tmpmeta$Task)){
message(x, " ", i)
tm<-
predict(fit, newdata=t(RFdata[[x]][,subset(tmpmeta, Task==i)$SampleID]), type="prob")[,2] %>%
prediction(., subset(tmpmeta, Task==i)$Diet_Classification) %>%
performance(., "tpr","fpr")
Predictions<-bind_rows(Predictions, tibble(LODO=VSTUDY, Data=x, Task=i, FPR=unlist(tm@x.values), TPR=unlist(tm@y.values)))
auc<-predict(fit, newdata=t(RFdata[[x]][,subset(tmpmeta, Task==i)$SampleID]), type="prob")[,2] %>%
prediction(., subset(tmpmeta, Task==i)$Diet_Classification) %>%
performance(., "auc")
AUCs<-bind_rows(AUCs, tibble(LODO=VSTUDY, Data=x, Task=i, AUC=auc@y.values[[1]]))
rm(auc, tm)
}
rm(fit)
}
rm(tmpmeta)
}
saveRDS(Predictions, "RDS/Predictions.RDS")
saveRDS(AUCs, "RDS/AUCs.RDS")
To best represet the ROC curves, will calculate the median and 1st and 3rd quartiles rounded to the nearest 0.05 for plotting purposes.
median_iqr<-function(x){data.frame(y=median(x), ymin=as.numeric(summary(x)[2]), ymax=as.numeric(summary(x)[5]))}
Predictions %>%
filter(Task!="Murine") %>%
separate(Data, c("Data","Type"), sep="_") %>%
mutate(Data=if_else(Data=="l","With Lactococcus","Without Lactococcus")) %>%
mutate(rFPR=round(FPR/0.05)*0.05) %>%
mutate(Data=factor(Data, levels=c("With Lactococcus", "Without Lactococcus"))) %>%
mutate(Task=factor(Task, levels=c("LODO","Humanized Mice","Human"))) %>%
mutate(Type=factor(Type, levels=c("OTU","KO","PhILR"))) %>%
ggplot(aes(x=rFPR, y=TPR, fill=Task, color=Task, group=Task)) +
stat_summary(geom="ribbon", fun.data=median_iqr, alpha=0.5, color=NA) +
stat_summary(geom="line", fun.data=median_iqr, alpha=0.5, linetype="dashed") +
geom_abline(linetype="dashed", color="grey50") +
facet_grid(Data~Type) +
theme_MicrobeR()
ggsave("figures/LODO_curves.pdf", height=3, width=6, useDingbats=FALSE, device="pdf")
Nice.Table(AUCs)
AUCs %>%
filter(Task=="LODO") %>%
group_by(Data) %>%
summarize_at("AUC", funs(median,min,max,IQR, sd)) %>%
separate(Data, c("Lactococcus","Dataset"), sep="_") %>%
mutate(Lactococcus=if_else(Lactococcus=="nol", "Without","With")) %>%
knitr::kable()
Lactococcus | Dataset | median | min | max | IQR | sd |
---|---|---|---|---|---|---|
With | KO | 0.9000000 | 0.4745370 | 1 | 0.3735340 | 0.1903771 |
With | OTU | 0.9333333 | 0.5555556 | 1 | 0.2222222 | 0.1448362 |
With | PhILR | 0.9717029 | 0.4444444 | 1 | 0.2150463 | 0.1569721 |
Without | KO | 0.8166667 | 0.2590278 | 1 | 0.3998595 | 0.2236134 |
Without | OTU | 0.9588816 | 0.4444444 | 1 | 0.3033190 | 0.1710324 |
Without | PhILR | 0.9708514 | 0.5277778 | 1 | 0.2175926 | 0.1596660 |
save.image(paste0("RDS/Session",format(Sys.time(),"%d%b%Y_%H%M%S"),".Rdata"))
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 4535420 242.3 7736977 413.2 NA 7736977 413.2
## Vcells 87662439 668.9 127780896 974.9 16384 94594228 721.7