Select Git revision
-
Holger Brandl authoredHolger Brandl authored
cp_utils.R 4.92 KiB
install_package("clusterProfiler")
## todo move to diffex commons
guess_cp_species <- function(ensIds){
an_id <-ensIds[1]
if(str_detect(an_id, "ENSG")){
return("human")
}else if(str_detect(an_id, "ENSMUSG")){
return("mouse")
}else if(str_detect(an_id, "ENSDARG")){
return("zebrafish")
}else if(str_detect(an_id, "FBgn")){
return("fly")
}else{
stop(paste("could not clusterProfiler species name from ", an_id))
}
}
guess_anno_db <- function(ensIds){
an_id <-ensIds[1]
if(str_detect(an_id, "ENSG")){
return("org.Hs.eg.db")
}else if(str_detect(an_id, "ENSMUSG")){
return("org.Mm.eg.db")
}else if(str_detect(an_id, "ENSDARG")){
return("org.Dr.eg.db")
}else if(str_detect(an_id, "FBgn")){
return("org.Dm.eg.db")
}else{
stop(paste("could not anno db mart from ", an_id))
}
}
#source("http://bioconductor.org/biocLite.R")
#biocLite("org.Mm.eg.db")
#biocLite("org.Hs.eg.db")
#biocLite("org.Dr.eg.db")
#biocLite("org.Dm.eg.db")
#biocLite("KEGG.db")
#biocLite("ReactomePA")
#load_pack(ReactomePA)
## clusterProfiler convenience wrapper for ensembl ids
find_enr_terms = function(ensemblIds, ...){
#browser()
annoDb = guess_anno_db(ensemblIds)
clusterProfiler::bitr(ensemblIds, fromType="ENSEMBL", toType="ENTREZID", OrgDb=annoDb) %>%
with(cp_test(ENTREZID, annoDb=annoDb, cp_species=guess_cp_species(ensemblIds), ...))
}
## does not work because of dots
#find_enr_terms_cached = function(ensemblIds, ...){
# quote({find_enr_terms(ensemblIds, ...)}) %>% cache_it(paste0("cp_id_cache", digest(ensemblIds)))
#}
cp_test = function(geneIds, annoDb, cp_species, q_cutoff=0.05){
# DEBUG geneIds <- glMapped %>% filter(cluster %in% c("cluster_9")) %$% entrez_gene_id %>% as.integer
# DEBUG geneIds <- glMapped %>% filter(cluster_id %in% c("nr1")) %$% entrez_gene_id %>% as.integer
# DEBUG geneIds <- head(glMapped,30)$entrez_gene_id
# geneIds=.$entrez_gene_id