library(gemma.R)
library(dplyr)
# Plain text search
get_datasets(
  query = "bipolar disorder"
)

# Ontology search by term URI
get_datasets(
  uris = "http://purl.obolibrary.org/obo/MONDO_0004985"
)

# Specific filter on the value URI of the experimental design's factor values
get_datasets(
  filter = "experimentalDesign.experimentalFactors.factorValues.characteristics.valueUri = http://purl.obolibrary.org/obo/MONDO_0004985"
)
import gemmapy
# Client object used for every Gemma request below
api = gemmapy.GemmaPy()

# Plain text search
api.get_datasets(query="bipolar disorder")

# Ontology search by term URI
api.get_datasets(uris="http://purl.obolibrary.org/obo/MONDO_0004985")

# Specific filter on the value URI of the experimental design's factor values
api.get_datasets(
    filter="experimentalDesign.experimentalFactors.factorValues.characteristics.valueUri = http://purl.obolibrary.org/obo/MONDO_0004985"
)
experiment.shortName | experiment.name | experiment.description |
---|---|---|
GSE5389 | Adult postmortem brain tissue (orbitofrontal cortex) from subjects with bipolar disorder and healthy controls | Bipolar affective disorder … |
GSE7036 | Expression profiling in monozygotic twins discordant for bipolar disorder | To identify genes dysregula… |
GSE12654 | Gene expression from human prefrontal cortex (BA10) | We performed the oligonucle… |
GSE46416 | State- and trait-specific gene expression in euthymia and mania | Gene expression profiles of… |
GSE116820 | Expression data of Glutarmatergic neuron and GABAergic neruon induced from iPSCs | We used microarrays to ide… |
GSE66196 | Bipolar disorder and lithium-induced gene expression in two peripheral cell models | Bipolar disorder is a seve… |
# Differential expression p values, fold changes and statistics for GSE8397,
# plus the table describing which contrasts those statistics belong to.
dif_exp <- get_differential_expression_values("GSE8397")
contrasts <- get_dataset_differential_expression_analyses("GSE8397")

# Identify the contrast of interest: rows whose experimental factors are all
# summarised as "Parkinson disease". vapply() pins the result to exactly one
# logical per row, so an empty `contrasts` table gives logical(0) rather than
# the list() that sapply() would return.
is_parkinson <- vapply(
  contrasts$experimental.factors,
  \(x) isTRUE(all(x$summary == "Parkinson disease")),
  logical(1),
  USE.NAMES = FALSE
)
bp_contrast <- contrasts[is_parkinson, ]
if (nrow(bp_contrast) == 0L) {
  stop("No 'Parkinson disease' contrast found for GSE8397", call. = FALSE)
}

# `[[` needs a single key: with several matching rows a vector key would be
# treated as a recursive index. Use the first matching contrast.
result_id <- as.character(bp_contrast$result.ID[1])
contrast_id <- bp_contrast$contrast.ID[1]
contrast_values <- dif_exp[[result_id]]

frame <- data.frame(
  genes = contrast_values$GeneSymbol,
  ncbi_ids = contrast_values$NCBIid,
  fdr = p.adjust(
    contrast_values[[paste0("contrast_", contrast_id, "_pvalue")]],
    method = "fdr"
  ),
  fold_changes = contrast_values[[paste0("contrast_", contrast_id, "_log2fc")]]
)

# Mark differentially expressed genes: adjusted p value below 0.05 and an
# absolute log2 fold change above 1.
frame <- frame %>%
  mutate(`Differentially Expressed` = fdr < 0.05 & abs(fold_changes) > 1)
import pandas as pd
from statsmodels.stats.multitest import fdrcorrection as fdr
# Differential expression p values, fold changes and statistics for GSE8397,
# plus the table describing which contrasts those statistics belong to.
dif_exp = api.get_differential_expression_values('GSE8397')
contrasts = api.get_dataset_differential_expression_analyses('GSE8397')

# Keep the contrasts whose experimental factors are all summarised as
# "Parkinson disease".
is_parkinson = [
    (x.summary == "Parkinson disease").all()
    for x in contrasts.experimental_factors
]
bp_contrast = contrasts[is_parkinson]
if len(bp_contrast) == 0:
    raise ValueError("No 'Parkinson disease' contrast found for GSE8397")

# Boolean filtering keeps the original row labels, so `[0]` (a label lookup)
# fails whenever the first row of `contrasts` was filtered out. `.iloc[0]`
# always takes the first remaining row.
# NOTE(review): assumes `contrasts` is a pandas DataFrame - confirm.
result_id = bp_contrast.result_ID.iloc[0]
contrast_id = bp_contrast.contrast_ID.iloc[0]
contrast_values = dif_exp[result_id]

# The f-strings build the column names whether the contrast ID is a string
# or a number.
frame = pd.DataFrame({
    "genes": contrast_values.GeneSymbol,
    "ncbi_ids": contrast_values.NCBIid,
    # fdr() returns a pair; element 1 holds the corrected p values
    "fdr": fdr(contrast_values[f"contrast_{contrast_id}_pvalue"])[1],
    "fold_changes": contrast_values[f"contrast_{contrast_id}_log2fc"],
})

# Mark differentially expressed genes: adjusted p value below 0.05 and an
# absolute log2 fold change above 1.
frame["Differentially Expressed"] = (frame.fdr < .05) & (frame.fold_changes.abs() > 1)
# NCBI ids of the genes flagged as differentially expressed
dif_exp_genes <- frame %>%
  filter(`Differentially Expressed`) %>%
  pull(ncbi_ids)

# Get a Bioconductor object containing the expression data for those genes
expression <- get_dataset_object(
  "GSE8397",
  genes = dif_exp_genes,
  type = "se"
)
# NCBI ids of the rows flagged as differentially expressed
dif_exp_genes = frame.loc[frame["Differentially Expressed"], "ncbi_ids"]

# Get an AnnData object containing the expression data for those genes
expression = api.get_dataset_object(
    ["GSE8397"],
    genes=list(dif_exp_genes),
)