Quick examples

Find datasets relevant to bipolar disorder

R

library(gemma.R)
library(dplyr)
# ontology term URI shared by the ontology and filter searches below
term_uri <- "http://purl.obolibrary.org/obo/MONDO_0004985"

# plain text search
get_datasets(query = "bipolar disorder")

# ontology search
get_datasets(uris = term_uri)

# specific filters
get_datasets(
  filter = paste0(
    "experimentalDesign.experimentalFactors.factorValues.characteristics.valueUri = ",
    term_uri
  )
)

Python

import gemmapy
api = gemmapy.GemmaPy()

# ontology term URI shared by the ontology and filter searches below
term_uri = "http://purl.obolibrary.org/obo/MONDO_0004985"
value_uri_property = "experimentalDesign.experimentalFactors.factorValues.characteristics.valueUri"

# plain text search
api.get_datasets(query="bipolar disorder")

# ontology search
api.get_datasets(uris=term_uri)

# specific filters
api.get_datasets(filter=f"{value_uri_property} = {term_uri}")

experiment.shortName	experiment.name	experiment.description
GSE5389	Adult postmortem brain tissue (orbitofrontal cortex) from subjects with bipolar disorder and healthy controls	Bipolar affective disorder …
GSE7036	Expression profiling in monozygotic twins discordant for bipolar disorder	To identify genes dysregula…
GSE12654	Gene expression from human prefrontal cortex (BA10)	We performed the oligonucle…
GSE46416	State- and trait-specific gene expression in euthymia and mania	Gene expression profiles of…
GSE116820	Expression data of Glutarmatergic neuron and GABAergic neruon induced from iPSCs	We used microarrays to ide…
GSE66196	Bipolar disorder and lithium-induced gene expression in two peripheral cell models	Bipolar disorder is a seve…

Retrieve differential expression in a Parkinson's disease study

R

# differential expression p values, fold changes and statistics
dif_exp <- get_differential_expression_values("GSE8397")
contrasts <- get_dataset_differential_expression_analyses("GSE8397")

# identify the contrast of interest: the one whose experimental factors are
# all "Parkinson disease". vapply() guarantees a logical vector even when
# there are no contrasts, where sapply() would return an empty list.
is_contrast_of_interest <- vapply(
    contrasts$experimental.factors,
    \(x) all(x$summary == "Parkinson disease"),
    logical(1)
)
bp_contrast <- contrasts[is_contrast_of_interest, ]

# the lookups below use `[[`, which needs exactly one result/contrast ID
stopifnot(nrow(bp_contrast) == 1)

# result set holding the selected contrast, and its contrast-specific columns
result_set <- dif_exp[[as.character(bp_contrast$result.ID)]]
pvalue_col <- paste0("contrast_", bp_contrast$contrast.ID, "_pvalue")
log2fc_col <- paste0("contrast_", bp_contrast$contrast.ID, "_log2fc")

frame <- data.frame(
    genes = result_set$GeneSymbol,
    ncbi_ids = result_set$NCBIid,
    fdr = p.adjust(result_set[[pvalue_col]], "fdr"),
    fold_changes = result_set[[log2fc_col]]
)

# mark differentially expressed genes
frame <- frame %>%
    mutate(`Differentially Expressed` = fdr < 0.05 & abs(fold_changes) > 1)

Python

import pandas as pd
from statsmodels.stats.multitest import fdrcorrection as fdr


# differential expression p values, fold changes and statistics
dif_exp = api.get_differential_expression_values('GSE8397')
contrasts = api.get_dataset_differential_expression_analyses('GSE8397')

# identify the contrast of interest: the one whose experimental factors are
# all "Parkinson disease"
bp_contrast = contrasts[[(x.summary == "Parkinson disease").all() for x in contrasts.experimental_factors]]

# Take the first match by position. Boolean filtering keeps the original row
# labels, so a label-based `[0]` lookup fails unless the match was row 0.
result_id = bp_contrast.result_ID.iloc[0]
contrast_id = bp_contrast.contrast_ID.iloc[0]
result_set = dif_exp[result_id]

frame = pd.DataFrame({
    "genes": result_set.GeneSymbol,
    "ncbi_ids": result_set.NCBIid,
    # fdrcorrection returns (rejected, corrected p values); keep the latter
    "fdr": fdr(result_set[f"contrast_{contrast_id}_pvalue"])[1],
    "fold_changes": result_set[f"contrast_{contrast_id}_log2fc"],
})

# mark differentially expressed genes
frame["Differentially Expressed"] = (frame.fdr < .05) & (frame.fold_changes.abs() > 1)

Retrieve gene expression of genes of interest

R

# NCBI ids of the genes marked as differentially expressed
dif_exp_genes <- frame %>%
    filter(`Differentially Expressed`) %>%
    pull(ncbi_ids)

# get a bioconductor object containing
# expression data
expression <- get_dataset_object(
    "GSE8397",
    genes = dif_exp_genes,
    type = "se"
)

Python

# NCBI ids of the genes marked as differentially expressed
is_dif_exp = frame["Differentially Expressed"]
dif_exp_genes = frame.loc[is_dif_exp, "ncbi_ids"]

# get an AnnData object containing
# expression data
expression = api.get_dataset_object(["GSE8397"], genes=list(dif_exp_genes))

Vignettes

Quick examples

Find datasets relevant to bipolar disorder

R

Python

Retrieve differential expression in a Parkinson's disease study

R

Python

Retrieve gene expression of genes of interest

R

Python