Vignettes

Quick examples

Find datasets relevant to bipolar disorder

R

library(gemma.R)
library(dplyr)
# plain text search
get_datasets(query = "bipolar disorder")

# ontology search, by term URI
get_datasets(uris = "http://purl.obolibrary.org/obo/MONDO_0004985")

# specific filters: constrain one named property to the term URI
get_datasets(
    filter = "experimentalDesign.experimentalFactors.factorValues.characteristics.valueUri = http://purl.obolibrary.org/obo/MONDO_0004985"
)

Python

import gemmapy
# client object used by all of the Python examples
api = gemmapy.GemmaPy()

# plain text search
api.get_datasets(query="bipolar disorder")

# ontology search, by term URI
api.get_datasets(uris="http://purl.obolibrary.org/obo/MONDO_0004985")

# specific filters: constrain one named property to the term URI
api.get_datasets(
    filter="experimentalDesign.experimentalFactors.factorValues.characteristics.valueUri = http://purl.obolibrary.org/obo/MONDO_0004985"
)
experiment.shortName experiment.name experiment.description
GSE5389 Adult postmortem brain tissue (orbitofrontal cortex) from subjects with bipolar disorder and healthy controls Bipolar affective disorder …
GSE7036 Expression profiling in monozygotic twins discordant for bipolar disorder To identify genes dysregula…
GSE12654 Gene expression from human prefrontal cortex (BA10) We performed the oligonucle…
GSE46416 State- and trait-specific gene expression in euthymia and mania Gene expression profiles of…
GSE116820 Expression data of Glutarmatergic neuron and GABAergic neruon induced from iPSCs We used microarrays to ide…
GSE66196 Bipolar disorder and lithium-induced gene expression in two peripheral cell models Bipolar disorder is a seve…

Retrieve differential expression in a Parkinson's disease study

R

# differential expression results for the dataset (indexed below by result
# ID) and the table describing each contrast that was analysed
dif_exp <- get_differential_expression_values("GSE8397")
contrasts <- get_dataset_differential_expression_analyses("GSE8397")

# identify the contrast of interest: rows whose experimental factors are all
# summarised as "Parkinson disease".
# vapply() is used instead of sapply() so the mask is always a logical
# vector, even when `contrasts` has no rows (sapply() would return list()).
bp_contrast <- contrasts[vapply(
    contrasts$experimental.factors,
    \(x) all(x$summary == "Parkinson disease"),
    logical(1)
), ]

# The lookups below use `[[`, which needs exactly one result ID; fail early
# with a clear message instead of indexing with zero or several IDs.
stopifnot(
    "expected exactly one matching contrast" = nrow(bp_contrast) == 1L
)

# results table of the selected contrast, looked up once and reused
contrast_results <- dif_exp[[as.character(bp_contrast$result.ID)]]
pvalue_col <- paste0("contrast_", bp_contrast$contrast.ID, "_pvalue")
log2fc_col <- paste0("contrast_", bp_contrast$contrast.ID, "_log2fc")

frame <- data.frame(
    genes = contrast_results$GeneSymbol,
    ncbi_ids = contrast_results$NCBIid,
    fdr = p.adjust(contrast_results[[pvalue_col]], "fdr"),
    fold_changes = contrast_results[[log2fc_col]]
)

# mark differentially expressed genes
frame <- frame %>%
    mutate(`Differentially Expressed` = fdr < 0.05 & abs(fold_changes) > 1)

Python

import pandas as pd
from statsmodels.stats.multitest import fdrcorrection as fdr


# differential expression p values, fold changes and statistics
dif_exp = api.get_differential_expression_values('GSE8397')
contrasts = api.get_dataset_differential_expression_analyses('GSE8397')

# identify the contrast of interest: rows whose experimental factors are all
# summarised as "Parkinson disease"
bp_contrast = contrasts[[(x.summary == "Parkinson disease").all() for x in contrasts.experimental_factors]]

# Boolean filtering keeps the original row labels, so the matching row is
# not necessarily labelled 0; take the first match by position instead.
result_id = bp_contrast.result_ID.iloc[0]
contrast_id = bp_contrast.contrast_ID.iloc[0]

# results table of the selected contrast, looked up once and reused
contrast_results = dif_exp[result_id]

# f-strings build the column names whether the contrast ID is a str or an int
pvalue_col = f"contrast_{contrast_id}_pvalue"
log2fc_col = f"contrast_{contrast_id}_log2fc"

frame = pd.DataFrame({
    "genes": contrast_results.GeneSymbol,
    "ncbi_ids": contrast_results.NCBIid,
    # fdrcorrection returns (rejected, corrected p-values); keep the latter
    "fdr": fdr(contrast_results[pvalue_col])[1],
    "fold_changes": contrast_results[log2fc_col],
})

# mark differentially expressed genes
frame["Differentially Expressed"] = (frame.fdr < .05) & (frame.fold_changes.abs() > 1)

Retrieve gene expression of genes of interest

R

# NCBI ids of the genes flagged as differentially expressed
dif_exp_genes <- frame %>%
    filter(`Differentially Expressed`) %>%
    pull(ncbi_ids)

# get a bioconductor object containing
# expression data
expression <- get_dataset_object(
    "GSE8397",
    genes = dif_exp_genes,
    type = "se"
)

Python

# NCBI ids of the genes flagged as differentially expressed
is_de = frame["Differentially Expressed"]
dif_exp_genes = frame.loc[is_de, "ncbi_ids"]

# get an AnnData object containing
# expression data
expression = api.get_dataset_object(
    ["GSE8397"],
    genes=list(dif_exp_genes),
)