Gets alternative gene annotations from biomaRt
Source:R/methods-ReducedExperiment.R
get_gene_ids.Rd
Uses getBM to get alternative gene IDs for ReducedExperiment objects.
The new annotations are added as columns to the input object's rowData.
Usage
# S4 method for class 'ReducedExperiment'
getGeneIDs(
object,
gene_id_col = "rownames",
gene_id_type = "ensembl_gene_id",
ids_to_get = c("hgnc_symbol", "entrezgene_id"),
dataset = "hsapiens_gene_ensembl",
mart = NULL,
biomart_out = NULL
)
Arguments
- object
ReducedExperiment object.
- gene_id_col
The column in
rowData(object)
that will be used to query biomaRt. Setting this to "rownames" instead uses rownames(object)
for matching.
- gene_id_type
The type of attribute to be used to query with biomaRt. See the
filters
argument of getBM.
- ids_to_get
The type of attribute to get from biomaRt. See the
attributes
argument of getBM.
- dataset
The Ensembl dataset to retrieve. See the
dataset
argument of useEnsembl. If mart is not NULL, this argument is ignored.
- mart
An optional mart object to use. See the
mart
argument of getBM. If provided, this object is used to query biomaRt for the conversion of gene IDs. If biomart_out is not NULL, this argument is ignored.
- biomart_out
An optional
data.frame
containing the output of a call to getBM. If provided, this object is used for the conversion of gene IDs.
Examples
set.seed(2)
airway <- ReducedExperiment:::.getAirwayData(n_features = 500)
set.seed(1)
airway_fe <- estimateFactors(airway, nc = 2, use_stability = FALSE, method = "imax")
# rowData before getting additional gene IDs
rowData(airway_fe)
#> DataFrame with 500 rows and 10 columns
#> gene_id gene_name entrezid gene_biotype
#> <character> <character> <integer> <character>
#> ENSG00000186523 ENSG00000186523 FAM86B1 NA protein_coding
#> ENSG00000203871 ENSG00000203871 C6orf164 NA protein_coding
#> ENSG00000127125 ENSG00000127125 PPCS NA protein_coding
#> ENSG00000177692 ENSG00000177692 DNAJC28 NA protein_coding
#> ENSG00000161249 ENSG00000161249 DMKN NA protein_coding
#> ... ... ... ... ...
#> ENSG00000113361 ENSG00000113361 CDH6 NA protein_coding
#> ENSG00000077152 ENSG00000077152 UBE2T NA protein_coding
#> ENSG00000115963 ENSG00000115963 RND3 NA protein_coding
#> ENSG00000147439 ENSG00000147439 BIN3 NA protein_coding
#> ENSG00000116661 ENSG00000116661 FBXO2 NA protein_coding
#> gene_seq_start gene_seq_end seq_name seq_strand
#> <integer> <integer> <character> <integer>
#> ENSG00000186523 12039605 12051642 8 -1
#> ENSG00000203871 88106840 88109467 6 1
#> ENSG00000127125 42921788 42939056 1 1
#> ENSG00000177692 34860497 34864027 21 -1
#> ENSG00000161249 35988122 36004560 19 -1
#> ... ... ... ... ...
#> ENSG00000113361 31193857 31329253 5 1
#> ENSG00000077152 202300785 202311108 1 -1
#> ENSG00000115963 151324709 151395525 2 -1
#> ENSG00000147439 22477931 22526661 8 -1
#> ENSG00000116661 11708424 11715842 1 -1
#> seq_coord_system symbol
#> <integer> <character>
#> ENSG00000186523 NA FAM86B1
#> ENSG00000203871 NA C6orf164
#> ENSG00000127125 NA PPCS
#> ENSG00000177692 NA DNAJC28
#> ENSG00000161249 NA DMKN
#> ... ... ...
#> ENSG00000113361 NA CDH6
#> ENSG00000077152 NA UBE2T
#> ENSG00000115963 NA RND3
#> ENSG00000147439 NA BIN3
#> ENSG00000116661 NA FBXO2
# For this example we run `getGeneIDs` using a preloaded biomart query
# (`biomart_out`) to avoid actually querying ensembl during testing
# Note: do not use this file for your actual data
biomart_out <- readRDS(system.file(
"extdata",
"biomart_out.rds",
package = "ReducedExperiment"
))
airway_fe <- getGeneIDs(airway_fe, biomart_out = biomart_out)
# rowData after getting additional gene IDs
rowData(airway_fe)
#> DataFrame with 500 rows and 13 columns
#> ensembl_gene_id gene_id gene_name entrezid
#> <character> <character> <character> <integer>
#> ENSG00000186523 ENSG00000186523 ENSG00000186523 FAM86B1 NA
#> ENSG00000203871 ENSG00000203871 ENSG00000203871 C6orf164 NA
#> ENSG00000127125 ENSG00000127125 ENSG00000127125 PPCS NA
#> ENSG00000177692 ENSG00000177692 ENSG00000177692 DNAJC28 NA
#> ENSG00000161249 ENSG00000161249 ENSG00000161249 DMKN NA
#> ... ... ... ... ...
#> ENSG00000113361 ENSG00000113361 ENSG00000113361 CDH6 NA
#> ENSG00000077152 ENSG00000077152 ENSG00000077152 UBE2T NA
#> ENSG00000115963 ENSG00000115963 ENSG00000115963 RND3 NA
#> ENSG00000147439 ENSG00000147439 ENSG00000147439 BIN3 NA
#> ENSG00000116661 ENSG00000116661 ENSG00000116661 FBXO2 NA
#> gene_biotype gene_seq_start gene_seq_end seq_name
#> <character> <integer> <integer> <character>
#> ENSG00000186523 protein_coding 12039605 12051642 8
#> ENSG00000203871 protein_coding 88106840 88109467 6
#> ENSG00000127125 protein_coding 42921788 42939056 1
#> ENSG00000177692 protein_coding 34860497 34864027 21
#> ENSG00000161249 protein_coding 35988122 36004560 19
#> ... ... ... ... ...
#> ENSG00000113361 protein_coding 31193857 31329253 5
#> ENSG00000077152 protein_coding 202300785 202311108 1
#> ENSG00000115963 protein_coding 151324709 151395525 2
#> ENSG00000147439 protein_coding 22477931 22526661 8
#> ENSG00000116661 protein_coding 11708424 11715842 1
#> seq_strand seq_coord_system symbol hgnc_symbol
#> <integer> <integer> <character> <character>
#> ENSG00000186523 -1 NA FAM86B1 FAM86B1
#> ENSG00000203871 1 NA C6orf164 NA
#> ENSG00000127125 1 NA PPCS PPCS
#> ENSG00000177692 -1 NA DNAJC28 DNAJC28
#> ENSG00000161249 -1 NA DMKN DMKN
#> ... ... ... ... ...
#> ENSG00000113361 1 NA CDH6 CDH6
#> ENSG00000077152 -1 NA UBE2T UBE2T
#> ENSG00000115963 -1 NA RND3 RND3
#> ENSG00000147439 -1 NA BIN3 BIN3
#> ENSG00000116661 -1 NA FBXO2 FBXO2
#> entrezgene_id
#> <integer>
#> ENSG00000186523 85002
#> ENSG00000203871 NA
#> ENSG00000127125 79717
#> ENSG00000177692 54943
#> ENSG00000161249 93099
#> ... ...
#> ENSG00000113361 1004
#> ENSG00000077152 29089
#> ENSG00000115963 390
#> ENSG00000147439 55909
#> ENSG00000116661 26232