Gets alternative gene annotations from biomaRt

Uses getBM from biomaRt to retrieve alternative gene IDs for ReducedExperiment objects. The new annotations are added as columns to the input object's rowData.

Usage

# S4 method for class 'ReducedExperiment'
getGeneIDs(
  object,
  gene_id_col = "rownames",
  gene_id_type = "ensembl_gene_id",
  ids_to_get = c("hgnc_symbol", "entrezgene_id"),
  dataset = "hsapiens_gene_ensembl",
  mart = NULL,
  biomart_out = NULL
)

Arguments

object: ReducedExperiment object.
gene_id_col: The column in rowData(object) that will be used to query biomaRt. Setting this to "rownames" instead uses rownames(object) for matching.
gene_id_type: The type of gene ID used to query biomaRt (by default, "ensembl_gene_id"). See the filters argument of getBM.
ids_to_get: The attribute(s) to retrieve from biomaRt, given as a character vector. See the attributes argument of getBM.
dataset: The Ensembl dataset to retrieve. See the dataset argument of useEnsembl. If mart is not NULL, this argument is ignored.
mart: An optional mart object to use. See the mart argument of getBM. If provided, this object is used to query biomaRt for the conversion of gene IDs. If biomart_out is not NULL, this argument is ignored.
biomart_out: An optional data.frame containing the output of a call to getBM. If provided, this object is used for the conversion of gene IDs instead of querying biomaRt.

Value

Returns the original object, with additional variables added to the rowData slot.

Author

Jack Gisby

Examples

set.seed(2)
airway <- ReducedExperiment:::.getAirwayData(n_features = 500)

set.seed(1)
airway_fe <- estimateFactors(airway, nc = 2, use_stability = FALSE, method = "imax")

# rowData before getting additional gene IDs
rowData(airway_fe)
#> DataFrame with 500 rows and 10 columns
#>                         gene_id   gene_name  entrezid   gene_biotype
#>                     <character> <character> <integer>    <character>
#> ENSG00000186523 ENSG00000186523     FAM86B1        NA protein_coding
#> ENSG00000203871 ENSG00000203871    C6orf164        NA protein_coding
#> ENSG00000127125 ENSG00000127125        PPCS        NA protein_coding
#> ENSG00000177692 ENSG00000177692     DNAJC28        NA protein_coding
#> ENSG00000161249 ENSG00000161249        DMKN        NA protein_coding
#> ...                         ...         ...       ...            ...
#> ENSG00000113361 ENSG00000113361        CDH6        NA protein_coding
#> ENSG00000077152 ENSG00000077152       UBE2T        NA protein_coding
#> ENSG00000115963 ENSG00000115963        RND3        NA protein_coding
#> ENSG00000147439 ENSG00000147439        BIN3        NA protein_coding
#> ENSG00000116661 ENSG00000116661       FBXO2        NA protein_coding
#>                 gene_seq_start gene_seq_end    seq_name seq_strand
#>                      <integer>    <integer> <character>  <integer>
#> ENSG00000186523       12039605     12051642           8         -1
#> ENSG00000203871       88106840     88109467           6          1
#> ENSG00000127125       42921788     42939056           1          1
#> ENSG00000177692       34860497     34864027          21         -1
#> ENSG00000161249       35988122     36004560          19         -1
#> ...                        ...          ...         ...        ...
#> ENSG00000113361       31193857     31329253           5          1
#> ENSG00000077152      202300785    202311108           1         -1
#> ENSG00000115963      151324709    151395525           2         -1
#> ENSG00000147439       22477931     22526661           8         -1
#> ENSG00000116661       11708424     11715842           1         -1
#>                 seq_coord_system      symbol
#>                        <integer> <character>
#> ENSG00000186523               NA     FAM86B1
#> ENSG00000203871               NA    C6orf164
#> ENSG00000127125               NA        PPCS
#> ENSG00000177692               NA     DNAJC28
#> ENSG00000161249               NA        DMKN
#> ...                          ...         ...
#> ENSG00000113361               NA        CDH6
#> ENSG00000077152               NA       UBE2T
#> ENSG00000115963               NA        RND3
#> ENSG00000147439               NA        BIN3
#> ENSG00000116661               NA       FBXO2

# For this example, we run `getGeneIDs` using a preloaded biomaRt query
# (`biomart_out`) to avoid actually querying Ensembl during testing.
# Note: do not use this file for your actual data.
biomart_out <- readRDS(system.file(
    "extdata",
    "biomart_out.rds",
    package = "ReducedExperiment"
))
airway_fe <- getGeneIDs(airway_fe, biomart_out = biomart_out)

# rowData after getting additional gene IDs
rowData(airway_fe)
#> DataFrame with 500 rows and 13 columns
#>                 ensembl_gene_id         gene_id   gene_name  entrezid
#>                     <character>     <character> <character> <integer>
#> ENSG00000186523 ENSG00000186523 ENSG00000186523     FAM86B1        NA
#> ENSG00000203871 ENSG00000203871 ENSG00000203871    C6orf164        NA
#> ENSG00000127125 ENSG00000127125 ENSG00000127125        PPCS        NA
#> ENSG00000177692 ENSG00000177692 ENSG00000177692     DNAJC28        NA
#> ENSG00000161249 ENSG00000161249 ENSG00000161249        DMKN        NA
#> ...                         ...             ...         ...       ...
#> ENSG00000113361 ENSG00000113361 ENSG00000113361        CDH6        NA
#> ENSG00000077152 ENSG00000077152 ENSG00000077152       UBE2T        NA
#> ENSG00000115963 ENSG00000115963 ENSG00000115963        RND3        NA
#> ENSG00000147439 ENSG00000147439 ENSG00000147439        BIN3        NA
#> ENSG00000116661 ENSG00000116661 ENSG00000116661       FBXO2        NA
#>                   gene_biotype gene_seq_start gene_seq_end    seq_name
#>                    <character>      <integer>    <integer> <character>
#> ENSG00000186523 protein_coding       12039605     12051642           8
#> ENSG00000203871 protein_coding       88106840     88109467           6
#> ENSG00000127125 protein_coding       42921788     42939056           1
#> ENSG00000177692 protein_coding       34860497     34864027          21
#> ENSG00000161249 protein_coding       35988122     36004560          19
#> ...                        ...            ...          ...         ...
#> ENSG00000113361 protein_coding       31193857     31329253           5
#> ENSG00000077152 protein_coding      202300785    202311108           1
#> ENSG00000115963 protein_coding      151324709    151395525           2
#> ENSG00000147439 protein_coding       22477931     22526661           8
#> ENSG00000116661 protein_coding       11708424     11715842           1
#>                 seq_strand seq_coord_system      symbol hgnc_symbol
#>                  <integer>        <integer> <character> <character>
#> ENSG00000186523         -1               NA     FAM86B1     FAM86B1
#> ENSG00000203871          1               NA    C6orf164          NA
#> ENSG00000127125          1               NA        PPCS        PPCS
#> ENSG00000177692         -1               NA     DNAJC28     DNAJC28
#> ENSG00000161249         -1               NA        DMKN        DMKN
#> ...                    ...              ...         ...         ...
#> ENSG00000113361          1               NA        CDH6        CDH6
#> ENSG00000077152         -1               NA       UBE2T       UBE2T
#> ENSG00000115963         -1               NA        RND3        RND3
#> ENSG00000147439         -1               NA        BIN3        BIN3
#> ENSG00000116661         -1               NA       FBXO2       FBXO2
#>                 entrezgene_id
#>                     <integer>
#> ENSG00000186523         85002
#> ENSG00000203871            NA
#> ENSG00000127125         79717
#> ENSG00000177692         54943
#> ENSG00000161249         93099
#> ...                       ...
#> ENSG00000113361          1004
#> ENSG00000077152         29089
#> ENSG00000115963           390
#> ENSG00000147439         55909
#> ENSG00000116661         26232

Usage

Arguments

Value

See also

Author

Examples