这是Bioc2015的学习笔记。
对于GRanges基础的部分我就跳过了。心得以下几点
如何改变seqnames的风格,比如有些有chr,想变成没有的;有时候没有chr起始,但想让它有。
library(GenomicRanges)
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
##
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
##
## The following object is masked from 'package:stats':
##
## xtabs
##
## The following objects are masked from 'package:base':
##
## Filter, Find, Map, Position, Reduce, anyDuplicated, append,
## as.data.frame, as.vector, cbind, colnames, do.call,
## duplicated, eval, evalq, get, intersect, is.unsorted, lapply,
## mapply, match, mget, order, paste, pmax, pmax.int, pmin,
## pmin.int, rank, rbind, rep.int, rownames, sapply, setdiff,
## sort, table, tapply, union, unique, unlist, unsplit
##
## Loading required package: S4Vectors
## Loading required package: stats4
## Creating a generic function for 'nchar' from package 'base' in package 'S4Vectors'
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
gr <- GRanges("chr1", IRanges(seq(1000000, 2000000, by=10000), width=500), "+")
library(GenomeInfoDb)
seqlevelsStyle(gr)
## [1] "UCSC"
seqlevelsStyle(gr) <- "NCBI"
gr
## GRanges object with 101 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] 1 [1000000, 1000499] +
## [2] 1 [1010000, 1010499] +
## [3] 1 [1020000, 1020499] +
## [4] 1 [1030000, 1030499] +
## [5] 1 [1040000, 1040499] +
## ... ... ... ...
## [97] 1 [1960000, 1960499] +
## [98] 1 [1970000, 1970499] +
## [99] 1 [1980000, 1980499] +
## [100] 1 [1990000, 1990499] +
## [101] 1 [2000000, 2000499] +
## -------
## seqinfo: 1 sequence from an unspecified genome; no seqlengths
seqlevelsStyle(gr) <- "UCSC"
gr
## GRanges object with 101 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 [1000000, 1000499] +
## [2] chr1 [1010000, 1010499] +
## [3] chr1 [1020000, 1020499] +
## [4] chr1 [1030000, 1030499] +
## [5] chr1 [1040000, 1040499] +
## ... ... ... ...
## [97] chr1 [1960000, 1960499] +
## [98] chr1 [1970000, 1970499] +
## [99] chr1 [1980000, 1980499] +
## [100] chr1 [1990000, 1990499] +
## [101] chr1 [2000000, 2000499] +
## -------
## seqinfo: 1 sequence from an unspecified genome; no seqlengths
如何获取GRangesList的每一个元素的长度
gr$count <- rpois(length(gr), 5)
gr
## GRanges object with 101 ranges and 1 metadata column:
## seqnames ranges strand | count
## <Rle> <IRanges> <Rle> | <integer>
## [1] chr1 [1000000, 1000499] + | 7
## [2] chr1 [1010000, 1010499] + | 4
## [3] chr1 [1020000, 1020499] + | 7
## [4] chr1 [1030000, 1030499] + | 5
## [5] chr1 [1040000, 1040499] + | 7
## ... ... ... ... ... ...
## [97] chr1 [1960000, 1960499] + | 4
## [98] chr1 [1970000, 1970499] + | 8
## [99] chr1 [1980000, 1980499] + | 5
## [100] chr1 [1990000, 1990499] + | 6
## [101] chr1 [2000000, 2000499] + | 3
## -------
## seqinfo: 1 sequence from an unspecified genome; no seqlengths
grl <- split(gr, gr$count)
class(grl)
## [1] "GRangesList"
## attr(,"package")
## [1] "GenomicRanges"
tc <- table(elementLengths(grl))
tc
##
## 1 4 7 8 10 11 15 19
## 4 2 1 1 1 1 1 2
grl[elementLengths(grl)==as.numeric(names(tc[length(tc)]))]
## GRangesList object of length 2:
## $4
## GRanges object with 19 ranges and 1 metadata column:
## seqnames ranges strand | count
## <Rle> <IRanges> <Rle> | <integer>
## [1] chr1 [1010000, 1010499] + | 4
## [2] chr1 [1070000, 1070499] + | 4
## [3] chr1 [1260000, 1260499] + | 4
## [4] chr1 [1270000, 1270499] + | 4
## [5] chr1 [1290000, 1290499] + | 4
## ... ... ... ... ... ...
## [15] chr1 [1800000, 1800499] + | 4
## [16] chr1 [1840000, 1840499] + | 4
## [17] chr1 [1900000, 1900499] + | 4
## [18] chr1 [1940000, 1940499] + | 4
## [19] chr1 [1960000, 1960499] + | 4
##
## ...
## <1 more element>
## -------
## seqinfo: 1 sequence from an unspecified genome; no seqlengths
relist一个unlist之后的GRanges
gr <- unlist(grl)
grl2 <- relist(flesh=gr, skeleton=grl)
grl2
## GRangesList object of length 13:
## $0
## GRanges object with 1 range and 1 metadata column:
## seqnames ranges strand | count
## <Rle> <IRanges> <Rle> | <integer>
## 0 chr1 [1160000, 1160499] + | 0
##
## $1
## GRanges object with 4 ranges and 1 metadata column:
## seqnames ranges strand | count
## 1 chr1 [1580000, 1580499] + | 1
## 1 chr1 [1780000, 1780499] + | 1
## 1 chr1 [1890000, 1890499] + | 1
## 1 chr1 [1930000, 1930499] + | 1
##
## $2
## GRanges object with 8 ranges and 1 metadata column:
## seqnames ranges strand | count
## 2 chr1 [1090000, 1090499] + | 2
## 2 chr1 [1190000, 1190499] + | 2
## 2 chr1 [1220000, 1220499] + | 2
## 2 chr1 [1510000, 1510499] + | 2
## 2 chr1 [1550000, 1550499] + | 2
## 2 chr1 [1710000, 1710499] + | 2
## 2 chr1 [1850000, 1850499] + | 2
## 2 chr1 [1950000, 1950499] + | 2
##
## ...
## <10 more elements>
## -------
## seqinfo: 1 sequence from an unspecified genome; no seqlengths
identical(grl, grl2)
## [1] FALSE