Genomic Ranges

rle

Run Length Encoding Integer Ranges Interval Ranges

# Chromosome label vector: one contiguous run per chromosome (17,000 labels
# in total), then compressed with base R's run-length encoding.
chr.str <- rep(
  c("ChrI", "ChrII", "ChrIII", "ChrIV"),
  times = c(3000, 5000, 2000, 7000)
)
v <- rle(chr.str)
as.vector(v)
# Run Length Encoding
#   lengths: int [1:4] 3000 5000 2000 7000
#   values : chr [1:4] "ChrI" "ChrII" "ChrIII" "ChrIV"
as.character(v)
# [1] "c(3000, 5000, 2000, 7000)"                    
# [2] "c(\"ChrI\", \"ChrII\", \"ChrIII\", \"ChrIV\")"
identical(inverse.rle(v),chr.str)
# TRUE

Even though factors are basically just integer vectors with some information about levels attached, the rle() function doesn’t work with factors. Another potential problem is that rle() treats each NA as a run of length 1, even when the NAs are adjacent to each other.

is.na(chr.str) <- 2:20
# rle(factor(chr.str))
# 'x' must be a vector of an atomic type
rle(chr.str)
# Run Length Encoding
#   lengths: int [1:24] 1 1 1 1 1 1 1 1 1 1 ...
#   values : chr [1:24] "ChrI" NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA "ChrI" "ChrII" "ChrIII" "ChrIV"
#' Run-length encode a vector, collapsing consecutive NAs into one run
#'
#' Unlike `rle()`, factors are accepted (they are encoded by their labels)
#' and adjacent `NA`s form a single run instead of one run per `NA`.
#'
#' @param x A vector or factor; it is converted with `as.character()` first.
#' @param ... Accepted for call compatibility; currently unused.
#' @return An object of class `"rle"` with integer `lengths` and character
#'   `values`, where an `NA` run is reported as `NA`.
rrle <- function(x, ...) {
  cx <- as.character(x)
  n <- length(cx)
  if (n == 0L) {
    return(structure(list(lengths = integer(), values = cx), class = "rle"))
  }

  missing <- is.na(cx)
  prev_na <- missing[-n]
  next_na <- missing[-1L]
  # A run ends where the NA status flips, or where two non-NA neighbours
  # differ. Comparing directly (instead of substituting a placeholder string
  # for NA) means no real value can ever be mistaken for a missing one.
  run_ends_here <- (prev_na != next_na) |
    (!prev_na & !next_na & cx[-n] != cx[-1L])

  ends <- c(which(run_ends_here), n)
  structure(
    list(lengths = diff(c(0L, ends)), values = cx[ends]),
    class = "rle"
  )
}
rrle(chr.str)
rrle(factor(chr.str))
# Run Length Encoding
#   lengths: int [1:6] 1 19 2980 5000 2000 7000
#   values : chr [1:6] "ChrI" NA "ChrI" "ChrII" "ChrIII" "ChrIV"

# object size
as.vector(object.size(rrle(chr.str))/object.size(chr.str))
# 0.006459569

# but
set.seed(1)
rchr.str <- chr.str[sample(length(chr.str))]
as.vector(object.size(rrle(rchr.str))/object.size(chr.str))
# 1.049797

Rle

library(IRanges)
chr.rle <- Rle(chr.str) # as(chr.str, "Rle")
# character-Rle of length 17000 with 6 runs
#   Lengths:        1       19     2980     5000     2000     7000
#   Values :   "ChrI"     "NA"   "ChrI"  "ChrII" "ChrIII"  "ChrIV"
identical(as.character(chr.rle), chr.str)
# TRUE

chr.rle
# Replace the NA run's value through the runValue() accessor instead of
# writing to the @values slot. The replacement method rebuilds the Rle from
# the new values and the existing run lengths, so the now-adjacent "ChrI"
# runs are merged without expanding to a full character vector and back.
runValue(chr.rle)[2] <- "ChrI"
# character-Rle of length 17000 with 4 runs
#   Lengths:     3000     5000     2000     7000
#   Values :   "ChrI"  "ChrII" "ChrIII"  "ChrIV"
chr.rle[1:5] #== chr.rle[IRanges(1,5)]
# character-Rle of length 5 with 1 run
#   Lengths:      5
#   Values : "ChrI"
subset(chr.rle,chr.rle>="ChrII")
# character-Rle of length 14000 with 3 runs
#   Lengths:     5000     2000     7000
#   Values :  "ChrII" "ChrIII"  "ChrIV"
c(chr.rle,chr.rle) #== rep(chr.rle,2) append(chr.rle,chr.rle)
# character-Rle of length 34000 with 8 runs
#   Lengths:     3000     5000 ...     2000     7000
#   Values :   "ChrI"  "ChrII" ... "ChrIII"  "ChrIV"
chr.rle <- Rle(values = c("ChrI", "ChrII", "ChrIII", "ChrIV", "ChrV","ChrII"),lengths = c(1000,2000,4000,3500,1000, 500))
# character-Rle of length 12000 with 6 runs
#   Lengths:     1000     2000     4000     3500     1000      500
#   Values :   "ChrI"  "ChrII" "ChrIII"  "ChrIV"   "ChrV"  "ChrII"
str(chr.rle)
# Formal class 'Rle' [package "S4Vectors"] with 4 slots
#   ..@ values         : chr [1:6] "ChrI" "ChrII" "ChrIII" "ChrIV" ...
#   ..@ lengths        : int [1:6] 1000 2000 4000 3500 1000 500
#   ..@ elementMetadata: NULL
#   ..@ metadata       : list()
length(chr.rle)
# [1] 12000
runLength(chr.rle)
# 1000 2000 4000 3500 1000  500
runValue(chr.rle)
#  "ChrI"   "ChrII"  "ChrIII" "ChrIV"  "ChrV"   "ChrII" 
nrun(chr.rle)
# 6
rbind(start(chr.rle),end(chr.rle),width(chr.rle))
#      [,1] [,2] [,3]  [,4]  [,5]  [,6]
# [1,]    1 1001 3001  7001 10501 11501
# [2,] 1000 3000 7000 10500 11500 12000
# [3,] 1000 2000 4000  3500  1000   500
range(chr.rle)
# [1] "ChrI" "ChrV"

runLength(chr.rle) <- rep(2500, nrun(chr.rle))
runValue(chr.rle)[3:4] <- c("ChrIV", "ChrIIV")
chr.rle
# character-Rle of length 15000 with 6 runs
#   Lengths:     2500     2500     2500     2500     2500     2500
#   Values :   "ChrI"  "ChrII"  "ChrIV" "ChrIIV"   "ChrV"  "ChrII"

identical(as.factor(chr.rle),factor(as.character(chr.rle)))
# TRUE
set.seed(0)
rle1 <- Rle(sample(4, 6, replace = TRUE))
rle2 <- Rle(sample(5, 12, replace = TRUE))
# rle1
# Lengths: 2 1 1 1 1
# Values : 2 4 1 4 2
# rle2
# Lengths: 1 2 1 2 1 1 1 1 1 1
# Values : 5 2 3 5 2 4 5 3 4 2

identical(rle1 + rle2,Rle(as.integer(rle1)+as.integer(rle2)))
# TRUE
identical(rle1 * rle2,Rle(as.integer(rle1)*as.integer(rle2)))
# TRUE
sqrt(rle1)
# numeric-Rle of length 6 with 5 runs
  # Lengths:                1                2                1                1                1
  # Values :                2  1.4142135623731 1.73205080756888                2                1
rle1 > 2 | rle2 > 4
# logical-Rle of length 12 with 8 runs
#   Lengths:     2     1     2     1     1     2     2     1
#   Values :  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE
sum(rle1 > 2 | rle2 > 4)
# 7

paste(rle1, rle2, sep = "")
# character-Rle of length 12 with 12 runs
#   Lengths:    1    1    1    1    1    1    1    1    1    1    1    1
#   Values : "25" "22" "42" "13" "45" "25" "22" "24" "45" "13" "44" "22"

IRanges

Ranges

ir1 <- IRanges(start = 1:10, width = 10:1)
# IRanges object with 10 ranges and 0 metadata columns:
#            start       end     width
#        <integer> <integer> <integer>
#    [1]         1        10        10
#    [2]         2        10         9
#    [3]         3        10         8
#    [4]         4        10         7
#    [5]         5        10         6
#    [6]         6        10         5
#    [7]         7        10         4
#    [8]         8        10         3
#    [9]         9        10         2
#   [10]        10        10         1
head(ir1)[1:2];tail(ir1)[1:2];rev(ir1)[1:2]
# IRanges object with 2 ranges and 0 metadata columns:
#           start       end     width
#       <integer> <integer> <integer>
#   [1]         1        10        10
#   [2]         2        10         9
# IRanges object with 2 ranges and 0 metadata columns:
#           start       end     width
#       <integer> <integer> <integer>
#   [1]         5        10         6
#   [2]         6        10         5
# IRanges object with 2 ranges and 0 metadata columns:
#           start       end     width
#       <integer> <integer> <integer>
#   [1]        10        10         1
#   [2]         9        10         2
subset(ir1,width(ir1)>=5)
# IRanges object with 6 ranges and 0 metadata columns:
#           start       end     width
#       <integer> <integer> <integer>
#   [1]         1        10        10
#   [2]         2        10         9
#   [3]         3        10         8
#   [4]         4        10         7
#   [5]         5        10         6
#   [6]         6        10         5
ir1[IRanges(start = c(1,2),width = 2)]
# IRanges object with 4 ranges and 0 metadata columns:
#           start       end     width
#       <integer> <integer> <integer>
#   [1]         1        10        10
#   [2]         2        10         9
#   [3]         2        10         9
#   [4]         3        10         8
ir1[[2]]
# [1]  2  3  4  5  6  7  8  9 10

# Work on a copy of ir1 whose start positions are rotated by five places.
ir2 <- ir1
start(ir2) <- c(6:10,1:5)
# Keep the elements from position 5 onwards (ir2 holds 10 ranges).
window(ir2,5) #== window(ir2,5,10)
# IRanges object with 6 ranges and 0 metadata columns:
#           start       end     width
#       <integer> <integer> <integer>
#   [1]        10        10         1
#   [2]         1        10        10
#   [3]         2        10         9
#   [4]         3        10         8
#   [5]         4        10         7
#   [6]         5        10         6
# Plot ir2 with the addlabel option switched on.
plotRanges(ir2, addlabel = TRUE)

IRanges provides endoapply (an endomorphic analogue of lapply), lapply, and sapply for looping over sequences, plus two generics, aggregate and shiftApply, for performing calculations over subsequences.

The aggregate function combines sequence extraction functionality of the window function with looping capabilities of the sapply function.

aggregate(ir2, start = 1:8, width = 3, FUN = mean)
#      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
# [1,]  8.0  8.5  9.0  9.5 10.0  5.5  6.0  6.5
# [2,]  8.5  9.0  9.5 10.0  5.5  6.0  6.5  7.0
# [3,]  9.0  9.5 10.0  5.5  6.0  6.5  7.0  7.5

The shiftApply function is a looping operation involving two sequences whose elements are lined up via a positional shift operation.

idx <- seq(9)
shifts <-  shiftApply(idx,ir2,ir1,identical)
plot(x=idx,y=shifts,type="b")

Intra-range transformations

ir <- IRanges(c(1000, 2000,3000,1500,2500,4000), width = c(500, 1000,500,1000,2000,2500))
  #         start       end     width
  #     <integer> <integer> <integer>
  # [1]      1000      1499       500
  # [2]      2000      2999      1000
  # [3]      3000      3499       500
  # [4]      1500      2499      1000
  # [5]      2500      4499      2000
  # [6]      4000      6499      2500
ir + seq(-200,300,100)
## IRanges object with 6 ranges and 0 metadata columns:
##           start       end     width
##       <integer> <integer> <integer>
##   [1]      1200      1299       100
##   [2]      2100      2899       800
##   [3]      3000      3499       500
##   [4]      1400      2599      1200
##   [5]      2300      4699      2400
##   [6]      3700      6799      3100
# IRanges object with 6 ranges and 0 metadata columns:
#           start       end     width
#       <integer> <integer> <integer>
#   [1]      1200      1299       100
#   [2]      2100      2899       800
#   [3]      3000      3499       500
#   [4]      1400      2599      1200
#   [5]      2300      4699      2400
#   [6]      3700      6799      3100

#  positive contracts (zooms in) and negative expands (zooms out)
ir*2
## IRanges object with 6 ranges and 0 metadata columns:
##           start       end     width
##       <integer> <integer> <integer>
##   [1]      1125      1374       250
##   [2]      2250      2749       500
##   [3]      3125      3374       250
##   [4]      1750      2249       500
##   [5]      3000      3999      1000
##   [6]      4625      5874      1250
plotRanges(ir[-3]*2,shift(ir[-3]*-2,6000),height = .5,sep=.4,colors = c("red","gray20","white"))

Shift

shift(x, shift=0L, use.names=TRUE)

plotRanges(ir,shift(ir, -5000),shift(ir, 1000),height = .5,sep=.4)

Narrow

narrow(x, start=NA, end=NA, width=NA, use.names=TRUE)

# Compare the original ranges with two narrowed versions: one starting at
# relative position 100, one starting 200 positions before each range's end.
plotRanges(
  ir, narrow(ir, 100), narrow(ir, -200),
  colors = c("red", "green", "blue"), addlabel = TRUE
)

narrow(ir, 100)
## IRanges object with 6 ranges and 0 metadata columns:
##           start       end     width
##       <integer> <integer> <integer>
##   [1]      1099      1499       401
##   [2]      2099      2999       901
##   [3]      3099      3499       401
##   [4]      1599      2499       901
##   [5]      2599      4499      1901
##   [6]      4099      6499      2401
# IRanges object with 6 ranges and 0 metadata columns:
#           start       end     width
#       <integer> <integer> <integer>
#   [1]      1099      1499       401
#   [2]      2099      2999       901
#   [3]      3099      3499       401
#   [4]      1599      2499       901
#   [5]      2599      4499      1901
#   [6]      4099      6499      2401
narrow(ir, -200)
## IRanges object with 6 ranges and 0 metadata columns:
##           start       end     width
##       <integer> <integer> <integer>
##   [1]      1300      1499       200
##   [2]      2800      2999       200
##   [3]      3300      3499       200
##   [4]      2300      2499       200
##   [5]      4300      4499       200
##   [6]      6300      6499       200
# IRanges object with 6 ranges and 0 metadata columns:
#           start       end     width
#       <integer> <integer> <integer>
#   [1]      1300      1499       200
#   [2]      2800      2999       200
#   [3]      3300      3499       200
#   [4]      2300      2499       200
#   [5]      4300      4499       200
#   [6]      6300      6499       200

Flank

flank(x, width, start=TRUE, both=FALSE, use.names=TRUE, ...)

plotRanges(ir,flank(ir, 100),flank(ir, -100),colors = c("red","green","blue"))

# Compare the original ranges with a flank taken with both = TRUE and a
# negative-width flank taken from the end (start = FALSE).
plotRanges(
  ir,
  flank(ir, 100, both = TRUE),
  flank(ir, -100, start = FALSE),
  colors = c("red", "green", "blue")
)

Reflect

reflect(x, bounds, use.names=TRUE)

bounds <- IRanges(1000, width = 1000)
ir.trans <- reflect(ir, bounds = bounds)
plotRanges(ir,ir.trans,bounds,colors = c("red","green","blue"))

Promoters

promoters(x, upstream=2000, downstream=200, ...)

ir.trans <- promoters(ir, upstream = 200, downstream = 100)
# Plot the original ranges next to their promoter regions, labelled 1 to 6.
plotRanges(ir, ir.trans, addlabel = TRUE, label = 1:6)

Resize

resize(x, width, fix="start", use.names=TRUE, ...)

plotRanges(ir,resize(ir, width = c(100, 500),fix = "end"),resize(ir, width = c(100, 500), fix = "center"))

# Stack three versions of the ranges into one table: the originals (A), then
# copies resized to alternating widths of 100 and 500 anchored at the start
# (B) and at the center (C). `fix` is an argument of resize(), so it belongs
# inside that call rather than in as.data.frame().
dat <- rbind(
  as.data.frame(ir),
  as.data.frame(resize(ir, width = c(100, 500), fix = "start")),
  as.data.frame(resize(ir, width = c(100, 500), fix = "center"))
)
# Row labels: group letter plus the range's index within its group.
rownames(dat) <- paste0(rep(LETTERS[1:3], each = length(ir)), seq_along(ir))
plotRanges(IRanges(start = dat$start, end = dat$end), label = rownames(dat))

### Restrict restrict(x, start=NA, end=NA, keep.all.ranges=FALSE, use.names=TRUE)

ir.trans1 <- restrict(ir,200,2000)
ir.trans2 <- restrict(ir,3000,3500)
plotRanges(ir,ir.trans1,ir.trans2)

### Threebands

ir.trans <- threebands(ir, start=40, end=-200)
plotRanges(ir,ir.trans$left,ir.trans$right)

plotRanges(ir,narrow(ir,40,-200),ir.trans$middle)

Inter-range transformations

ir <- IRanges(c(200, 1000, 3000, 2500,3500,1500,4000), width = c(600, 1000, 300, 850,500,700,250))
# IRanges object with 7 ranges and 0 metadata columns:
#           start       end     width
#       <integer> <integer> <integer>
#   [1]       200       799       600
#   [2]      1000      1999      1000
#   [3]      3000      3299       300
#   [4]      2500      3349       850
#   [5]      3500      3999       500
#   [6]      1500      2199       700
#   [7]      4000      4249       250
ir.trans <- range(ir)
plotRanges(ir,ir.trans,alpha = .2)

# Original ranges against their reduce() result.
plotRanges(
  ir, reduce(ir),
  group = FALSE, colors = c("gray90", "black", "white")
)

# Original ranges against their gaps() result.
plotRanges(ir, gaps(ir), group = FALSE)

# Original ranges against their disjoin() result.
plotRanges(ir, disjoin(ir), group = FALSE)

# IRanges object with 9 ranges and 0 metadata columns:
  #         start       end     width
  #     <integer> <integer> <integer>
  # [1]       200       799       600
  # [2]      1000      1499       500
  # [3]      1500      1999       500
  # [4]      2000      2199       200
  # [5]      2500      2999       500
  # [6]      3000      3299       300
  # [7]      3300      3349        50
  # [8]      3500      3999       500
  # [9]      4000      4249       250
findOverlaps(ir) #== findOverlaps(ir,ir)
## SelfHits object with 11 hits and 0 metadata columns:
##        queryHits subjectHits
##        <integer>   <integer>
##    [1]         1           1
##    [2]         2           2
##    [3]         2           6
##    [4]         3           4
##    [5]         3           3
##    [6]         4           4
##    [7]         4           3
##    [8]         5           5
##    [9]         6           2
##   [10]         6           6
##   [11]         7           7
##   -
##   queryLength: 7 / subjectLength: 7
# SelfHits object with 11 hits and 0 metadata columns:
#        queryHits subjectHits
#        <integer>   <integer>
#    [1]         1           1
#    [2]         2           2
#    [3]         2           6
#    [4]         3           4
#    [5]         3           3
#    [6]         4           4
#    [7]         4           3
#    [8]         5           5
#    [9]         6           2
#   [10]         6           6
#   [11]         7           7
#   -
#   queryLength: 7 / subjectLength: 7
countOverlaps(ir)
## [1] 1 2 2 2 1 2 1
# 1 2 2 2 1 2 1

coverage

# coverage(ir)
# integer-Rle of length 4249 with 12 runs
#   Lengths: 199 600 200 500 500 200 300 500 300  50 150 750
#   Values :   0   1   0   1   2   1   0   1   2   1   0   1
# Expand the coverage Rle to one depth value per position. The local is named
# cvg so that it does not shadow stats::cov.
cvg <- as.vector(coverage(ir))
# Explicit column names instead of the auto-generated V1 from cbind().
mat <- data.frame(pos = seq_along(cvg), cov = cvg)
# Overlay the per-position depth as a red line on the range plot.
plotRanges(ir, height = 1) +
  geom_line(aes(x = pos, y = cov), color = "red", lwd = 1, data = mat)

Union

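intersect finds the regions shared by both sets; setdiff finds the regions of the first set that are not covered by the second; union finds the regions covered by either set.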

ir1 <- IRanges(c(200, 1000, 3000, 2500,4000,6500), width = c(600, 1000, 800, 850,400,200))
ir2 <- IRanges(c(100, 1500, 2000, 3500,5000,7000), width = c(500, 800, 1000, 550,700,400))
distance(ir1,ir2)
## [1]   0   0   0 150 600 300
# [1]    0    0    0  150  600 300
# The two input sets side by side, with labels.
plotRanges(
  ir1, ir2,
  colors = c("gray90", "gray10", "red"), alpha = .5, addlabel = TRUE
)

plotRanges(intersect(ir1, ir2))

plotRanges(setdiff(ir1, ir2))

plotRanges(union(ir1, ir2))

# Element-wise (parallel) union; fill.gap = TRUE is needed here because some
# pairs, e.g. the last three, do not overlap (see distance(ir1, ir2)).
plotRanges(punion(ir1, ir2, fill.gap = TRUE))

GRanges

gr1 <- GRanges(seqnames = Rle(c("Chr1", "Chr2","Chr3", "Chr3"), c(5:3,2)),
                   ranges = IRanges(start = c(1300,1050, 2000,5000,seq(1000,5500,500)), 
                                  end = c(2500, 1870, 3200,5800,seq(2000,11000,1000)),
                                  names = head(letters,14)), 
                   strand = Rle(c("+", "+", "-","-","+"),c(1:4,4)), 
                   seqlengths = c(Chr1 = 1e+06,Chr2 = 2.5e+06,Chr3 = 2e+06))
# GRanges object with 14 ranges and 0 metadata columns:
#     seqnames        ranges strand
#        <Rle>     <IRanges>  <Rle>
#   a     Chr1  [1300, 2500]      +
#   b     Chr1  [1050, 1870]      +
#   c     Chr1  [2000, 3200]      +
#   d     Chr1  [5000, 5800]      -
#   e     Chr1  [1000, 2000]      -
#   .      ...           ...    ...
#   j     Chr3 [3500,  7000]      -
#   k     Chr3 [4000,  8000]      +
#   l     Chr3 [4500,  9000]      +
#   m     Chr3 [5000, 10000]      +
#   n     Chr3 [5500, 11000]      +
#   -
#   seqinfo: 3 sequences from an unspecified genome
gr1[1:3,] #== gr1[1:3], gr1[c("a","b","c")]
## GRanges object with 3 ranges and 0 metadata columns:
##     seqnames       ranges strand
##        <Rle>    <IRanges>  <Rle>
##   a     Chr1 [1300, 2500]      +
##   b     Chr1 [1050, 1870]      +
##   c     Chr1 [2000, 3200]      +
##   -
##   seqinfo: 3 sequences from an unspecified genome
# GRanges object with 3 ranges and 0 metadata columns:
#     seqnames       ranges strand
#        <Rle>    <IRanges>  <Rle>
#   a     Chr1 [1300, 2500]      +
#   b     Chr1 [1050, 1870]      +
#   c     Chr1 [2000, 3200]      +
#   -
#   seqinfo: 3 sequences from an unspecified genome
names(gr1)
##  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n"
# [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n"
ranges(gr1)
## IRanges object with 14 ranges and 0 metadata columns:
##         start       end     width
##     <integer> <integer> <integer>
##   a      1300      2500      1201
##   b      1050      1870       821
##   c      2000      3200      1201
##   d      5000      5800       801
##   e      1000      2000      1001
##   .       ...       ...       ...
##   j      3500      7000      3501
##   k      4000      8000      4001
##   l      4500      9000      4501
##   m      5000     10000      5001
##   n      5500     11000      5501
# IRanges object with 14 ranges and 0 metadata columns:
#         start       end     width
#     <integer> <integer> <integer>
#   a      1300      2500      1201
#   b      1050      1870       821
#   c      2000      3200      1201
#   d      5000      5800       801
#   e      1000      2000      1001
#   .       ...       ...       ...
t(rbind(start(ranges(gr1)),end(ranges(gr1)),width(ranges(gr1))))
##       [,1]  [,2] [,3]
##  [1,] 1300  2500 1201
##  [2,] 1050  1870  821
##  [3,] 2000  3200 1201
##  [4,] 5000  5800  801
##  [5,] 1000  2000 1001
##  [6,] 1500  3000 1501
##  [7,] 2000  4000 2001
##  [8,] 2500  5000 2501
##  [9,] 3000  6000 3001
## [10,] 3500  7000 3501
## [11,] 4000  8000 4001
## [12,] 4500  9000 4501
## [13,] 5000 10000 5001
## [14,] 5500 11000 5501
#       [,1]  [,2] [,3]
#  [1,] 1300  2500 1201
#  [2,] 1050  1870  821
#  [3,] 2000  3200 1201
#  [4,] 5000  5800  801
#  [5,] 1000  2000 1001
#   ...       ...       ...
seqnames(gr1)
## factor-Rle of length 14 with 3 runs
##   Lengths:    5    4    5
##   Values : Chr1 Chr2 Chr3
## Levels(3): Chr1 Chr2 Chr3
# factor-Rle of length 14 with 3 runs
#   Lengths:    5    5    4
#   Values : Chr1 Chr2 Chr3
# Levels(3): Chr1 Chr2 Chr3
seqinfo(gr1)
## Seqinfo object with 3 sequences from an unspecified genome:
##   seqnames seqlengths isCircular genome
##   Chr1        1000000         NA   <NA>
##   Chr2        2500000         NA   <NA>
##   Chr3        2000000         NA   <NA>
# Seqinfo object with 3 sequences from an unspecified genome:
#   seqnames seqlengths isCircular genome
#   Chr1        1000000         NA   <NA>
#   Chr2        2500000         NA   <NA>
#   Chr3        2000000         NA   <NA>
cbind(seqlevels(gr1),as.numeric(seqlengths(gr1)),isCircular(gr1),genome(gr1))
##      [,1]   [,2]      [,3] [,4]
## Chr1 "Chr1" "1e+06"   NA   NA  
## Chr2 "Chr2" "2500000" NA   NA  
## Chr3 "Chr3" "2e+06"   NA   NA
#      [,1]   [,2]      [,3] [,4]
# Chr1 "Chr1" "1e+06"   NA   NA  
# Chr2 "Chr2" "2500000" NA   NA  
# Chr3 "Chr3" "2e+06"   NA   NA
strand(gr1)
## factor-Rle of length 14 with 3 runs
##   Lengths: 3 7 4
##   Values : + - +
## Levels(3): + - *
# factor-Rle of length 14 with 3 runs
#   Lengths: 3 7 4
#   Values : + - +
# Levels(3): + - *
seqnames(gr1) <- rep(c("Chr1", "Chr2","Chr3", "Chr3"),c(5,5,1,3))
# Three views of gr1 using different plotRanges labelling options.
plotRanges(gr1, addlabel = TRUE)

plotRanges(gr1, label.group = TRUE)

plotRanges(gr1, label.group = TRUE, label.facet = TRUE, ylim = c(0, 25))

set.seed(1)
N <- 100
gr2 <- GRanges(seqnames = sample(c("Chr1", "Chr2", "Chr3", "Chr4"),N, replace = TRUE),
              IRanges(start = sample(100:10000, N, replace = TRUE),
                       width = sample(100:10000, N,replace = TRUE)),
              strand = sample(c("+", "-", "*"), N, replace = TRUE),
              GC = sample(seq(0,1,0.001),N,replace = TRUE), 
              score = rnorm(N, 100, 10),
              sample = sample(c("Normal", "Abormal"), N, replace = TRUE),
              group = sample(LETTERS, N,replace = TRUE))
values(gr2) #== mcols(gr2)
## DataFrame with 100 rows and 4 columns
##            GC     score      sample       group
##     <numeric> <numeric> <character> <character>
## 1       0.265 103.98106     Abormal           R
## 2       0.372  93.87974      Normal           E
## 3       0.573 103.41120      Normal           Y
## 4       0.909  88.70637      Normal           X
## 5       0.201 114.33024      Normal           Y
## ...       ...       ...         ...         ...
## 96      0.798  92.49181      Normal           L
## 97      0.455 120.87167     Abormal           L
## 98      0.410 100.17396      Normal           N
## 99      0.811  87.13699      Normal           W
## 100     0.605  83.59394      Normal           B
#            GC     score      sample       group
#     <numeric> <numeric> <character> <character>
# 1       0.478 102.91446      Normal           Z
# 2       0.862  95.56708      Normal           L
# 3       0.438 100.01105     Abormal           J
# 4       0.245 100.74341      Normal           E
# 5       0.070  94.10479     Abormal           B
# ...       ...       ...         ...         ...
# 96      0.453 123.07978      Normal           C
# 97      0.175 101.05802      Normal           Z
# 98      0.747 104.56999     Abormal           J
# 99      0.105  99.22847      Normal           O
# 100     0.865  96.65999      Normal           T

score(gr2) #== mcols(gr2)$score gr2$score
##   [1] 103.98106  93.87974 103.41120  88.70637 114.33024 119.80400  96.32779
##   [8]  89.55865 105.69720  98.64945 124.01618  99.60760 106.89739 100.28002
##  [15]  92.56727 101.88792  81.95041 114.65555 101.53253 121.72612 104.75510
##  [22]  92.90054 106.10726  90.65902  87.46367 102.91446  95.56708 100.01105
##  [29] 100.74341  94.10479  94.31331  98.64821 111.78087  84.76433 105.93946
##  [36] 103.32950 110.63100  96.95816 103.70019 102.67099  94.57480 112.07868
##  [43] 111.60403 107.00214 115.86833 105.58486  87.23408  94.26735  87.75387
##  [50]  95.26599  93.79633 100.42116  90.89078 101.58029  93.45415 117.67287
##  [57] 107.16707 109.10174 103.84185 116.82176  93.64264  95.38355 114.32282
##  [64]  93.49304  97.92619  96.07192  96.80007  97.20887 104.94188  98.22670
##  [71]  94.94043 113.43039  97.85421  98.20443  98.99809 107.12666  99.26436
##  [78]  99.62366  93.18340  96.75730 100.60160  94.11106 105.31496  84.81606
##  [85] 103.06558  84.63550  96.99024  94.71720  93.47905  99.43103  80.85641
##  [92] 111.76583  83.35028  95.36470  88.84080  92.49181 120.87167 100.17396
##  [99]  87.13699  83.59394
# [1] 103.98106  93.87974 103.41120  88.70637 114.33024 ...
gr2$GC #== mcols(gr2)$GC
##   [1] 0.265 0.372 0.573 0.909 0.201 0.899 0.945 0.661 0.629 0.061 0.206
##  [12] 0.176 0.687 0.384 0.770 0.498 0.718 0.992 0.380 0.778 0.935 0.212
##  [23] 0.652 0.125 0.267 0.386 0.013 0.382 0.870 0.340 0.482 0.600 0.494
##  [34] 0.186 0.828 0.669 0.795 0.108 0.724 0.411 0.821 0.647 0.783 0.553
##  [45] 0.530 0.790 0.023 0.477 0.733 0.693 0.478 0.862 0.438 0.245 0.070
##  [56] 0.099 0.316 0.519 0.662 0.407 0.913 0.293 0.459 0.332 0.651 0.258
##  [67] 0.479 0.767 0.084 0.876 0.339 0.840 0.347 0.334 0.476 0.893 0.865
##  [78] 0.390 0.778 0.961 0.435 0.713 0.400 0.325 0.757 0.202 0.711 0.121
##  [89] 0.245 0.143 0.239 0.058 0.642 0.877 0.779 0.798 0.455 0.410 0.811
## [100] 0.605
# [1] 0.265 0.372 0.573 0.909 0.201 0.899 0.945 0.661 ....
mcols(gr2)["score"] #== mcols(gr2)[2]
## DataFrame with 100 rows and 1 column
##         score
##     <numeric>
## 1   103.98106
## 2    93.87974
## 3   103.41120
## 4    88.70637
## 5   114.33024
## ...       ...
## 96   92.49181
## 97  120.87167
## 98  100.17396
## 99   87.13699
## 100  83.59394
# mcols stands for metadata columns

# DataFrame with 100 rows and 1 column
#         score
#     <numeric>
# 1   103.98106
# 2    93.87974
# 3   103.41120
# 4    88.70637
# 5   114.33024
# ...       ...
# 96   92.49181
# 97  120.87167
# 98  100.17396
# 99   87.13699
# 100  83.59394

ir <- ranges(gr2)
plotRanges(ir,sep=.2)

plotRanges(shift(gr2,10000),sep=.2)

findOverlapPairs(gr1,gr2)
## Pairs object with 113 pairs and 0 metadata columns:
##                     first            second
##                 <GRanges>         <GRanges>
##     [1]  Chr1:1300-2500:+  Chr1:1819-8744:*
##     [2]  Chr1:1300-2500:+   Chr1:591-6512:*
##     [3]  Chr1:1300-2500:+  Chr1:1415-9423:+
##     [4]  Chr1:1050-1870:+  Chr1:1819-8744:*
##     [5]  Chr1:1050-1870:+   Chr1:591-6512:*
##     ...               ...               ...
##   [109] Chr3:5500-11000:+ Chr3:9581-16327:+
##   [110] Chr3:5500-11000:+ Chr3:9495-17298:*
##   [111] Chr3:5500-11000:+  Chr3:6502-8024:*
##   [112] Chr3:5500-11000:+ Chr3:6521-12157:+
##   [113] Chr3:5500-11000:+  Chr3:5001-6760:+
# Pairs object with 110 pairs and 0 metadata columns:
#                     first            second
#                 <GRanges>         <GRanges>
#     [1]  Chr1:1300-2500:+  Chr1:1819-8744:*
#     [2]  Chr1:1300-2500:+   Chr1:591-6512:*
#     [3]  Chr1:1300-2500:+  Chr1:1415-9423:+
#     [4]  Chr1:1050-1870:+  Chr1:1819-8744:*
#     [5]  Chr1:1050-1870:+   Chr1:591-6512:*
#     ...               ...               ...
#   [106] Chr3:5500-11000:+ Chr3:9581-16327:+
#   [107] Chr3:5500-11000:+ Chr3:9495-17298:*
#   [108] Chr3:5500-11000:+  Chr3:6502-8024:*
#   [109] Chr3:5500-11000:+ Chr3:6521-12157:+
#   [110] Chr3:5500-11000:+  Chr3:5001-6760:+
countOverlaps(gr1,gr2)
##  a  b  c  d  e  f  g  h  i  j  k  l  m  n 
##  3  3  6 10  3  5  7 10 11 11 10 10 12 12
 # a  b  c  d  e  f  g  h  i  j  k  l  m  n 
 # 3  3  6 10  3  5  7 10 11  8 10 10 12 12 

GGbio

ggbio facets automatically and assigns the y positions itself, which means that its geom_rect supports GRanges objects directly.

library(ggbio)
# The following objects are masked from 'package:ggplot2':
# 
#     geom_bar, geom_rect, geom_segment, ggsave, stat_bin, stat_identity, xlim
ggplot(gr2) + geom_rect()

ggplot(gr2) + geom_alignment()

ggplot(gr2) + ggplot2::geom_rect(aes(xmin = start, ymin = score,
                                    xmax = end, ymax = score + .5))

grl <- split(gr2, values(gr2)$group)
ggplot(grl) + geom_alignment()

ggplot(ir) + geom_rect()

ggplot(ir) + layout_circle(geom = "rect")

Author: Joaxin
Link: https://u.pinsflora.xyz/Reverie/html/Bioc_Ranges/
Copyright Notice: All articles in this blog are licensed under CC BY-NC-SA 4.0 unless stating additionally.

Comment