Genomic Ranges
rle
Run Length Encoding Integer Ranges Interval Ranges
# Build a 17,000-element chromosome label vector made of four long runs.
chr.str <- rep(
  c("ChrI", "ChrII", "ChrIII", "ChrIV"),
  times = c(3000, 5000, 2000, 7000)
)

# Base R run-length encoding: one (length, value) pair per run.
v <- rle(chr.str)

as.vector(v)
# Run Length Encoding
#   lengths: int [1:4] 3000 5000 2000 7000
#   values : chr [1:4] "ChrI" "ChrII" "ChrIII" "ChrIV"

as.character(v)
# [1] "c(3000, 5000, 2000, 7000)"
# [2] "c(\"ChrI\", \"ChrII\", \"ChrIII\", \"ChrIV\")"

# inverse.rle() expands the encoding back into the original vector.
identical(inverse.rle(v), chr.str)
# TRUE
Even though factors are basically just integer vectors with some information about levels attached, the rle() function doesn’t work with factors. One issue that might be problematic is that each NA is treated as a run of length 1, even if the NA’s are next to each other.
# Blank out elements 2..20 to see how run-length encoding copes with NA.
is.na(chr.str) <- 2:20

# rle(factor(chr.str))
# Error: 'x' must be a vector of an atomic type

rle(chr.str)
# Run Length Encoding
#   lengths: int [1:24] 1 1 1 1 1 1 1 1 1 1 ...
#   values : chr [1:24] "ChrI" NA NA NA ... (19 NAs) ... "ChrI" "ChrII" "ChrIII" "ChrIV"

#' Run-length encode a vector, treating consecutive NAs as one run
#'
#' Works on anything `as.character()` accepts (including factors, which
#' base `rle()` rejects). Each element is first replaced by the integer
#' position of its value in `unique()`; `match()` pairs NA with NA, so
#' missing values get a code of their own and no placeholder string is
#' needed. A real value such as "ZZZ" therefore can never be mistaken for
#' a missing one.
#'
#' @param x A vector or factor.
#' @param ... Ignored; kept so existing calls keep working.
#' @return An object of class "rle" whose `values` are character, with
#'   `NA` marking runs of missing values.
rrle <- function(x, ...) {
  cx <- as.character(x)
  lev <- unique(cx)
  v <- rle(match(cx, lev))
  # Map the integer codes back to the labels (NA stays NA).
  v$values <- lev[v$values]
  v
}

rrle(chr.str)
rrle(factor(chr.str))
# Run Length Encoding
#   lengths: int [1:6] 1 19 2980 5000 2000 7000
#   values : chr [1:6] "ChrI" NA "ChrI" "ChrII" "ChrIII" "ChrIV"

# Object size of the encoding relative to the raw vector
as.vector(object.size(rrle(chr.str)) / object.size(chr.str))
# 0.006459569

# ... but once the vector is shuffled the encoding no longer saves space
set.seed(1)
rchr.str <- chr.str[sample(length(chr.str))]
as.vector(object.size(rrle(rchr.str)) / object.size(chr.str))
# 1.049797
Rle
library(IRanges) |
# Rle() is the S4 counterpart of base rle(). Unlike rle(), it folds the
# consecutive NAs in chr.str into a single run.
chr.rle <- Rle(chr.str) # equivalent: as(chr.str, "Rle")
# character-Rle of length 17000 with 6 runs
#   Lengths:      1     19   2980    5000     2000    7000
#   Values : "ChrI"   "NA" "ChrI" "ChrII" "ChrIII" "ChrIV"

identical(as.character(chr.rle), chr.str)
# TRUE

chr.rle

# Overwrite the NA run through the runValue() accessor instead of writing
# to the @values slot: the replacement method builds a fresh Rle from the
# new values and the old run lengths, so the neighbouring "ChrI" runs are
# merged and no manual rebuild is needed.
runValue(chr.rle)[2] <- "ChrI"
# character-Rle of length 17000 with 4 runs
#   Lengths:   3000    5000     2000    7000
#   Values : "ChrI" "ChrII" "ChrIII" "ChrIV"

chr.rle[1:5] # same as chr.rle[IRanges(1, 5)]
# character-Rle of length 5 with 1 run
#   Lengths:      5
#   Values : "ChrI"

subset(chr.rle, chr.rle >= "ChrII")
# character-Rle of length 14000 with 3 runs
#   Lengths:    5000     2000    7000
#   Values : "ChrII" "ChrIII" "ChrIV"

c(chr.rle, chr.rle) # same as rep(chr.rle, 2) or append(chr.rle, chr.rle)
# character-Rle of length 34000 with 8 runs
#   Lengths:   3000    5000 ...     2000    7000
#   Values : "ChrI" "ChrII" ... "ChrIII" "ChrIV"
# Build an Rle directly from run values and run lengths.
chr.rle <- Rle(
  values = c("ChrI", "ChrII", "ChrIII", "ChrIV", "ChrV", "ChrII"),
  lengths = c(1000, 2000, 4000, 3500, 1000, 500)
)
# character-Rle of length 12000 with 6 runs
#   Lengths:   1000    2000     4000    3500   1000     500
#   Values : "ChrI" "ChrII" "ChrIII" "ChrIV" "ChrV" "ChrII"

str(chr.rle)
# Formal class 'Rle' [package "S4Vectors"] with 4 slots
#   ..@ values         : chr [1:6] "ChrI" "ChrII" "ChrIII" "ChrIV" ...
#   ..@ lengths        : int [1:6] 1000 2000 4000 3500 1000 500
#   ..@ elementMetadata: NULL
#   ..@ metadata       : list()

# Basic accessors
length(chr.rle)    # [1] 12000
runLength(chr.rle) # 1000 2000 4000 3500 1000 500
runValue(chr.rle)  # "ChrI" "ChrII" "ChrIII" "ChrIV" "ChrV" "ChrII"
nrun(chr.rle)      # 6

# First position, last position and width of every run
rbind(start(chr.rle), end(chr.rle), width(chr.rle))
#      [,1] [,2] [,3]  [,4]  [,5]  [,6]
# [1,]    1 1001 3001  7001 10501 11501
# [2,] 1000 3000 7000 10500 11500 12000
# [3,] 1000 2000 4000  3500  1000   500

range(chr.rle)
# [1] "ChrI" "ChrV"

# Run lengths and run values can be replaced in place.
runLength(chr.rle) <- rep(2500, nrun(chr.rle))
runValue(chr.rle)[3:4] <- c("ChrIV", "ChrIIV")
chr.rle
# character-Rle of length 15000 with 6 runs
#   Lengths:   2500    2500    2500     2500   2500    2500
#   Values : "ChrI" "ChrII" "ChrIV" "ChrIIV" "ChrV" "ChrII"

identical(as.factor(chr.rle), factor(as.character(chr.rle)))
# TRUE
# Two integer Rle objects of different length (6 and 12); the shorter one is
# recycled in the element-wise operations below.
# NOTE(review): the recorded outputs come from pasted sessions and are not
# all mutually consistent (sample() results depend on the R version) --
# re-run before relying on the exact numbers.
set.seed(0)
rle1 <- Rle(sample(4, 6, replace = TRUE))
rle2 <- Rle(sample(5, 12, replace = TRUE))
# rle1
#   Lengths: 2 1 1 1 1
#   Values : 2 4 1 4 2
# rle2
#   Lengths: 1 2 1 2 1 1 1 1 1 1
#   Values : 5 2 3 5 2 4 5 3 4 2

# Arithmetic on Rle objects agrees with arithmetic on the expanded vectors.
identical(rle1 + rle2, Rle(as.integer(rle1) + as.integer(rle2)))
# TRUE
identical(rle1 * rle2, Rle(as.integer(rle1) * as.integer(rle2)))
# TRUE

sqrt(rle1)
# numeric-Rle of length 6 with 5 runs
#   Lengths: 1 2 1 1 1
#   Values : 2 1.4142135623731 1.73205080756888 2 1

rle1 > 2 | rle2 > 4
# logical-Rle of length 12 with 8 runs
#   Lengths:    2     1    2     1    1     2    2     1
#   Values : TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE

sum(rle1 > 2 | rle2 > 4)
# 7

paste(rle1, rle2, sep = "")
# character-Rle of length 12 with 12 runs
#   Lengths:    1    1    1    1    1    1    1    1    1    1    1    1
#   Values : "25" "22" "42" "13" "45" "25" "22" "24" "45" "13" "44" "22"
IRanges
Ranges
# Ten ranges that all end at position 10: starts 1..10 with widths 10..1.
ir1 <- IRanges(start = 1:10, width = 10:1)
# IRanges object with 10 ranges and 0 metadata columns:
#            start       end     width
#        <integer> <integer> <integer>
#    [1]         1        10        10
#    [2]         2        10         9
#    [3]         3        10         8
#    [4]         4        10         7
#    [5]         5        10         6
#    [6]         6        10         5
#    [7]         7        10         4
#    [8]         8        10         3
#    [9]         9        10         2
#   [10]        10        10         1

# head(), tail() and rev() behave as they do on ordinary vectors.
head(ir1)[1:2]
#   [1]         1        10        10
#   [2]         2        10         9
tail(ir1)[1:2]
#   [1]         5        10         6
#   [2]         6        10         5
rev(ir1)[1:2]
#   [1]        10        10         1
#   [2]         9        10         2

subset(ir1, width(ir1) >= 5)
# IRanges object with 6 ranges and 0 metadata columns:
#   [1]         1        10        10
#   [2]         2        10         9
#   [3]         3        10         8
#   [4]         4        10         7
#   [5]         5        10         6
#   [6]         6        10         5

# Subsetting by an IRanges selects the elements at the covered indices
# (here 1:2 and 2:3).
ir1[IRanges(start = c(1, 2), width = 2)]
# IRanges object with 4 ranges and 0 metadata columns:
#   [1]         1        10        10
#   [2]         2        10         9
#   [3]         2        10         9
#   [4]         3        10         8

# [[ expands one range into the integer positions it covers.
ir1[[2]]
# [1]  2  3  4  5  6  7  8  9 10

# Replace the starts; the recorded output below shows the ends staying at 10.
ir2 <- ir1
start(ir2) <- c(6:10, 1:5)

window(ir2, 5) # same as window(ir2, 5, 10)
# IRanges object with 6 ranges and 0 metadata columns:
#   [1]        10        10         1
#   [2]         1        10        10
#   [3]         2        10         9
#   [4]         3        10         8
#   [5]         4        10         7
#   [6]         5        10         6

plotRanges(ir2, addlabel = TRUE)
IRanges provides endoapply (similar to lapply) and sapply for looping over sequences, plus two generics, aggregate and shiftApply, for performing calculations over subsequences.
The aggregate function combines sequence extraction functionality of the window function with looping capabilities of the sapply function.
# Apply mean() over windows of width 3 starting at positions 1..8 of ir2.
aggregate(
  ir2,
  start = 1:8,
  width = 3,
  FUN = mean
)
#      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
# [1,]  8.0  8.5  9.0  9.5 10.0  5.5  6.0  6.5
# [2,]  8.5  9.0  9.5 10.0  5.5  6.0  6.5  7.0
# [3,]  9.0  9.5 10.0  5.5  6.0  6.5  7.0  7.5
The shiftApply function is a looping operation involving two sequences whose elements are lined up via a positional shift operation.
# For each shift 1..9, line ir2 up against ir1 and test the aligned parts
# with identical(); then plot the result against the shift.
idx <- seq_len(9)
shifts <- shiftApply(idx, ir2, ir1, identical)
plot(x = idx, y = shifts, type = "b")
Intra-range transformations
# Six example ranges used throughout the intra-range transformations.
ir <- IRanges(
  start = c(1000, 2000, 3000, 1500, 2500, 4000),
  width = c(500, 1000, 500, 1000, 2000, 2500)
)
#           start       end     width
#       <integer> <integer> <integer>
#   [1]      1000      1499       500
#   [2]      2000      2999      1000
#   [3]      3000      3499       500
#   [4]      1500      2499      1000
#   [5]      2500      4499      2000
#   [6]      4000      6499      2500

# Adding a vector changes each range on both sides by the matching amount
# (a negative amount shrinks the range).
ir + seq(-200, 300, 100)
## IRanges object with 6 ranges and 0 metadata columns: ## start end width ## <integer> <integer> <integer> ## [1] 1200 1299 100 ## [2] 2100 2899 800 ## [3] 3000 3499 500 ## [4] 1400 2599 1200 ## [5] 2300 4699 2400 ## [6] 3700 6799 3100 |
# Multiplication zooms: a positive factor contracts the ranges (zooms in)
# and a negative factor expands them (zooms out).
ir * 2
## IRanges object with 6 ranges and 0 metadata columns: ## start end width ## <integer> <integer> <integer> ## [1] 1125 1374 250 ## [2] 2250 2749 500 ## [3] 3125 3374 250 ## [4] 1750 2249 500 ## [5] 3000 3999 1000 ## [6] 4625 5874 1250 |
# Zoomed-in ranges (factor 2) next to zoomed-out ranges (factor -2); the
# latter are shifted by 6000 before plotting.
plotRanges(
  ir[-3] * 2,
  shift(ir[-3] * -2, 6000),
  height = 0.5,
  sep = 0.4,
  colors = c("red", "gray20", "white")
)
Shift
shift(x, shift=0L, use.names=TRUE)
# Original ranges together with copies shifted by -5000 and by +1000.
plotRanges(
  ir,
  shift(ir, -5000),
  shift(ir, 1000),
  height = 0.5,
  sep = 0.4
)
Narrow
narrow(x, start=NA, end=NA, width=NA, use.names=TRUE)
# Original ranges versus ranges narrowed from the start (100) and narrowed
# relative to the end (-200). TRUE is spelled out because T can be reassigned.
plotRanges(
  ir,
  narrow(ir, 100),
  narrow(ir, -200),
  colors = c("red", "green", "blue"),
  addlabel = TRUE
)

# A positive start is counted from the beginning of each range.
narrow(ir, 100)
## IRanges object with 6 ranges and 0 metadata columns: ## start end width ## <integer> <integer> <integer> ## [1] 1099 1499 401 ## [2] 2099 2999 901 ## [3] 3099 3499 401 ## [4] 1599 2499 901 ## [5] 2599 4499 1901 ## [6] 4099 6499 2401 |
# A negative start is counted back from the end of each range, so only the
# last 200 positions of every range are kept.
narrow(ir, -200)
## IRanges object with 6 ranges and 0 metadata columns: ## start end width ## <integer> <integer> <integer> ## [1] 1300 1499 200 ## [2] 2800 2999 200 ## [3] 3300 3499 200 ## [4] 2300 2499 200 ## [5] 4300 4499 200 ## [6] 6300 6499 200 |
# IRanges object with 6 ranges and 0 metadata columns: # start end width # <integer> <integer> <integer> # [1] 1300 1499 200 # [2] 2800 2999 200 # [3] 3300 3499 200 # [4] 2300 2499 200 # [5] 4300 4499 200 # [6] 6300 6499 200 |
Flank
flank(x, width, start=TRUE, both=FALSE, use.names=TRUE, …)
# Flanks of width 100 and -100 computed with the default settings.
plotRanges(
  ir,
  flank(ir, 100),
  flank(ir, -100),
  colors = c("red", "green", "blue")
)

# Same widths with both = TRUE and with start = FALSE.
# TRUE/FALSE are spelled out because T and F can be reassigned.
plotRanges(
  ir,
  flank(ir, 100, both = TRUE),
  flank(ir, -100, start = FALSE),
  colors = c("red", "green", "blue")
)
Reflect
reflect(x, bounds, use.names=TRUE)
# Reflect the ranges within the bounding range 1000..1999 and plot the
# originals, the reflected ranges and the bounds together.
bounds <- IRanges(start = 1000, width = 1000)
ir.trans <- reflect(ir, bounds = bounds)
plotRanges(
  ir,
  ir.trans,
  bounds,
  colors = c("red", "green", "blue")
)
Promoters
promoters(x, upstream=2000, downstream=200, …)
# Promoter regions computed with upstream = 200 and downstream = 100,
# plotted against the original ranges.
# TRUE is spelled out because T can be reassigned.
ir.trans <- promoters(ir, upstream = 200, downstream = 100)
plotRanges(ir, ir.trans, addlabel = TRUE, label = 1:6)
Resize
resize(x, width, fix=“start”, use.names=TRUE, …)
# Resize to widths 100/500 (recycled over the ranges), anchored at the end
# and at the centre.
plotRanges(
  ir,
  resize(ir, width = c(100, 500), fix = "end"),
  resize(ir, width = c(100, 500), fix = "center")
)

# Stack the original ranges (A), the start-anchored resize (B) and the
# centre-anchored resize (C) into one table and plot them with row labels.
# `fix = "start"` belongs to resize(); it was previously passed to
# as.data.frame() because of a misplaced parenthesis.
dat <- rbind(
  as.data.frame(ir),
  as.data.frame(resize(ir, width = c(100, 500), fix = "start")),
  as.data.frame(resize(ir, width = c(100, 500), fix = "center"))
)
n_ranges <- length(ir)
rownames(dat) <- paste0(rep(LETTERS[1:3], each = n_ranges), seq_len(n_ranges))
plotRanges(IRanges(start = dat$start, end = dat$end), label = rownames(dat))
Restrict
restrict(x, start=NA, end=NA, keep.all.ranges=FALSE, use.names=TRUE)
# Restrict the ranges to two different intervals and compare with the input.
ir.trans1 <- restrict(ir, start = 200, end = 2000)
ir.trans2 <- restrict(ir, start = 3000, end = 3500)
plotRanges(ir, ir.trans1, ir.trans2)
Threebands
# threebands() returns `left`, `middle` and `right` components.
ir.trans <- threebands(ir, start = 40, end = -200)

# Outer bands
plotRanges(ir, ir.trans$left, ir.trans$right)

# Middle band, plotted next to narrow() called with the same start/end.
plotRanges(ir, narrow(ir, 40, -200), ir.trans$middle)
Inter-range transformations
# Seven partly overlapping ranges for the inter-range transformations.
ir <- IRanges(
  start = c(200, 1000, 3000, 2500, 3500, 1500, 4000),
  width = c(600, 1000, 300, 850, 500, 700, 250)
)
# IRanges object with 7 ranges and 0 metadata columns:
#           start       end     width
#       <integer> <integer> <integer>
#   [1]       200       799       600
#   [2]      1000      1999      1000
#   [3]      3000      3299       300
#   [4]      2500      3349       850
#   [5]      3500      3999       500
#   [6]      1500      2199       700
#   [7]      4000      4249       250

# range(): plotted against the input
ir.trans <- range(ir)
plotRanges(ir, ir.trans, alpha = 0.2)

# reduce(), gaps() and disjoin(), each plotted against the input.
# FALSE is spelled out because F can be reassigned.
plotRanges(ir, reduce(ir), group = FALSE, colors = c("gray90", "black", "white"))
plotRanges(ir, gaps(ir), group = FALSE)
plotRanges(ir, disjoin(ir), group = FALSE)
# Output of disjoin(ir):
# IRanges object with 9 ranges and 0 metadata columns:
#           start       end     width
#       <integer> <integer> <integer>
#   [1]       200       799       600
#   [2]      1000      1499       500
#   [3]      1500      1999       500
#   [4]      2000      2199       200
#   [5]      2500      2999       500
#   [6]      3000      3299       300
#   [7]      3300      3349        50
#   [8]      3500      3999       500
#   [9]      4000      4249       250

# Overlaps of the ranges with themselves
findOverlaps(ir) # same as findOverlaps(ir, ir)
## SelfHits object with 11 hits and 0 metadata columns: ## queryHits subjectHits ## <integer> <integer> ## [1] 1 1 ## [2] 2 2 ## [3] 2 6 ## [4] 3 4 ## [5] 3 3 ## [6] 4 4 ## [7] 4 3 ## [8] 5 5 ## [9] 6 2 ## [10] 6 6 ## [11] 7 7 ## - ## queryLength: 7 / subjectLength: 7 |
# Number of overlaps per range (each range also overlaps itself).
countOverlaps(ir)
## [1] 1 2 2 2 1 2 1 |
# 1 2 2 2 1 2 1 |
coverage
# coverage(ir)
# integer-Rle of length 4249 with 12 runs
#   Lengths: 199 600 200 500 500 200 300 500 300  50 150 750
#   Values :   0   1   0   1   2   1   0   1   2   1   0   1

# Expand the coverage to one value per position and draw it as a red line
# on top of the range plot.
cov <- as.vector(coverage(ir))
mat <- data.frame(V1 = seq_along(cov), cov = cov)
plotRanges(ir, height = 1) +
  geom_line(aes(x = V1, y = cov), color = "red", lwd = 1, data = mat)
Union
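intersect returns the regions shared by both sets; setdiff returns the regions of the first set that are not in the second; union returns the combined regions of both sets.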
# Two sets of six ranges for the set operations below.
ir1 <- IRanges(
  start = c(200, 1000, 3000, 2500, 4000, 6500),
  width = c(600, 1000, 800, 850, 400, 200)
)
ir2 <- IRanges(
  start = c(100, 1500, 2000, 3500, 5000, 7000),
  width = c(500, 800, 1000, 550, 700, 400)
)

# Element-wise distance between the paired ranges (0 where they overlap)
distance(ir1, ir2)
## [1] 0 0 0 150 600 300 |
# Both inputs side by side.
# TRUE is spelled out because T can be reassigned.
plotRanges(
  ir1,
  ir2,
  colors = c("gray90", "gray10", "red"),
  alpha = 0.5,
  addlabel = TRUE
)

# Set operations on the two inputs
plotRanges(intersect(ir1, ir2))
plotRanges(setdiff(ir1, ir2))
plotRanges(union(ir1, ir2))

# Parallel (element-wise) union; fill.gap = TRUE is needed here because some
# of the paired ranges do not overlap (see the non-zero distances).
plotRanges(punion(ir1, ir2, fill.gap = TRUE))
GRanges
# Fourteen named genomic ranges (a..n) on three chromosomes; seqnames and
# strand are given run-length encoded.
gr1 <- GRanges(
  seqnames = Rle(c("Chr1", "Chr2", "Chr3", "Chr3"), c(5:3, 2)),
  ranges = IRanges(
    start = c(1300, 1050, 2000, 5000, seq(1000, 5500, 500)),
    end = c(2500, 1870, 3200, 5800, seq(2000, 11000, 1000)),
    names = head(letters, 14)
  ),
  strand = Rle(c("+", "+", "-", "-", "+"), c(1:4, 4)),
  seqlengths = c(Chr1 = 1e+06, Chr2 = 2.5e+06, Chr3 = 2e+06)
)
# GRanges object with 14 ranges and 0 metadata columns:
#     seqnames        ranges strand
#        <Rle>     <IRanges>  <Rle>
#   a     Chr1 [1300,  2500]      +
#   b     Chr1 [1050,  1870]      +
#   c     Chr1 [2000,  3200]      +
#   d     Chr1 [5000,  5800]      -
#   e     Chr1 [1000,  2000]      -
#   .      ...           ...    ...
#   j     Chr3 [3500,  7000]      -
#   k     Chr3 [4000,  8000]      +
#   l     Chr3 [4500,  9000]      +
#   m     Chr3 [5000, 10000]      +
#   n     Chr3 [5500, 11000]      +
#   -
#   seqinfo: 3 sequences from an unspecified genome

# Subset by position or by name.
gr1[1:3, ] # same as gr1[1:3] or gr1[c("a", "b", "c")]
## GRanges object with 3 ranges and 0 metadata columns: ## seqnames ranges strand ## <Rle> <IRanges> <Rle> ## a Chr1 [1300, 2500] + ## b Chr1 [1050, 1870] + ## c Chr1 [2000, 3200] + ## - ## seqinfo: 3 sequences from an unspecified genome |
# Element names of the ranges
names(gr1)
## [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" |
# The underlying IRanges (start / end / width), without chromosome or strand
ranges(gr1)
## IRanges object with 14 ranges and 0 metadata columns: ## start end width ## <integer> <integer> <integer> ## a 1300 2500 1201 ## b 1050 1870 821 ## c 2000 3200 1201 ## d 5000 5800 801 ## e 1000 2000 1001 ## . ... ... ... ## j 3500 7000 3501 ## k 4000 8000 4001 ## l 4500 9000 4501 ## m 5000 10000 5001 ## n 5500 11000 5501 |
# Start, end and width as the three columns of a plain integer matrix
cbind(
  start(ranges(gr1)),
  end(ranges(gr1)),
  width(ranges(gr1))
)
## [,1] [,2] [,3] ## [1,] 1300 2500 1201 ## [2,] 1050 1870 821 ## [3,] 2000 3200 1201 ## [4,] 5000 5800 801 ## [5,] 1000 2000 1001 ## [6,] 1500 3000 1501 ## [7,] 2000 4000 2001 ## [8,] 2500 5000 2501 ## [9,] 3000 6000 3001 ## [10,] 3500 7000 3501 ## [11,] 4000 8000 4001 ## [12,] 4500 9000 4501 ## [13,] 5000 10000 5001 ## [14,] 5500 11000 5501 |
# Chromosome of every range, stored as a factor-Rle
seqnames(gr1)
## factor-Rle of length 14 with 3 runs ## Lengths: 5 4 5 ## Values : Chr1 Chr2 Chr3 ## Levels(3): Chr1 Chr2 Chr3 |
# Sequence-level information: names, lengths, circularity flag and genome
seqinfo(gr1)
## Seqinfo object with 3 sequences from an unspecified genome: ## seqnames seqlengths isCircular genome ## Chr1 1000000 NA <NA> ## Chr2 2500000 NA <NA> ## Chr3 2000000 NA <NA> |
# The individual seqinfo accessors bound together column by column
cbind(
  seqlevels(gr1),
  as.numeric(seqlengths(gr1)),
  isCircular(gr1),
  genome(gr1)
)
## [,1] [,2] [,3] [,4] ## Chr1 "Chr1" "1e+06" NA NA ## Chr2 "Chr2" "2500000" NA NA ## Chr3 "Chr3" "2e+06" NA NA |
# Strand of every range, stored as a factor-Rle with levels +, - and *
strand(gr1)
## factor-Rle of length 14 with 3 runs ## Lengths: 3 7 4 ## Values : + - + ## Levels(3): + - * |
# Reassign the chromosomes: 5 ranges on Chr1, 5 on Chr2 and 4 on Chr3.
seqnames(gr1) <- rep(c("Chr1", "Chr2", "Chr3"), c(5, 5, 4))

# Plot with different labelling options.
# TRUE is spelled out because T can be reassigned.
plotRanges(gr1, addlabel = TRUE)
plotRanges(gr1, label.group = TRUE)
plotRanges(gr1, label.group = TRUE, label.facet = TRUE, ylim = c(0, 25))
# 100 random ranges on four chromosomes with four metadata columns
# (GC, score, sample, group). Every sample()/rnorm() call draws from the
# seeded RNG, so reordering the arguments may change the generated data.
set.seed(1)
N <- 100
gr2 <- GRanges(
  seqnames = sample(c("Chr1", "Chr2", "Chr3", "Chr4"), N, replace = TRUE),
  IRanges(
    start = sample(100:10000, N, replace = TRUE),
    width = sample(100:10000, N, replace = TRUE)
  ),
  strand = sample(c("+", "-", "*"), N, replace = TRUE),
  GC = sample(seq(0, 1, 0.001), N, replace = TRUE),
  score = rnorm(N, 100, 10),
  sample = sample(c("Normal", "Abormal"), N, replace = TRUE),
  group = sample(LETTERS, N, replace = TRUE)
)

# All metadata columns as a DataFrame
values(gr2) # same as mcols(gr2)
## DataFrame with 100 rows and 4 columns ## GC score sample group ## <numeric> <numeric> <character> <character> ## 1 0.265 103.98106 Abormal R ## 2 0.372 93.87974 Normal E ## 3 0.573 103.41120 Normal Y ## 4 0.909 88.70637 Normal X ## 5 0.201 114.33024 Normal Y ## ... ... ... ... ... ## 96 0.798 92.49181 Normal L ## 97 0.455 120.87167 Abormal L ## 98 0.410 100.17396 Normal N ## 99 0.811 87.13699 Normal W ## 100 0.605 83.59394 Normal B |
# The score metadata column as a plain numeric vector
score(gr2) # same as mcols(gr2)$score or gr2$score
## [1] 103.98106 93.87974 103.41120 88.70637 114.33024 119.80400 96.32779 ## [8] 89.55865 105.69720 98.64945 124.01618 99.60760 106.89739 100.28002 ## [15] 92.56727 101.88792 81.95041 114.65555 101.53253 121.72612 104.75510 ## [22] 92.90054 106.10726 90.65902 87.46367 102.91446 95.56708 100.01105 ## [29] 100.74341 94.10479 94.31331 98.64821 111.78087 84.76433 105.93946 ## [36] 103.32950 110.63100 96.95816 103.70019 102.67099 94.57480 112.07868 ## [43] 111.60403 107.00214 115.86833 105.58486 87.23408 94.26735 87.75387 ## [50] 95.26599 93.79633 100.42116 90.89078 101.58029 93.45415 117.67287 ## [57] 107.16707 109.10174 103.84185 116.82176 93.64264 95.38355 114.32282 ## [64] 93.49304 97.92619 96.07192 96.80007 97.20887 104.94188 98.22670 ## [71] 94.94043 113.43039 97.85421 98.20443 98.99809 107.12666 99.26436 ## [78] 99.62366 93.18340 96.75730 100.60160 94.11106 105.31496 84.81606 ## [85] 103.06558 84.63550 96.99024 94.71720 93.47905 99.43103 80.85641 ## [92] 111.76583 83.35028 95.36470 88.84080 92.49181 120.87167 100.17396 ## [99] 87.13699 83.59394 |
# Any metadata column can be pulled out with `$`.
gr2$GC # same as mcols(gr2)$GC
## [1] 0.265 0.372 0.573 0.909 0.201 0.899 0.945 0.661 0.629 0.061 0.206 ## [12] 0.176 0.687 0.384 0.770 0.498 0.718 0.992 0.380 0.778 0.935 0.212 ## [23] 0.652 0.125 0.267 0.386 0.013 0.382 0.870 0.340 0.482 0.600 0.494 ## [34] 0.186 0.828 0.669 0.795 0.108 0.724 0.411 0.821 0.647 0.783 0.553 ## [45] 0.530 0.790 0.023 0.477 0.733 0.693 0.478 0.862 0.438 0.245 0.070 ## [56] 0.099 0.316 0.519 0.662 0.407 0.913 0.293 0.459 0.332 0.651 0.258 ## [67] 0.479 0.767 0.084 0.876 0.339 0.840 0.347 0.334 0.476 0.893 0.865 ## [78] 0.390 0.778 0.961 0.435 0.713 0.400 0.325 0.757 0.202 0.711 0.121 ## [89] 0.245 0.143 0.239 0.058 0.642 0.877 0.779 0.798 0.455 0.410 0.811 ## [100] 0.605 |
# Single-bracket indexing on mcols() keeps the result a one-column DataFrame.
mcols(gr2)["score"] # same as mcols(gr2)[2]
## DataFrame with 100 rows and 1 column ## score ## <numeric> ## 1 103.98106 ## 2 93.87974 ## 3 103.41120 ## 4 88.70637 ## 5 114.33024 ## ... ... ## 96 92.49181 ## 97 120.87167 ## 98 100.17396 ## 99 87.13699 ## 100 83.59394 |
# mcols stands for metadata columns.

# Plot the bare integer ranges of gr2 ...
ir <- ranges(gr2)
plotRanges(ir, sep = 0.2)

# ... and the genomic ranges themselves, shifted by 10000.
plotRanges(shift(gr2, 10000), sep = 0.2)

# Every overlapping (gr1, gr2) pair
findOverlapPairs(gr1, gr2)
## Pairs object with 113 pairs and 0 metadata columns: ## first second ## <GRanges> <GRanges> ## [1] Chr1:1300-2500:+ Chr1:1819-8744:* ## [2] Chr1:1300-2500:+ Chr1:591-6512:* ## [3] Chr1:1300-2500:+ Chr1:1415-9423:+ ## [4] Chr1:1050-1870:+ Chr1:1819-8744:* ## [5] Chr1:1050-1870:+ Chr1:591-6512:* ## ... ... ... ## [109] Chr3:5500-11000:+ Chr3:9581-16327:+ ## [110] Chr3:5500-11000:+ Chr3:9495-17298:* ## [111] Chr3:5500-11000:+ Chr3:6502-8024:* ## [112] Chr3:5500-11000:+ Chr3:6521-12157:+ ## [113] Chr3:5500-11000:+ Chr3:5001-6760:+ |
# Number of gr2 ranges overlapping each (named) range of gr1
countOverlaps(gr1, gr2)
## a b c d e f g h i j k l m n ## 3 3 6 10 3 5 7 10 11 11 10 10 12 12 |
# a b c d e f g h i j k l m n # 3 3 6 10 3 5 7 10 11 8 10 10 12 12 |
GGbio
ggbio handles the faceting and the mapping of the ranges automatically; this must mean that its geom_rect supports GRanges objects.
library(ggbio)
# The following objects are masked from 'package:ggplot2':
#
#     geom_bar, geom_rect, geom_segment, ggsave, stat_bin, stat_identity, xlim

# ggbio's own geoms accept a GRanges directly.
ggplot(gr2) + geom_rect()
ggplot(gr2) + geom_alignment()

# The ggplot2 geom still works when the aesthetics are given explicitly.
ggplot(gr2) +
  ggplot2::geom_rect(
    aes(xmin = start, ymin = score, xmax = end, ymax = score + 0.5)
  )

# Split by the `group` metadata column and plot the resulting list.
grl <- split(gr2, gr2$group)
ggplot(grl) + geom_alignment()

# Plain IRanges: linear and circular layouts
ggplot(ir) + geom_rect()
ggplot(ir) + layout_circle(geom = "rect")






