R Introduction
Data Types
a[-c(1, 3)] a[[3]] <- NULL a[c(TRUE, FALSE, TRUE)] ### yes
Atomic vector
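`c()` (combine) builds an atomic vector. All elements of an atomic vector share one type (logical, integer, double, or character); mixed inputs are coerced to the most general type present.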
Basic
# Build a vector from mixed inputs. Same as: c(1, 2L, 3 != 3, c(4 == 4)) -> v
# All elements are coerced to one common type (double here): TRUE becomes 1,
# FALSE becomes 0, and the integer literal 2L (the L suffix marks an integer)
# becomes the double 2.
v <- c(1, 2L, 3 != 3, c(4 == 4))
v
# [1] 1 2 0 1

names(v) <- letters[1:4]
v
# a b c d
# 1 2 0 1

v[2]
# b
# 2
v[c(1, 2, 2, 1)]
# a b b a
# 1 2 2 1
v[[2]]  # [[ ]] drops the name
# [1] 2
v[2:4]  # same as: v[c(2, 3, 4)]; v[c(2:4)]; v[c(FALSE, TRUE, TRUE, TRUE)]
# b c d
# 2 0 1
v[c("a", "b")]
v[1:2]
# a b
# 1 2
v[-2]  # drop the 2nd element
# a c d
# 1 0 1
v[-1:-2]  # drop the first two elements
# c d
# 0 1
v[v > 0]
# a b d
# 1 2 1

append(v, 2)
# a b c d
# 1 2 0 1 2
append(v, v, after = 2)
# a b a b c d c d
# 1 2 1 2 0 1 0 1
Logic
# Uses the named vector `v` (a = 1, b = 2, c = 0, d = 1) built in the Basic
# section above.

# & and | compare element by element.
c(FALSE, TRUE, FALSE, TRUE) & c(TRUE, FALSE, FALSE, TRUE)
# [1] FALSE FALSE FALSE  TRUE

# && and || work on single values only. Older versions of R silently used
# just the first element of each operand; R >= 4.3.0 raises an error when an
# operand is longer than one, so the first elements are selected explicitly.
c(FALSE, TRUE, FALSE, TRUE)[1] && c(TRUE, FALSE, FALSE, TRUE)[1]
# [1] FALSE

c(FALSE, TRUE, FALSE, TRUE) | c(TRUE, FALSE, FALSE, TRUE)
# [1]  TRUE  TRUE FALSE  TRUE

c(FALSE, TRUE, FALSE, TRUE)[1] || c(TRUE, FALSE, FALSE, TRUE)[1]
# [1] TRUE

v == c(1, 2, 0, 1)
#    a    b    c    d
# TRUE TRUE TRUE TRUE

c(1, 2, 3) %in% v
# [1]  TRUE  TRUE FALSE

vv <- v > 1
vv
#     a     b     c     d
# FALSE  TRUE FALSE FALSE

all(v > 1)  # are all of the values TRUE?
# [1] FALSE
any(v > 1)  # is any of the values TRUE?
# [1] TRUE
Special character vectors
# Built-in character constants.
letters
#  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
# [20] "t" "u" "v" "w" "x" "y" "z"

LETTERS
#  [1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S"
# [20] "T" "U" "V" "W" "X" "Y" "Z"

month.abb  # three-letter abbreviations of the English month names
#  [1] "Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"

month.name  # English month names (these constants do not follow the locale)
#  [1] "January"   "February"  "March"     "April"     "May"       "June"
#  [7] "July"      "August"    "September" "October"   "November"  "December"
Sampling
# NOTE: the drawn values shown below are the ones from the original write-up.
# The default algorithm behind sample() changed in R 3.6.0, so the same seed
# may give different draws on your R version; order()/sort() behave the same.
set.seed(0)  # fix the seed so the results are reproducible

sample(c(letters[1:5]), 5)  # same as: sample(c(letters[1:5]))
# [1] "e" "b" "d" "c" "a"

sample(c(letters[1:5]), 5, replace = TRUE)
# [1] "b" "e" "e" "d" "d"

nums <- sample(10, 10)  # a random permutation of 1..10
nums
#  [1]  1  2  9  5  3  4  8  6  7 10

order(nums)  # same as: sort.list(nums); positions that would sort `nums`
#  [1]  1  2  5  6  4  8  9  7  3 10

sort(nums)
#  [1]  1  2  3  4  5  6  7  8  9 10
Identical
# identical() tests for exact equality of two objects, including type and
# attributes such as names.
x <- vector("numeric", length = 10)  # ten zeros of type double
y <- rep(0, 10)
identical(x, y)
# [1] TRUE

vect <- c(foo = 11, bar = 2, foobar = NA)
names(vect)
# [1] "foo"    "bar"    "foobar"

vect2 <- c(11, 2, NA)
names(vect2) <- c("foo", "bar", "foobar")
identical(vect, vect2)
# [1] TRUE
Factors
Basic
Factors represent categorical data: a factor is an integer vector of codes together with a `levels` attribute that maps each code to a character label.
# NOTE: the drawn values shown below are the ones from the original write-up;
# sample() may return a different sequence on other R versions (its default
# algorithm changed in R 3.6.0).
set.seed(0)
f <- factor(sample(LETTERS[c(1, 20, 3, 7)], 10, replace = TRUE))
f
#  [1] G T T C G A G G C C
# Levels: A C G T

f[2]  # subsetting keeps all levels
# [1] T
# Levels: A C G T

f[2:3, drop = TRUE]  # drop = TRUE discards the levels that are no longer used
# [1] T T
# Levels: T

levels(f)
# [1] "A" "C" "G" "T"

as.character(f)
#  [1] "G" "T" "T" "C" "G" "A" "G" "G" "C" "C"

as.numeric(f)  # the underlying integer codes (positions in levels(f))
#  [1] 3 4 4 2 3 1 3 3 2 2

unclass(f)
#  [1] 3 4 4 2 3 1 3 3 2 2
# attr(,"levels")
# [1] "A" "C" "G" "T"
Levels manipulation
# Continues with the factor `f` (G T T C G A G G C C, levels A C G T) from
# the Basic section above.

f[4] <- "U"  # "U" is not one of the levels
## Warning: invalid factor level, NA generated
f
#  [1] G    T    T    <NA> G    A    G    G    C    C
# Levels: A C G T

# Add the new level first, then assign it.
levels(f) <- c(levels(f), "U")  # same as: levels(f)[5] <- "U"
f[4] <- "U"
f
#  [1] G T T U G A G G C C
# Levels: A C G T U

# Renaming level 4 ("T") to the existing label "U" merges the two levels.
levels(f)[4] <- "U"
f
#  [1] G U U U G A G G C C
# Levels: A C G U
# Continues with the factor `f` (levels A C G U) from the previous snippet.

## Renaming levels of a factor
# Each list entry is new_name = old_name. "T" is no longer a level of `f`, so
# "thymine" becomes a level that no element uses.
levels(f) <- list(
  "adenine" = "A",
  "thymine" = "T",
  "cytosine" = "C",
  "guanine" = "G",
  "uracil" = "U"
)
f
#  [1] guanine  uracil   uracil   uracil   guanine  adenine  guanine  guanine
#  [9] cytosine cytosine
# Levels: adenine thymine cytosine guanine uracil

## Drop the unused levels (returns a new factor; `f` itself is unchanged)
droplevels(f)
#  [1] guanine  uracil   uracil   uracil   guanine  adenine  guanine  guanine
#  [9] cytosine cytosine
# Levels: adenine cytosine guanine uracil
Changing the order of levels
Order will determine, for example, how output will be printed, or the arrangement of items on a graph
# NOTE: the drawn values shown below are the ones from the original write-up;
# sample() may return a different sequence on other R versions. Changing the
# order of the levels never changes the values themselves.
set.seed(0)
f <- factor(
  sample(LETTERS[c(1, 20, 3, 7)], 10, replace = TRUE),
  levels = c("A", "T", "C", "G")
)
f
#  [1] G T T C G A G G C C
# Levels: A T C G

# Reverse the level order.
f <- factor(f, levels = rev(levels(f)))
f
#  [1] G T T C G A G G C C
# Levels: G C T A

# Move one level to the front.
f <- relevel(f, "A")
f
#  [1] G T T C G A G G C C
# Levels: A G C T

# Ordered factors: the levels default to sorted order...
Bases <- ordered(c("A", "T", "C", "G", "G"))
Bases
# [1] A T C G G
# Levels: A < C < G < T

# ...unless an explicit order is given.
Bases <- ordered(Bases, levels = c("A", "T", "C", "G"))
Bases
# [1] A T C G G
# Levels: A < T < C < G
Lists
Indexing into a list
l <- list(
  film = list(
    "The Shape of Water",
    "Dunkirk",
    "Three Billboards Outside Ebbing, Missouri",
    "Coco",
    "Call Me by Your Name"
  ),
  score = c(7.3, 8.5, 8.7, 9.1, 8.9),
  "The Best Film nomination"
)

## list[name or index] returns a list
l[2]
l["score"]
# $score
# [1] 7.3 8.5 8.7 9.1 8.9

## list[[name or index]] or list$name returns the element itself (a vector here)
l[[2]]
l[["score"]]
l$score
l[["s", exact = FALSE]]  # [[ ]] matches partial names only when asked to
# [1] 7.3 8.5 8.7 9.1 8.9

l[[1]][[1]]
# [1] "The Shape of Water"

l[[1]][1]
# [[1]]
# [1] "The Shape of Water"

l[-1]
# $score
# [1] 7.3 8.5 8.7 9.1 8.9
#
# [[2]]
# [1] "The Best Film nomination"

## [[ ]] must select exactly one element, so dropping one element of a
## three-element list is an error (the exact message depends on the R version).
## try() lets the rest of the script keep running.
try(l[[-1]])

## A vector index inside [[ ]] indexes recursively
l[[c(2, 3)]]  # same as: l[[2]][[3]]
# [1] 8.7
l[[c(1, 3)]]  # same as: l[[1]][[3]]
# [1] "Three Billboards Outside Ebbing, Missouri"

names(l)
# [1] "film"  "score" ""

## Assigning NULL removes an element
l[[3]] <- NULL  # same as: l[3] <- NULL
l
# $film
# $film[[1]]
# [1] "The Shape of Water"
# ...
# $film[[5]]
# [1] "Call Me by Your Name"
#
#
# $score
# [1] 7.3 8.5 8.7 9.1 8.9
Str
# Continues with the two-element list `l` (film, score) from the previous
# section.
l[3] <- "The Best Film nomination"

str(l)  # same as: str(c(l))
# List of 3
#  $ film :List of 5
#   ..$ : chr "The Shape of Water"
#   ..$ : chr "Dunkirk"
#   ..$ : chr "Three Billboards Outside Ebbing, Missouri"
#   ..$ : chr "Coco"
#   ..$ : chr "Call Me by Your Name"
#  $ score: num [1:5] 7.3 8.5 8.7 9.1 8.9
#  $      : chr "The Best Film nomination"

c(l)  # same as: l
# $film
# $film[[1]]
# [1] "The Shape of Water"
#
# ...
# $film[[5]]
# [1] "Call Me by Your Name"
#
#
# $score
# [1] 7.3 8.5 8.7 9.1 8.9
#
# [[3]]
# [1] "The Best Film nomination"

# c() of a list and a vector gives a list: the vector contributes one element
# per value.
str(c(list = l[2], l[[2]]))
# List of 6
#  $ list.score: num [1:5] 7.3 8.5 8.7 9.1 8.9
#  $           : num 7.3
#  $           : num 8.5
#  $           : num 8.7
#  $           : num 9.1
#  $           : num 8.9

as.list(l[[2]])
# [[1]]
# [1] 7.3
#
# [[2]]
# [1] 8.5
#
# [[3]]
# [1] 8.7
#
# [[4]]
# [1] 9.1
#
# [[5]]
# [1] 8.9
Matrix
Data Frames
Names
# stringsAsFactors = TRUE is spelled out because the outputs in this article
# show `sex` as a factor; since R 4.0.0 data.frame() keeps strings as
# character vectors by default.
d <- data.frame(
  subject = 1:4,
  sex = c("M", "F", "F", "M"),
  size = c(4, 2, 5, 3),
  stringsAsFactors = TRUE
)
d
#   subject sex size
# 1       1   M    4
# 2       2   F    2
# 3       3   F    5
# 4       4   M    3

print(d, row.names = FALSE)
#  subject sex size
#        1   M    4
#        2   F    2
#        3   F    5
#        4   M    3

dim(d)  # same as: c(nrow(d), ncol(d))
# [1] 4 3

dimnames(d)
# [[1]]
# [1] "1" "2" "3" "4"
#
# [[2]]
# [1] "subject" "sex"     "size"

rownames(d)
# [1] "1" "2" "3" "4"
colnames(d)
# [1] "subject" "sex"     "size"

# For a data frame, names() and colnames() return the same thing.
names(d)
# [1] "subject" "sex"     "size"
colnames(d)
# [1] "subject" "sex"     "size"
Indexing into a dataframe
# Uses the data frame `d` (subject, sex, size; `sex` is a factor) created in
# the Names section above.

d[[2, 1]]  # same as: d[2, ][[1]]
# [1] 2

## These are still data frames
d[1]  # same as: d[, 1, drop = FALSE]
#   subject
# 1       1
# 2       2
# 3       3
# 4       4

d[1, ]
#   subject sex size
# 1       1   M    4

d[2, ][1]
#   subject
# 2       2

## These are atomic vectors
d[, 2]
# [1] M F F M
# Levels: F M

d[, 1]
d[, "subject"]
d[[1]]
d[["subject"]]
d$subject
# [1] 1 2 3 4

which(d$sex != "M")
# [1] 2 3

d[d$sex == "M", ]
subset(d, sex == "M")
#   subject sex size
# 1       1   M    4
# 4       4   M    3

## Note: subset() cannot be used on the left-hand side of an assignment,
## so index with [ ] when modifying rows.
d[d$sex == "M", ]$sex <- "F"
d
#   subject sex size
# 1       1   F    4
# 2       2   F    2
# 3       3   F    5
# 4       4   F    3
Unique & Duplicated
# Continues with the data frame `d` from the previous section (four rows,
# every `sex` value now "F").
d <- rbind(d, data.frame(subject = c(5, 2), sex = "M", size = c(2, 5)))
d
#   subject sex size
# 1       1   F    4
# 2       2   F    2
# 3       3   F    5
# 4       4   F    3
# 5       5   M    2
# 6       2   M    5

unique(d$sex)
# [1] F M
# Levels: F M

match(unique(d$sex), d$sex)  # position of the first occurrence of each value
# [1] 1 5

## Find duplicated values
## The first instance of a particular value is not counted as a duplicate
duplicated(d$sex)
# [1] FALSE  TRUE  TRUE  TRUE FALSE  TRUE

d[duplicated(d$sex), ]
#   subject sex size
# 2       2   F    2
# 3       3   F    5
# 4       4   F    3
# 6       2   M    5

d[!duplicated(d$sex), ]
#   subject sex size
# 1       1   F    4
# 5       5   M    2

d[match(unique(d$sex), d$sex), ]
#   subject sex size
# 1       1   F    4
# 5       5   M    2
Subset
# Continues with the six-row data frame `d` from the previous section.

# Subset of particular rows and columns (four equivalent spellings)
subset(d, subject < 3, select = -subject)
subset(d, subject < 3, select = c(sex, size))
subset(d, subject < 3, select = sex:size)
d[d$subject < 3, c("sex", "size")]
#   sex size
# 1   F    4
# 2   F    2
# 6   M    5

# Rows matching either condition
d[d$subject == 1 | d$sex == "M", ]
subset(d, subject == 1 | sex == "M")
#   subject sex size
# 1       1   F    4
# 5       5   M    2
# 6       2   M    5

# Rows whose value is in a set
d[d$subject %in% c(1, 5), ]
subset(d, subject %in% c(1, 5))
#   subject sex size
# 1       1   F    4
# 5       5   M    2

# Randomizing the row order
# NOTE: the permutation shown is the one from the original write-up; sample()
# may return a different order on other R versions.
set.seed(0)
d[sample(nrow(d)), ]
#   subject sex size
# 6       2   M    5
# 2       2   F    2
# 5       5   M    2
# 4       4   F    3
# 3       3   F    5
# 1       1   F    4
Factors
# Continues with the six-row data frame `d` from the previous sections.
str(d)
# 'data.frame':	6 obs. of  3 variables:
#  $ subject: num  1 2 3 4 5 2
#  $ sex    : Factor w/ 2 levels "F","M": 1 1 1 1 2 2
#  $ size   : num  4 2 5 3 2 5

## Find out which columns are factors
fac <- vapply(d, is.factor, logical(1))  # same as: fac <- sapply(d, is.factor)
fac
# subject     sex    size
#   FALSE    TRUE   FALSE

## stringsAsFactors only affects character columns at the moment a data frame
## is built; a column that already is a factor stays a factor.
d <- data.frame(d, stringsAsFactors = FALSE)
str(d)
# 'data.frame':	6 obs. of  3 variables:
#  $ subject: num  1 2 3 4 5 2
#  $ sex    : Factor w/ 2 levels "F","M": 1 1 1 1 2 2
#  $ size   : num  4 2 5 3 2 5

## To actually convert the factor columns to character:
# d[fac] <- lapply(d[fac], as.character)
Se7en
from wikipedia
# `index` holds how many entries of `gloss` belong to each virtue, in order.
Se7en <- list(
  sins = c("lust", "gluttony", "greed", "sloth", "wrath", "envy", "pride"),
  virtues = c(
    "Chastity", "Temperance", "Charity", "Diligence",
    "Patience", "Kindness", "Humility"
  ),
  gloss = c(
    "purity", "abstinence", "Humanity", "equanimity", "will", "benevolence",
    "generosity", "sacrifice", "persistence", "Effort", "ethics",
    "forgiveness", "mercy", "satisfaction", "compassion", "bravery",
    "modesty", "reverence"
  ),
  index = c(2, 2, 4, 3, 2, 2, 3)
)

# stringsAsFactors = TRUE is spelled out because the str() output below shows
# factor columns; since R 4.0.0 data.frame() keeps strings as character by
# default.
S <- data.frame(
  sins = Se7en[[1]],
  virtues = Se7en[[2]],
  stringsAsFactors = TRUE
)

# Cut `gloss` into one group per virtue: group i receives index[i] entries.
g <- split(Se7en$gloss, rep(seq_along(Se7en$index), Se7en$index))
g
# $`1`
# [1] "purity"     "abstinence"
#
# $`2`
# [1] "Humanity"   "equanimity"
#
# $`3`
# [1] "will"        "benevolence" "generosity"  "sacrifice"
#
# $`4`
# [1] "persistence" "Effort"      "ethics"
#
# $`5`
# [1] "forgiveness" "mercy"
#
# $`6`
# [1] "satisfaction" "compassion"
#
# $`7`
# [1] "bravery"   "modesty"   "reverence"

# A list with one element per row can be stored as a data frame column.
S$gloss <- g
S
#       sins    virtues                                      gloss
# 1     lust   Chastity                         purity, abstinence
# 2 gluttony Temperance                       Humanity, equanimity
# 3    greed    Charity will, benevolence, generosity, sacrifice
# 4    sloth  Diligence                persistence, Effort, ethics
# 5    wrath   Patience                         forgiveness, mercy
# 6     envy   Kindness                   satisfaction, compassion
# 7    pride   Humility                bravery, modesty, reverence

# Inside data.frame(), wrap a list in I() to keep it as a single list column.
# The name `gloss` is already taken, so the new column becomes `gloss.1`.
str(data.frame(S, gloss = I(g)))
# 'data.frame':	7 obs. of  4 variables:
#  $ sins   : Factor w/ 7 levels "envy","gluttony",..: 4 2 3 6 7 1 5
#  $ virtues: Factor w/ 7 levels "Charity","Chastity",..: 2 7 1 3 6 5 4
#  $ gloss  :List of 7
#   ..$ 1: chr  "purity" "abstinence"
#   ..$ 2: chr  "Humanity" "equanimity"
#   ..$ 3: chr  "will" "benevolence" "generosity" "sacrifice"
#   ..$ 4: chr  "persistence" "Effort" "ethics"
#   ..$ 5: chr  "forgiveness" "mercy"
#   ..$ 6: chr  "satisfaction" "compassion"
#   ..$ 7: chr  "bravery" "modesty" "reverence"
#  $ gloss.1:List of 7
#   ..$ 1: chr  "purity" "abstinence"
#   ..$ 2: chr  "Humanity" "equanimity"
#   ..$ 3: chr  "will" "benevolence" "generosity" "sacrifice"
#   ..$ 4: chr  "persistence" "Effort" "ethics"
#   ..$ 5: chr  "forgiveness" "mercy"
#   ..$ 6: chr  "satisfaction" "compassion"
#   ..$ 7: chr  "bravery" "modesty" "reverence"
#   ..- attr(*, "class")= chr "AsIs"
Math
Ly Series
- lapply: Loop over a list or data.frame and evaluate a function on each element
- sapply: Same as lapply but try to simplify the result
- vapply: Same as sapply, but you specify the type (and length) of value each call must return
- apply: Apply a function over the margins of an array
- tapply: Apply a function over subsets of a vector
- mapply: Multivariate version of lapply
Table & Xtabs
# The first (unnamed) column of the text becomes the row names.
d <- read.table(header = TRUE, text = "
  subject sex size
1       1   F    4
2       2   F    2
3       3   F    5
4       4   F    3
5       5   M    2
")

# Cross-tabulate every column except `subject`.
contingency_table <- table(d[-1])
contingency_table
#    size
# sex 2 3 4 5
#   F 1 1 1 1
#   M 1 0 0 0

## A data frame of counts
counts_table <- as.data.frame(contingency_table)
counts_table
#   sex size Freq
# 1   F    2    1
# 2   M    2    1
# 3   F    3    1
# 4   M    3    0
# 5   F    4    1
# 6   M    4    0
# 7   F    5    1
# 8   M    5    0

# xtabs() turns the data frame of counts back into a contingency table.
xtabs(Freq ~ sex + size, counts_table)
#    size
# sex 2 3 4 5
#   F 1 1 1 1
#   M 1 0 0 0

## Convert from a data frame of counts to a data frame of cases
#
# x:        data frame with one row per combination and a count column.
# countcol: name of the count column (default "Freq").
# Returns `x` without the count column, each row repeated as many times as
# its count; rows with a count of 0 are dropped.
countsToCases <- function(x, countcol = "Freq") {
  # Row indices to pull from x. seq_len() also copes with a zero-row input,
  # where 1:nrow(x) would yield c(1, 0).
  idx <- rep(seq_len(nrow(x)), x[[countcol]])
  # Drop the count column
  x[[countcol]] <- NULL
  x[idx, ]
}

countsToCases(counts_table)
#   sex size
# 1   F    2
# 2   M    2
# 3   F    3
# 5   F    4
# 7   F    5
Tapply & Apply
# Uses the five-row data frame `d` read in the Table & Xtabs section above.

tapply(d$size, d$sex)  # without a function: the group index of each element
# [1] 1 1 1 1 2

tapply(d$size, d$sex, sum)
#  F  M
# 14  2

tapply(d$size, d$sex, range)
# $F
# [1] 2 5
#
# $M
# [1] 2 2

# range() returns two values per group, so the result stays a list even when
# simplification is requested.
tapply(d$size, d$sex, range, simplify = TRUE)

apply(d[-2], 1, sum)  # margin 1: over rows
# 1 2 3 4 5
# 5 4 8 7 7

apply(d[-2], 2, sum)  # margin 2: over columns
# subject    size
#      15      16
# Levels that never occur (4 and 5 here) still get an entry in the result;
# their value is NULL.
f <- factor(rep(1:3, 10), levels = 1:5)
tapply(1:30, f, range)
# $`1`
# [1]  1 28
#
# $`2`
# [1]  2 29
#
# $`3`
# [1]  3 30
#
# $`4`
# NULL
#
# $`5`
# NULL
Lapply & Sapply
# Uses `contingency_table`, `counts_table` and the five-row data frame `d`
# from the Table & Xtabs section above.
dat <- list(a = contingency_table, b = counts_table)

lapply(dat, function(e) e[, 1])  # first column of each element
# $a
# F M
# 1 1
#
# $b
# [1] F M F M F M F M
# Levels: F M

lapply(d[-2], sum)  # same as: sapply(d[-2], sum, simplify = FALSE)
# $subject
# [1] 15
#
# $size
# [1] 16

sapply(d[-2], sum)  # simplified to a named vector
# subject    size
#      15      16

sapply(d[-2], tabulate)  # equal-length results are simplified to a matrix
#      subject size
# [1,]       1    0
# [2,]       1    2
# [3,]       1    1
# [4,]       1    1
# [5,]       1    1

sapply(6:8, seq)  # results of different lengths cannot be simplified: a list
# [[1]]
# [1] 1 2 3 4 5 6
#
# [[2]]
# [1] 1 2 3 4 5 6 7
#
# [[3]]
# [1] 1 2 3 4 5 6 7 8
d <- data.frame(
  subject = letters[1:5],
  sex = letters[1:5],
  size = 1:5,
  stringsAsFactors = FALSE
)

# Upper-case the character columns and leave the others untouched.
# lapply() keeps each column's own type; sapply() would simplify the result
# to a character matrix and silently turn `size` into text.
upper <- data.frame(
  lapply(d, function(v) {
    if (is.character(v)) toupper(v) else v
  }),
  stringsAsFactors = FALSE
)
upper
#   subject sex size
# 1       A   A    1
# 2       B   B    2
# 3       C   C    3
# 4       D   D    4
# 5       E   E    5
a <- 1:10
names(a) <- letters[1:10]
b <- 2:14
names(b) <- letters[3:15]
a
#  a  b  c  d  e  f  g  h  i  j
#  1  2  3  4  5  6  7  8  9 10
b
#  c  d  e  f  g  h  i  j  k  l  m  n  o
#  2  3  4  5  6  7  8  9 10 11 12 13 14

# For every element of `a`, add the element of `b` with the same name when
# there is one. seq_along(a) stays correct even if `a` is empty, where
# 1:length(a) would give c(1, 0).
ab_sum <- sapply(seq_along(a), function(i) {
  if (names(a)[i] %in% names(b)) {
    a[i] + b[[names(a)[i]]]
  } else {
    a[i]
  }
})
ab_sum
#  a  b  c  d  e  f  g  h  i  j
#  1  2  5  7  9 11 13 15 17 19
Mapply
# mapply() calls the function with the first elements of all arguments, then
# the second elements, and so on: rep(1, 4), rep(2, 3), ...
mapply(rep, 1:4, 4:1)
# [[1]]
# [1] 1 1 1 1
#
# [[2]]
# [1] 2 2 2
#
# [[3]]
# [1] 3 3
#
# [[4]]
# [1] 4

# The two arguments have different lengths (10 and 11), so 1:10 is recycled
# for the last call (1 + 90) and R warns that the longer argument is not a
# multiple of the length of the shorter one.
mapply(sum, 1:10, 100:90)
#  [1] 101 101 101 101 101 101 101 101 101 101  91
# word(C, k): the string C repeated k times and glued together,
# e.g. word("A", 3) is "AAA".
word <- function(C, k) paste(rep(C, k), collapse = "")

# The first argument is a character vector, so its values become the names
# of the result.
mapply(word, LETTERS[1:6], 6:1)
#        A        B        C        D        E        F
# "AAAAAA"  "BBBBB"   "CCCC"    "DDD"     "EE"      "F"
Control Structures
IF
# ifelse() is the vectorized if: negative values are replaced by NA before
# taking the square root, which avoids the NaN warning from sqrt().
x <- 5:-5
sqrt(ifelse(x >= 0, x, NA))
#  [1] 2.236068 2.000000 1.732051 1.414214 1.000000 0.000000       NA       NA
#  [9]       NA       NA       NA
## Finding the leap year
# A year is a leap year when it is divisible by 4, except century years,
# which must also be divisible by 400.
# year <- as.integer(readline(prompt = "Enter a year: "))
year <- 2018
if ((year %% 4) == 0) {
  if ((year %% 100) == 0) {
    if ((year %% 400) == 0) {
      print(paste(year, "is a leap year"))
    } else {
      print(paste(year, "is not a leap year"))
    }
  } else {
    print(paste(year, "is a leap year"))
  }
} else {
  print(paste(year, "is not a leap year"))
}
# [1] "2018 is not a leap year"
For
x <- letters[1:10]

## Print a-d: loop over explicit indices
for (i in 1:4) {
  print(x[i])
}

## Print a-j: loop over all indices
## seq_along(x) is safe for an empty vector, where 1:length(x) gives c(1, 0)
for (i in seq_along(x)) {
  print(x[i])
}

## Print a-j: loop over the elements themselves
for (letter in x) {
  print(letter)
}

## Print a-j: a single-statement body does not need braces
for (i in seq_along(x)) print(x[i])
# Print the perfect squares among 50, 49, ..., 0.
# `next` skips to the next iteration; `break` leaves the loop at the first
# negative number.
x <- 50:-50
for (i in seq_along(x)) {
  if (x[i] < 0) {
    break
  } else if (round(sqrt(x[i]))^2 != x[i]) {
    next
  } else {
    cat(x[i], " ")
  }
}
# 49   36   25   16   9   4   1   0
## Finding the Highest Common Factor / Greatest Common Divisor
#
# hcf(x, y): tries every candidate from 1 up to the smaller of the two
# numbers and keeps the largest one that divides both.
# x, y: whole numbers; the sign is ignored.
# Returns the highest common factor; hcf(n, 0) is n.
hcf <- function(x, y) {
  x <- abs(x)
  y <- abs(y)
  smaller <- min(x, y)
  # Every number divides 0, so the answer is the other number. This also
  # keeps the loop from ever testing a division by zero.
  if (smaller == 0) {
    return(max(x, y))
  }
  # The result lives in its own variable rather than one named `hcf`, which
  # would shadow the function inside its own body.
  divisor <- 1L
  for (i in seq_len(smaller)) {
    if ((x %% i == 0) && (y %% i == 0)) {
      divisor <- i
    }
  }
  divisor
}

# num1 <- as.integer(readline(prompt = "Enter first number: "))
# num2 <- as.integer(readline(prompt = "Enter second number: "))
num1 <- 17
num2 <- 17 * 12
print(paste("The H.C.F. of", num1, "and", num2, "is", hcf(num1, num2)))
# [1] "The H.C.F. of 17 and 204 is 17"
While
## Finding the H.C.F. using the Euclidean algorithm
#
# hcf(x, y): repeatedly replaces the pair (x, y) by (y, x %% y); when the
# remainder reaches 0, the other number is the highest common factor.
# x, y: whole numbers.
hcf <- function(x, y) {
  while (y != 0) {
    remainder <- x %% y
    x <- y
    y <- remainder
  }
  x
}

num1 <- 16 * 2
num2 <- 3 * 16
print(paste("The H.C.F. of", num1, "and", num2, "is", hcf(num1, num2)))
# [1] "The H.C.F. of 32 and 48 is 16"
## Finding the Lowest Common Multiple
#
# lcm(x, y): walks through the multiples of the larger number until one is
# also a multiple of the smaller number.
# x, y: whole numbers; the sign is ignored.
# Returns the lowest common multiple; it is 0 when either number is 0.
lcm <- function(x, y) {
  x <- abs(x)
  y <- abs(y)
  # The only common multiple of 0 and anything is 0; returning early also
  # avoids testing a remainder of division by zero.
  if (x == 0 || y == 0) {
    return(0)
  }
  greater <- max(x, y)
  smaller <- min(x, y)
  # The answer is a multiple of the larger number, so only those need to be
  # checked (instead of every integer above it).
  multiple <- greater
  while (multiple %% smaller != 0) {
    multiple <- multiple + greater
  }
  multiple
}

num1 <- 16 * 2
num2 <- 3 * 16
print(paste("The L.C.M. of", num1, "and", num2, "is", lcm(num1, num2)))
# [1] "The L.C.M. of 32 and 48 is 96"
From these two results we can check the identity Number1 × Number2 = L.C.M. × H.C.F. (the H.C.F. is also called the G.C.D.): 32 × 48 = 1536 = 96 × 16.
Strings
Dirty Data
REFERENCES
- http://www.cookbook-r.com/
- R Documentation
All articles in this blog are licensed under CC BY-NC-SA 4.0 unless stating additionally.
Comment





