Datasets(数据集)

ToothGrowth

该数据集描述了这样一个实验:给60只豚鼠(guinea pig)喂食不同剂量(0.5、1、2 mg/day)和不同来源(橙汁 OJ / 维C片 VC)的维C,观察它们的成牙本质细胞(odontoblasts)长度如何变化。

  • len: numeric Tooth length
  • supp: factor Supplement type (VC or OJ).
  • dose: numeric Dose in milligrams/day

PlantGrowth

该数据集则描述了不同处理条件下植物干重的变化

  • weight: numeric
  • group: factor

Load data(载入数据)

library(tidyverse)
# First rows of each built-in dataset (console output shown after `##`)
head(ToothGrowth)
##    len supp dose
## 1  4.2   VC  0.5
## 2 11.5   VC  0.5
## 3  7.3   VC  0.5
## 4  5.8   VC  0.5
## 5  6.4   VC  0.5
## 6 10.0   VC  0.5
head(PlantGrowth)
##   weight group
## 1   4.17  ctrl
## 2   5.58  ctrl
## 3   5.18  ctrl
## 4   6.11  ctrl
## 5   4.50  ctrl
## 6   4.61  ctrl

# Convert dose to a factor so it is handled as a discrete variable
ToothGrowth$dose <- factor(ToothGrowth$dose)

# Base boxplots: `t` for ToothGrowth (by supplement), `p` for PlantGrowth (by group)
t <- ggplot(data = ToothGrowth, mapping = aes(x = supp, y = len, fill = supp)) +
  geom_boxplot()
p <- ggplot(data = PlantGrowth, mapping = aes(x = group, y = weight, fill = group)) +
  geom_boxplot()

Scales(标度)

There are many kinds of scales. They take the form scale_xxx_yyy.
Here are some commonly-used values of xxx and yyy:

xxx Description
colour Color of lines and points
fill Color of area fills (e.g. bar graph,legend)
linetype Solid/dashed/dotted lines
shape Shape of points
size Size of points
alpha Opacity/transparency
yyy Description
hue Equally-spaced colors from the color wheel
manual Manually-specified values (e.g., colors, point shapes, line types)
gradient Color gradient
grey Shades of grey
discrete Discrete values (e.g., colors, point shapes, line types, point sizes)
continuous Continuous values (e.g., alpha, colors, point sizes)

Legends(图例)

Hide legends

The simplest way is to use guides(fill="none"), replacing fill with the desired aesthetic. (The older spelling guides(fill=FALSE) still works but is deprecated since ggplot2 3.3.4.)

Or use scale_fill_discrete and theme options instead

# Three equivalent ways to drop the fill legend.
# Use the string "none" to switch a guide off: passing FALSE is deprecated
# in ggplot2 >= 3.3.4, while "none" works in old and new versions alike.
# p + scale_fill_discrete(guide = "none")
# p + theme(legend.position = "none")
p + guides(fill = "none")

Legend order

To change the order of the legend items to trt2, trt1, ctrl (the reverse of the default):

# Reverse the legend order. The commented variants are equivalent:
# p + guides(fill = guide_legend(reverse = TRUE))
# p + scale_fill_discrete(guide = guide_legend(reverse = TRUE))
# p + scale_fill_discrete(breaks = rev(levels(PlantGrowth$group)))
p +
  scale_fill_discrete(breaks = c("trt2", "trt1", "ctrl"))

Legend manual scale

# Manual fill scale: explicit colours, a two-line legend title, and a
# readable label for each group
p +
  scale_fill_manual(
    name = "Experimental\nCondition",
    values = c("#999999", "#E69F00", "#56B4E9"),
    breaks = c("ctrl", "trt1", "trt2"),
    labels = c("Control", "Treatment 1", "Treatment 2")
  )

Legend theme

# Restyle the legend: bold blue title and labels, light-grey background
# with a dotted outline, and move it above the plot
p +
  theme(
    legend.position = "top",
    legend.background = element_rect(fill = "gray90", size = .5, linetype = "dotted"),
    legend.text = element_text(colour = "blue", size = 16, face = "bold"),
    legend.title = element_text(colour = "blue", size = 16, face = "bold")
  )

Legend position

You can specify the legend position inside the plotting area. Note: the numeric position below is relative to the entire area, including titles and labels, not just the plotting area.

# Put the legend at relative position (0.2, 0.8)
p +
  theme(legend.position = c(.2, .8))

# Set the "anchoring point" of the legend (bottom-left is 0,0; top-right is 1,1):
# here the legend's bottom-right corner is pinned to position (1, 0)
p +
  theme(
    legend.position = c(1, 0),
    legend.justification = c(1, 0)
  )

Axes(坐标轴)

  • ggplot(small)+geom_bar(aes(x=cut, fill=cut))+coord_flip()
  • ggplot(small)+geom_bar(aes(x=factor(1), fill=cut))+coord_polar(theta="y")
  • ggplot(small)+geom_bar(aes(x=factor(1), fill=cut))+coord_polar()
  • ggplot(small)+geom_bar(aes(x=clarity, fill=cut))+coord_polar()

X,Y axis

# Reverse the order of the x axis: ctrl trt1 trt2 --> trt2 trt1 ctrl
p +
  scale_x_discrete(limits = rev(levels(PlantGrowth$group)))

# Relabel the x-axis ticks
p +
  scale_x_discrete(
    breaks = c("ctrl", "trt1", "trt2"),
    labels = c("Control", "Treat 1", "Treat 2")
  )

# Hide x tick marks, labels, and grid lines
p +
  xlab("group") +
  scale_x_discrete(name = "", breaks = NULL)

# Suppress y ticks marks, labels, and grid lines, set name, note the priority of scale
p +
  scale_y_continuous(name = "Weight (Kg)", breaks = NULL) +
  ylab("Weight")

Continuous axis

  • aes(ymin = …, ymax = …)
  • expand_limits()
  • scale_y_continuous
  • xlim, ylim

通过标度设置范围的方式(scale_y_continuous 的 limits、xlim/ylim)会先丢掉不在范围内的数据,然后再画图;若只想放大显示某个范围、同时保留全部数据参与统计,需使用 coord_cartesian()。
# Make sure to include 0 in the y axis
p +
  expand_limits(y = 0)

# if any scale_y_continuous command is used, it overrides any ylim command
# These are equivalent
# t + expand_limits(y = c(0, 20))
# t + scale_y_continuous(limits = c(0, 20))
p +
  ylim(0, 20)

# t + ylim(5, 10)
# Removed 13 rows containing non-finite values (stat_boxplot).
p +
  coord_cartesian(ylim = c(5, 10)) +
  scale_y_continuous(breaks = seq(0, 10, 0.25)) # Ticks from 0-10, every .25

# Reversing the direction of an axis
p +
  scale_y_reverse()

Transformations

There are two ways of transforming an axis.

  • scale transform, the data is transformed before properties such as breaks (the tick locations) and range of the axis are decided.
  • coordinate transform, the transformation happens after the breaks and scale range are decided.

准备一下数据。由于ggplot2只自带了 scale_y_log10 与 scale_y_sqrt 两种变换,为了方便使用更多的坐标轴变换,需要加载 scales 包

library(scales)  
# Reproducible demo data: x is a noisy linear ramp, y a noisy power of 2,
# and z splits the 100 rows into four groups of 10/20/30/40
set.seed(0)
n <- 100
dat <- data.frame(
  x = (seq_len(n) + rnorm(n, sd = 1)) / 20,
  y = 2^((seq_len(n) + rnorm(n, sd = 1)) / 20),
  z = factor(rep(c("A", "B", "C", "D"), times = c(10, 20, 30, 40)))
)
# Scatterplot on regular (linear) axes, coloured by group z
poi <- ggplot(dat, aes(x = x, y = y, colour = z)) +
  geom_point()
poi

Coordinates Transformations

# Swap the x and y axes
poi +
  coord_flip()

# One stacked bar drawn in polar coordinates with the angle mapped to y
ggplot(dat) +
  geom_bar(aes(x = factor(1), fill = z)) +
  coord_polar(theta = "y")

# The same stacked bar with the default polar mapping
ggplot(dat) +
  geom_bar(aes(x = factor(1), fill = z)) +
  coord_polar()

# Bars counted over rounded x values, drawn in polar coordinates
ggplot(dat) +
  geom_bar(aes(x = round(x), fill = z)) +
  coord_polar()

Axis Ratio

# coord_fixed() fixes the on-screen ratio between y units and x units,
# and the ratio does not have to be 1:1.
# ratio = 1/6: one unit on the x axis is as long as six units on the y axis
poi +
  coord_fixed(ratio = 1 / 6)

# Force equal scaling: one unit has the same length on both axes
poi +
  coord_fixed()

Axis Transformations

# Scale transformation: log2 scaling of the y axis (with visually-equal spacing)
poi +
  scale_y_continuous(trans = log2_trans())

# Coordinate transformation: log2 on y (with visually-diminishing spacing)
poi +
  coord_trans(y = "log2")

Manual Transformations

# log2 y axis whose tick labels are shown as powers of two (exponents)
poi +
  scale_y_continuous(
    trans = log2_trans(),
    breaks = trans_breaks("log2", function(x) 2^x),
    labels = trans_format("log2", math_format(2^.x))
  )

# Display your values as comma, percent, dollar, scientific or abbreviate
poi +
  scale_x_continuous(labels = dollar) +
  scale_y_continuous(labels = percent)

Display numeric minutes in HH:MM:SS format

# Create your own formatting function.
#
# Format numeric minutes as clock-style labels.
#
# Args:
#   x: numeric vector of durations in minutes; the fractional part is
#      converted to seconds.
# Returns:
#   Character vector formatted as HH:MM:SS, with a leading "00:" hour field
#   and then a single leading "0" removed (e.g. 1.5 -> "1:30").
HMS_formatter <- function(x) {
  # Round once on the total number of seconds and derive every field from it,
  # so a value such as 1.999 carries over to "2:00" instead of yielding "1:60".
  total_sec <- round(x * 60)
  h <- total_sec %/% 3600
  m <- (total_sec %/% 60) %% 60
  s <- total_sec %% 60
  lab <- sprintf("%02d:%02d:%02d", h, m, s) # Format the strings as HH:MM:SS
  lab <- gsub("^00:", "", lab)              # Remove leading 00: if present
  gsub("^0", "", lab)                       # Remove leading 0 if present
}

# Let's combine them: HH:MM:SS labels on a y axis zoomed to 1-5, and
# scientific notation on x. The argument is spelled `labels` in full rather
# than relying on partial argument matching of `label`.
poi +
  scale_y_continuous(breaks = seq(1, 5, 0.25), labels = HMS_formatter) +
  coord_cartesian(ylim = c(1, 5)) +
  scale_x_continuous(labels = scientific)

Lines(线)

Basic

Use geom_hline here because the y-axis is the continuous one; it is also possible to use geom_vline (with xintercept) if the x-axis is continuous.

# Dashed reference line at the overall mean weight. The intercept is a single
# constant, so it is passed as a parameter instead of being mapped in aes():
# that draws one line rather than one per data row, and avoids using `$`
# inside aes().
p +
  geom_hline(
    yintercept = mean(PlantGrowth$weight),
    colour = "#990000",
    linetype = "dashed"
  )

Separate lines

To make separate lines for categorical value bar, use geom_errorbar. The error bars have no height – ymin=ymax. It also seems necessary to specify y for some reason, even though it doesn’t do anything.

# Per-group mean, minimum and maximum of plant weight (dplyr)
dat <- PlantGrowth %>%
  group_by(group) %>%
  summarise(mean = mean(weight), ymin = min(weight), ymax = max(weight))
# # A tibble: 3 x 4
#   group     mean  ymin  ymax
#   <fct> <dbl> <dbl> <dbl>
# 1 ctrl   5.03  4.17  6.11
# 2 trt1   4.66  3.59  6.03
# 3 trt2   5.53  4.92  6.31

# Bar chart of the group means
pl <- ggplot(dat, aes(x = group, y = mean)) +
  geom_bar(stat = "identity")

# Draw with separate lines for each bar: an error bar whose ymin equals its
# ymax collapses to a single horizontal line
pl +
  geom_errorbar(
    aes(ymin = ymin + 1, ymax = ymin + 1),
    width = .8, size = 2, linetype = 1, colour = "#AA0000"
  )

With two continuous axes

# Per-dose mean, minimum and maximum of tooth length (dplyr)
dat <- ToothGrowth %>%
  group_by(dose) %>%
  summarise(mean = mean(len), ymin = min(len), ymax = max(len))
# A tibble: 3 x 4
#   dose   mean  ymin  ymax
#   <fct> <dbl> <dbl> <dbl>
# 1 0.5    10.6  4.20  21.5
# 2 1      19.7 13.6   27.3
# 3 2      26.1 18.5   33.9

# Jittered points, one facet per dose
Tooth <- ggplot(ToothGrowth, aes(x = dose, y = len, colour = dose)) +
  geom_point(position = "jitter") +
  facet_grid(. ~ dose)

# Mean line in each facet plus a dashed vertical line at each dose position
Tooth +
  geom_hline(mapping = aes(yintercept = mean), data = dat) +
  geom_vline(
    mapping = aes(xintercept = as.numeric(dose)),
    data = dat,
    colour = "#990000",
    linetype = "dashed"
  )

# Mean line in each facet plus a vertical range from min to max
Tooth +
  geom_hline(mapping = aes(yintercept = mean), data = dat) +
  geom_linerange(
    mapping = aes(x = dose, y = NULL, ymin = ymin, ymax = ymax),
    data = dat
  )
## Warning: Ignoring unknown aesthetics: y

Shapes(形状)

我们回到mtcars数据集。If you want to use hollow shapes without manually declaring each shape, you can use scale_shape(solid=FALSE). Note, however, that the lines will be visible inside the shapes. To avoid this, you can use shapes 21-25 and specify a white fill.

# Hollow shapes
mtcars$cyl <- factor(mtcars$cyl)
ggplot(mtcars, aes(x = wt, y = mpg, colour = cyl)) +
  geom_line(aes(linetype = cyl), size = 1.5) +
  geom_point(aes(shape = cyl), size = 4) +
  scale_shape(solid = FALSE)

# Shapes with white fill
ggplot(mtcars, aes(x = wt, y = mpg, colour = cyl)) +
  geom_line(aes(linetype = cyl), size = 1.5) +
  geom_point(aes(shape = cyl), size = 4, fill = "white") +
  scale_shape_manual(values = c(21, 22, 24)) # Shapes: fillable circle, square, triangle

Annotation(注释)

# Labelled scatterplot of mtcars with a highlighted region and an arrow.
p <- ggplot(
  mtcars,
  aes(x = wt, y = mpg, colour = factor(cyl), label = rownames(mtcars))
)
# The rectangle and the arrow are one-off annotations, so both use annotate():
# a geom_segment() with constant aes() values would be drawn once per data row
# and would inherit the colour mapping (and a legend key) from the plot.
p +
  annotate("rect", xmin = 3, xmax = 3.5, ymin = 22, ymax = 25,
           fill = "dark grey", alpha = .5) +
  geom_text(hjust = 0, vjust = -1, alpha = 0.8, size = 2.5) +
  geom_point(size = 2, aes(shape = factor(cyl))) +
  annotate("segment", x = 5, y = 35, xend = 3.5, yend = 25,
           arrow = arrow(length = unit(0.5, "cm")))

# Bar chart of 26 random values with each value printed above its bar.
ggplot(
  data.frame(x = LETTERS[1:26], y = round(rnorm(26, 1, 2), 2)),
  aes(x, y, fill = factor(y))
) +
  geom_bar(stat = "identity") +
  # Zero baseline: geom_hline() is the direct form of a slope-0 abline
  geom_hline(yintercept = 0, size = 1, colour = "gray") +
  geom_text(aes(label = y), hjust = 0.5, vjust = -0.5) +
  scale_y_continuous(limits = c(-6, 6)) +
  # "none" hides the fill legend; FALSE is deprecated in ggplot2 >= 3.3.4
  guides(fill = "none")

Theme(主题)

  • theme_bw() or theme(panel.background = element_blank()) : 白色背景
  • theme_grey(): 默认浅灰色
  • theme_minimal() : 简洁
  • ggthemes可以提供更多的ggplot2主题
# Scatterplot with one linear fit per cylinder group, minimal theme.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
p <- ggplot(mtcars, aes(mpg, wt)) +
  geom_point(aes(color = factor(cyl))) +
  geom_smooth(method = "lm", se = FALSE, aes(color = factor(cyl))) +
  theme_minimal()
p

# library() stops with an error if ggthemes is missing; require() would only
# return FALSE and let the next line fail with a less helpful message.
library(ggthemes)
p + theme_economist()

# Line-and-point chart of 10 random values, used to demonstrate the root
# theme elements (text, line, rect).
x <- LETTERS[1:10]
y <- abs(rnorm(10))
# Built with ggplot() because qplot() is deprecated in ggplot2 >= 3.4.0.
# Axis titles are set through labs(x = , y = ): labs() has no `xlab`/`ylab`
# arguments, so those names would not change the axis titles.
(p <- ggplot(
  data.frame(x = x, y = y),
  aes(x = x, y = y, colour = x, fill = x, group = 1)
) +
  geom_line() +
  geom_point() +
  labs(title = "The figure title.", x = "Factor", y = "Value") +
  theme(
    text = element_text(color = "red", size = 16),
    line = element_line(color = "blue"),
    rect = element_rect(fill = "white")
  ))

# Override individual elements on top of the root settings
p +
  theme(
    panel.background = element_rect(fill = "transparent", color = "gray"),
    legend.key = element_rect(fill = "transparent", color = "transparent"),
    axis.text = element_text(color = "red")
  )

改变网格线(gridlines)

# Remove both major and minor gridlines
p +
  ggtitle("Hide all the gridlines") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

# Remove only the horizontal (y) gridlines
p +
  ggtitle("Hide all the horizontal gridlines") +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank()
  )

# Hide all tick marks and labels,title(on X axis), but keep the gridlines
p +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks = element_blank()
  )

改变文字(text)

# X-axis label: bold, red, and 20 points
# X-axis tick marks: rotate 90 degrees CCW, move to the left a bit (using vjust,
#   since the labels are rotated), and 16 points
# Plot title: bold, red, 20 points, rotated 90 degrees. hjust is a
#   justification in [0, 1], so 0.5 is used rather than an out-of-range value.
p +
  labs(title = "Change the appearance") +
  theme(
    axis.title.x = element_text(face = "bold", colour = "#990000", size = 20),
    axis.text.x = element_text(angle = 90, vjust = 0.5, size = 16),
    plot.title = element_text(
      size = 20, color = "red", hjust = 0.5, face = "bold", angle = 90
    )
  )

改变分面(facet)

# Points of mpg against cyl, then faceted by gear (rows) and cyl (columns)
# with restyled facet strips
p <- ggplot(mtcars, aes(x = cyl, y = mpg)) +
  geom_point(colour = "ForestGreen")
p +
  facet_grid(gear ~ cyl) +
  theme(
    strip.background = element_rect(colour = "red", fill = "#CCCCFF"),
    strip.text.x = element_text(size = 8, angle = 75),
    strip.text.y = element_text(size = 12, face = "bold")
  )

自定义主题

# Custom dark theme: black backgrounds, yellow text and bright accent lines.
#
# Args:
#   base_size:   font size given to the root `text` element.
#   base_family: font family given to the root `text` element.
# Returns:
#   The object produced by theme(); add it to a plot with `+`.
theme_black <- function(base_size = 12, base_family = "") {
  # Root elements that the more specific theme elements inherit from
  root_line <- element_line(
    colour = "Chartreuse", size = 0.5, linetype = 1, lineend = "butt"
  )
  root_rect <- element_rect(
    fill = "black", colour = "black", size = 0.5, linetype = 1
  )
  root_text <- element_text(
    family = base_family, face = "plain", colour = "yellow",
    size = base_size, hjust = 0.5, vjust = 0.5, angle = 0,
    lineheight = 0.9
  )

  theme(
    line = root_line,
    rect = root_rect,
    text = root_text,
    panel.grid = element_blank(),
    panel.background = element_rect(fill = "black"),
    panel.border = element_rect(color = "azure", fill = NA),
    axis.line = element_line(color = "white"),
    axis.text.x = element_text(color = "deeppink"),
    axis.text.y = element_text(color = "BlanchedAlmond"),
    axis.ticks = element_line(color = "white")
  )
}
p + theme_black()

一个栗子

# Minimalist box plot of tooth length by dose: boxes are filled and outlined
# in the dose colour, and the median is drawn on top as a white bar.
ggplot(ToothGrowth, aes(x = dose, y = len, color = dose, fill = dose)) +
  geom_boxplot(outlier.colour = NULL, outlier.size = 0) +
  stat_summary(
    geom = "crossbar", width = 0.65, fatten = 0, color = "white",
    # fun.data returns a one-row data frame with y/ymin/ymax; all three are
    # the median so the crossbar collapses to a single line.
    fun.data = function(x) {
      med <- median(x)
      data.frame(y = med, ymin = med, ymax = med)
    }
  ) +
  labs(title = "box plot") +
  # "none" hides a legend; guide = FALSE is deprecated in ggplot2 >= 3.3.4
  scale_fill_hue(guide = "none") +
  scale_color_hue(guide = "none") +
  theme_bw() +
  theme(
    panel.grid.major.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.line.x = element_blank(),
    axis.title.x = element_blank(),
    axis.line.y = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_text(color = "gray"),
    axis.ticks.y = element_line(color = "gray")
  )

Fonts(字体)

With geom_text or annotate in ggplot2, you can set a number of properties of the text. geom_text is used to add text from the data frame, and annotate is used to add a single text element.

When controlling elements such as the title, legend, axis labels, and so on, you use element_text, which has the same parameters, except that size is points (not mm), and instead of fontface, it uses face. The default value of size depends on the element; for example, titles are larger than tick labels.

Name Default value
size 5
family “” (sans)
fontface plain
lineheight 1.2
angle 0
hjust 0.5
vjust 0.5
# Three labels at y = 1, 2, 3; the second one spans two lines
dat <- data.frame(
  y = 1:3,
  text = c("This is text", "Text with\nmultiple lines", "Some more text")
)

# Empty canvas: no axis breaks on either axis, fixed y range
p <- ggplot(dat, aes(x = 1, y = y)) +
  scale_x_continuous(breaks = NULL) +
  scale_y_continuous(limits = c(0.5, 3.5), breaks = NULL)

# Labels with the default text properties
p +
  geom_text(aes(label = text))

# Labels with custom font properties, plus a single annotated text element
p +
  geom_text(
    aes(label = text),
    family = "Times", fontface = "italic", lineheight = .8
  ) +
  annotate(
    geom = "text", x = 1, y = 1.5, label = "Annotation text",
    colour = "red", size = 7, family = "Courier", fontface = "bold",
    angle = 30
  )

# Font families parsed from inline CSV: a short alias (blank when there is
# none) and the canonical family name
fonttable <- read.table(
  text = '
Short,Canonical
mono,Courier
sans,Helvetica
serif,Times
,AvantGarde
,Bookman
,Helvetica-Narrow
,NewCenturySchoolbook
,Palatino
,URWGothic
,URWBookman
,NimbusMon
URWHelvetica,NimbusSan
,NimbusSanCond
,CenturySch
,URWPalladio
URWTimes,NimbusRom
',
  sep = ",",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Row index of each font in the table
fonttable$pos <- seq_len(nrow(fonttable))

library(reshape2)
# Long format: one row per (pos, name type), with the name in `Font`
fonttable <- melt(
  fonttable,
  id.vars = "pos",
  measure.vars = c("Short", "Canonical"),
  variable.name = "NameType",
  value.name = "Font"
)

# Make a table of faces. Make sure factors are ordered correctly
facetable <- data.frame(
  Face = factor(
    c("plain", "bold", "italic", "bold.italic"),
    levels = c("plain", "bold", "italic", "bold.italic")
  )
)

# The two tables share no columns, so merge() pairs every font row with
# every face
fullfonts <- merge(fonttable, facetable)

# Each font name drawn in its own family, one panel per face
pf <- ggplot(fullfonts, aes(x = NameType, y = pos)) +
  geom_text(aes(label = Font, family = Font, fontface = Face)) +
  facet_wrap(~ Face, ncol = 2)
pf

Color(颜色)

The default colors in ggplot2 can be difficult to distinguish from one another because they have equal luminance. By default, the colors for discrete scales are evenly spaced around a HSL color circle. For example, if there are two colors, then they will be selected from opposite points on the circle; if there are three colors, they will be 120° apart on the color circle; and so on. The colors used for different numbers of levels are shown here:

# Setting luminance and saturation (chromaticity)
# Darker fills: luminance 25 instead of the default 65
t +
  scale_fill_hue(l = 25)

# Darker and less saturated: luminance 25 and chroma reduced from 100 to 25
t +
  scale_fill_hue(l = 25, c = 25)

Color Brewer

library(RColorBrewer)
display.brewer.all() # e.g. brewer.pal(12, "Paired")

# Boxplots with jittered points coloured by dose (default palettes)
t +
  geom_point(
    aes(color = factor(dose)),
    position = position_jitter(0.2, 0.3), size = 2
  )

# Default ColorBrewer palette for the fill
t +
  geom_point(
    aes(color = factor(dose)),
    position = position_jitter(0.2, 0.3), size = 2
  ) +
  scale_fill_brewer()

# "Set1" palette for the fill
t +
  geom_point(
    aes(color = factor(dose)),
    position = position_jitter(0.2, 0.3), size = 2
  ) +
  scale_fill_brewer(palette = "Set1")

# "Accent" palette for both the point colour and the fill
t +
  geom_point(
    aes(color = factor(dose)),
    position = position_jitter(0.2, 0.3), size = 2
  ) +
  scale_color_brewer(palette = "Accent") +
  scale_fill_brewer(palette = "Accent")