版权声明:本套课程材料开源,使用和分享必须遵守「创作共用许可协议 CC BY-NC-SA」(来源引用-非商业用途使用-以相同方式共享)。
## Data preparation
# Work on a copy of the built-in `airquality` data set.
data <- airquality
# Month becomes a factor so it can serve as a discrete grouping variable.
data$Month <- as.factor(data$Month)
# Celsius = (Fahrenheit - 32) / 1.8
data$Temp.C <- (data$Temp - 32) / 1.8
# Inspect the resulting structure (captured output follows as `#>` comments).
str(data)
#> 'data.frame': 153 obs. of 7 variables:
#> $ Ozone : int 41 36 12 18 NA 28 23 19 8 NA ...
#> $ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 ...
#> $ Wind : num 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
#> $ Temp : int 67 72 74 62 56 66 65 59 61 69 ...
#> $ Month : Factor w/ 5 levels "5","6","7","8",..: 1 1 1 1 1 1 1 1 1 1 ...
#> $ Day : int 1 2 3 4 5 6 7 8 9 10 ...
#> $ Temp.C : num 19.4 22.2 23.3 16.7 13.3 ...
`stat_bin()` using `bins = 30`. Pick better value `binwidth`.
# NOTE(review): assumes ggplot2 is already attached (no library() call is
# visible in this part of the file) — confirm.

# Histogram of temperature, bar heights on the density scale.
ggplot(data, aes(x = Temp.C)) +
  geom_histogram(
    # density: bar areas integrate to 1
    aes(y = after_stat(density)),
    binwidth = 5,
    color = "black",
    fill = "grey"
  )

# Same histogram, bar heights rescaled so the tallest bar equals 1.
ggplot(data, aes(x = Temp.C)) +
  geom_histogram(
    # ndensity: density rescaled to a maximum of 1
    aes(y = after_stat(ndensity)),
    binwidth = 5,
    color = "black",
    fill = "grey"
  )

# Violin plot of temperature for each month.
ggplot(data, aes(x = Month, y = Temp.C)) +
  geom_violin(aes(fill = Month)) +
  scale_y_continuous(limits = c(10, 40)) +
  labs(
    x = "Month",
    y = "Temperature",
    fill = "Month"
  )

# Violin plot with a narrow white box plot drawn on top of each violin.
ggplot(data, aes(x = Month, y = Temp.C)) +
  geom_violin(aes(fill = Month)) +
  geom_boxplot(fill = "white", width = 0.3) +
  scale_y_continuous(limits = c(10, 40)) +
  labs(
    x = "Month",
    y = "Temperature",
    fill = "Month"
  )

## install.packages("ggridges")
library(ggridges)

# Ridgeline density plot: one temperature density ridge per month.
ggplot(data, aes(x = Temp.C, y = Month)) +
  geom_density_ridges() +
  theme_ridges()
#> Picking joint bandwidth of 1.47
# Ridgeline plot whose fill follows the x value (temperature) along each ridge.
ggplot(data, aes(x = Temp.C, y = Month, fill = after_stat(x))) +
  geom_density_ridges_gradient() +
  labs(x = "Temperature", y = "Month") +
  theme_ridges()
#> Picking joint bandwidth of 1.47
# Gradient ridgeline plot with a viridis palette and no legend.
ggplot(data, aes(x = Temp.C, y = Month, fill = after_stat(x))) +
  geom_density_ridges_gradient(
    scale = 0.95,        # scale the tallest peak to 95%
    show.legend = FALSE  # hide the legend
  ) +
  scale_fill_viridis_c(option = "C") +  # viridis colour scheme
  labs(x = "Temperature", y = "Month") +
  theme_ridges()
#> Picking joint bandwidth of 1.47
# Histogram-style ridgeline plot (binned counts instead of smooth densities).
ggplot(data, aes(x = Temp.C, y = Month, fill = after_stat(x))) +
  geom_density_ridges_gradient(
    stat = "binline",    # histogram statistic
    bins = 20,           # 20 histogram bins
    scale = 0.8,         # scale the tallest peak to 80%
    show.legend = FALSE  # hide the legend
  ) +
  scale_x_continuous(limits = c(10, 40)) +
  scale_fill_viridis_c(option = "C") +
  labs(x = "Temperature", y = "Month") +
  theme_ridges()

## Data preparation: estimated marginal means (emmeans) of monthly temperature
# Linear model of temperature on month; its estimated marginal means are
# summarised into a table with standard errors and confidence limits.
# NOTE(review): `%>%` and emmeans() are assumed to be attached elsewhere — confirm.
model <- lm(Temp.C ~ Month, data)
# Nested form of the same call: summary(emmeans(model, "Month"))
means <- model %>% emmeans("Month") %>% summary()
means
#> Month emmean SE df lower.CL upper.CL
#> 5 18.6 0.664 148 17.3 19.9
#> 6 26.2 0.675 148 24.8 27.5
#> 7 28.8 0.664 148 27.5 30.1
#> 8 28.9 0.664 148 27.6 30.2
#> 9 24.9 0.675 148 23.6 26.3
#> Confidence level used: 0.95
# Bar chart of the monthly means with error bars spanning lower.CL..upper.CL.
ggplot(means, aes(x = Month, y = emmean)) +
  geom_col(
    color = "black",
    fill = "grey",
    width = 0.6
  ) +
  geom_errorbar(
    aes(ymin = lower.CL, ymax = upper.CL),
    width = 0.1
  ) +
  labs(
    y = "Mean Temperature",
    title = "Air Quality Data"
  ) +
  theme_classic()

## Data preparation: estimated marginal means (emmeans) for a two-factor
## between-subjects ANOVA
# NOTE(review): `between.2` is not defined in the visible code; presumably a
# demo data set supplied by an attached package — confirm.
d <- between.2
# Both design factors must be factors for the A * B model and for plotting.
d$A <- as.factor(d$A)
d$B <- as.factor(d$B)
model <- lm(SCORE ~ A * B, data = d)
# Estimated marginal means of A, computed separately within each level of B.
means <- model %>% emmeans("A", by = "B") %>% summary()
means
#> B = 1:
#> A emmean SE df lower.CL upper.CL
#> 1 4.00 0.682 18 2.57 5.43
#> 2 3.75 0.682 18 2.32 5.18
#> B = 2:
#> A emmean SE df lower.CL upper.CL
#> 1 4.00 0.682 18 2.57 5.43
#> 2 8.00 0.682 18 6.57 9.43
#> B = 3:
#> A emmean SE df lower.CL upper.CL
#> 1 4.75 0.682 18 3.32 6.18
#> 2 12.00 0.682 18 10.57 13.43
#> Confidence level used: 0.95
# Grouped bar chart: bars for the levels of B are placed side by side within A.
ggplot(means, aes(x = A, y = emmean, fill = B)) +
  geom_col(
    position = "dodge",  # shift bars horizontally so they do not overlap
    width = 0.6
  ) +
  geom_errorbar(
    aes(ymin = lower.CL, ymax = upper.CL),
    position = position_dodge(0.6),  # same dodge width as the bar width
    width = 0.15,
    color = "black"
  )

# Same chart with a fixed y axis, a ColorBrewer palette and a classic theme.
ggplot(means, aes(x = A, y = emmean, fill = B)) +
  geom_col(position = "dodge", width = 0.6) +
  geom_errorbar(
    aes(ymin = lower.CL, ymax = upper.CL),
    position = position_dodge(0.6),
    width = 0.15,
    color = "black"
  ) +
  scale_y_continuous(
    expand = expansion(add = 0),  # no padding beyond the axis limits
    limits = c(0, 15),
    breaks = seq(0, 15, 3)
  ) +
  scale_fill_brewer(palette = "Set1") +
  labs(x = "A", y = "SCORE") +
  theme_classic()

## Data preparation: estimated marginal means (emmeans) of monthly temperature
# Refit the month model; `model` and `means` were overwritten by the
# two-factor example, so they are rebuilt here for the monthly temperatures.
model <- lm(Temp.C ~ Month, data)
# Nested form of the same call: summary(emmeans(model, "Month"))
means <- model %>% emmeans("Month") %>% summary()
means
#> Month emmean SE df lower.CL upper.CL
#> 5 18.6 0.664 148 17.3 19.9
#> 6 26.2 0.675 148 24.8 27.5
#> 7 28.8 0.664 148 27.5 30.1
#> 8 28.9 0.664 148 27.6 30.2
#> 9 24.9 0.675 148 23.6 26.3
#> Confidence level used: 0.95
# Plot the model's estimated marginal means by month with confidence intervals.
emmip(model, ~ Month, CIs = TRUE) +
  labs(
    x = "Month",
    y = "Mean Temperature",
    title = "Air Quality Data",
    subtitle = "Daily Temperature",
    caption = "* Error bar = 95% CI"
  ) +
  theme_classic()
# NOTE(review): the two messages below mention geom_smooth(), which the call
# above does not use; presumably they belong to plots whose code is not shown.
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Scatter plot of temperature against wind speed with a linear fit.
ggplot(data, aes(x = Wind, y = Temp.C)) +
  geom_point(color = "grey") +
  geom_smooth(method = "lm", color = "black")
#> `geom_smooth()` using formula = 'y ~ x'
# Same scatter plot with a dashed red reference line at y = 22; the line is
# added last, so it is drawn on top of the points and the fit.
ggplot(data, aes(x = Wind, y = Temp.C)) +
  geom_point(color = "grey") +
  geom_smooth(method = "lm", color = "black") +
  geom_hline(yintercept = 22, linetype = "dashed", color = "red")
#> `geom_smooth()` using formula = 'y ~ x'
# Reference line added first, so the points and the fit are drawn over it.
ggplot(data, aes(x = Wind, y = Temp.C)) +
  geom_hline(yintercept = 22, linetype = "dashed", color = "red") +
  geom_point(color = "grey") +
  geom_smooth(method = "lm", color = "black")
#> `geom_smooth()` using formula = 'y ~ x'
# Horizontal (y = 22, red) and vertical (x = 9, blue) dashed reference lines
# underneath the points and the linear fit.
ggplot(data, aes(x = Wind, y = Temp.C)) +
  geom_hline(yintercept = 22, linetype = "dashed", color = "red") +
  geom_vline(xintercept = 9, linetype = "dashed", color = "blue") +
  geom_point(color = "grey") +
  geom_smooth(method = "lm", color = "black")
#> `geom_smooth()` using formula = 'y ~ x'
# Scatter plot with points coloured by month.
ggplot(data, aes(x = Wind, y = Temp.C, color = Month)) +
  geom_point() +
  scale_color_brewer(palette = "Set1")

# Add one linear fit per month, without confidence bands.
ggplot(data, aes(x = Wind, y = Temp.C, color = Month)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  scale_color_brewer(palette = "Set1") +
  labs(title = "Temperature & Wind Speed")
#> `geom_smooth()` using formula = 'y ~ x'
# Per-month linear fits plus a second, black smoother layer with a confidence
# band, on fixed axis ranges. Each smoother layer emits its own message.
ggplot(data, aes(x = Wind, y = Temp.C, color = Month)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  geom_smooth(method = "lm", color = "black") +
  scale_x_continuous(limits = c(0, 21)) +
  scale_y_continuous(limits = c(10, 40)) +
  scale_color_brewer(palette = "Set1") +
  labs(title = "Temperature & Wind Speed")
#> `geom_smooth()` using formula = 'y ~ x'
#> `geom_smooth()` using formula = 'y ~ x'
# Bubble plot: point size is mapped to solar radiation, colour to month.
ggplot(data, aes(x = Wind, y = Temp.C, color = Month, size = Solar.R)) +
  geom_point(shape = 21) +
  scale_color_brewer(palette = "Set1")
#> Warning: Removed 7 rows containing missing values or values outside the scale range
#> (`geom_point()`).
# Bubble plot with explicit size-scale breaks at 50, 100, ..., 300.
ggplot(data, aes(x = Wind, y = Temp.C, color = Month, size = Solar.R)) +
  geom_point(shape = 21) +
  scale_color_brewer(palette = "Set1") +
  scale_size_continuous(breaks = seq(50, 300, 50))
#> Warning: Removed 7 rows containing missing values or values outside the scale range
#> (`geom_point()`).
# Two point layers: a translucent layer filled by month (alpha = 0.2), then a
# second layer of the same shape drawn over it. Each layer emits its own warning.
ggplot(data, aes(x = Wind, y = Temp.C, color = Month, size = Solar.R)) +
  geom_point(aes(fill = Month), alpha = 0.2, shape = 21) +
  geom_point(shape = 21) +
  scale_color_brewer(palette = "Set1") +
  scale_fill_brewer(palette = "Set1") +
  scale_size_continuous(breaks = seq(50, 300, 50))
#> Warning: Removed 7 rows containing missing values or values outside the scale range
#> (`geom_point()`).
#> Removed 7 rows containing missing values or values outside the scale range
#> (`geom_point()`).
Pearson's r and 95% confidence intervals:
─────────────────────────────────────────────────
r [95% CI] p N
─────────────────────────────────────────────────
Ozone-Solar.R 0.35 [ 0.17, 0.50] <.001 *** 111
Ozone-Wind -0.60 [-0.71, -0.47] <.001 *** 116
Ozone-Temp 0.70 [ 0.59, 0.78] <.001 *** 116
Ozone-Month 0.16 [-0.02, 0.34] .078 . 116
Ozone-Day -0.01 [-0.20, 0.17] .888 116
Solar.R-Wind -0.06 [-0.22, 0.11] .496 146
Solar.R-Temp 0.28 [ 0.12, 0.42] <.001 *** 146
Solar.R-Month -0.08 [-0.23, 0.09] .366 146
Solar.R-Day -0.15 [-0.31, 0.01] .070 . 146
Wind-Temp -0.46 [-0.57, -0.32] <.001 *** 153
Wind-Month -0.18 [-0.33, -0.02] .027 * 153
Wind-Day 0.03 [-0.13, 0.19] .739 153
Temp-Month 0.42 [ 0.28, 0.54] <.001 *** 153
Temp-Day -0.13 [-0.28, 0.03] .108 153
Month-Day -0.01 [-0.17, 0.15] .922 153
─────────────────────────────────────────────────
# Restyle the stored correlation plot with a binned "RdBu" fill scale.
# NOTE(review): `cor` is not defined in the visible code; presumably it is the
# result object of the correlation analysis whose table is printed above — confirm.
cor$plot +
  labs(title = "Correlation Plot") +
  scale_fill_fermenter(
    palette = "RdBu",
    direction = 1,
    limits = c(-1, 1),
    breaks = seq(-1, 1, 0.2),
    guide = guide_colorsteps(
      barwidth = 0.5,
      barheight = 10
    )
  )
#> Scale for fill is already present.
#> Adding another scale for fill, which will replace the existing scale.
# Same restyling of the stored correlation plot, using the "Spectral" palette.
# NOTE(review): `cor` is not defined in the visible code — confirm its origin.
cor$plot +
  labs(title = "Correlation Plot") +
  scale_fill_fermenter(
    palette = "Spectral",
    direction = 1,
    limits = c(-1, 1),
    breaks = seq(-1, 1, 0.2),
    guide = guide_colorsteps(
      barwidth = 0.5,
      barheight = 10
    )
  )
#> Scale for fill is already present.
#> Adding another scale for fill, which will replace the existing scale.
# Rebuild `data` as a data.table for the time-series plots.
# NOTE(review): assumes data.table is attached elsewhere — confirm.
data <- as.data.table(airquality)
# Build a Date column from Month and Day, using 1973 as the year.
data[, Date := as.Date(sprintf("1973-%02d-%02d", Month, Day))]
# Celsius = (Fahrenheit - 32) / 1.8
data[, Temp.C := (Temp - 32) / 1.8]
data
#> Ozone Solar.R Wind Temp Month Day Date Temp.C
#> <int> <int> <num> <int> <int> <int> <Date> <num>
#> 1: 41 190 7.4 67 5 1 1973-05-01 19.44
#> 2: 36 118 8.0 72 5 2 1973-05-02 22.22
#> 3: 12 149 12.6 74 5 3 1973-05-03 23.33
#> 4: 18 313 11.5 62 5 4 1973-05-04 16.67
#> 5: NA NA 14.3 56 5 5 1973-05-05 13.33
#> ---
#> 149: 30 193 6.9 70 9 26 1973-09-26 21.11
#> 150: NA 145 13.2 77 9 27 1973-09-27 25.00
#> 151: 14 191 14.3 75 9 28 1973-09-28 23.89
#> 152: 18 131 8.0 76 9 29 1973-09-29 24.44
#> 153: 20 223 11.5 68 9 30 1973-09-30 20.00
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Daily temperature over time: grey line, points coloured by temperature, and
# a black smoothed trend.
ggplot(data, aes(x = Date, y = Temp.C)) +
  geom_line(color = "grey") +
  geom_point(aes(color = Temp.C)) +
  geom_smooth(color = "black") +
  scale_color_distiller(palette = "RdYlBu")
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Daily temperature line chart with filled points and a month-based date axis.
ggplot(data, aes(x = Date, y = Temp.C)) +
  geom_line(linewidth = 1) +
  geom_point(aes(fill = Temp.C), shape = 21) +
  scale_x_date(
    date_labels = "%m-%d",  # alternative format: "%Y-%m-%d"
    date_breaks = "1 month",
    date_minor_breaks = "7 days"
  ) +
  scale_y_continuous(limits = c(10, 40)) +
  scale_fill_distiller(palette = "RdYlBu") +
  labs(
    x = NULL,
    y = "Temperature",
    title = "Daily Temperature"
  )
作业要求:
ggplot2绘图代码,练习绘制变量的分布(直方图)、大小(柱形图)、关系(散点图)、
趋势(折线图),每种图绘制一个即可。
平台提交: