12.5 条形图

条形图特别适合展示分类变量。这里展示钻石切割质量 cut 各等级的数量: 既可以先自行统计各类的数目,再在图层 geom_bar 中指定 stat = "identity" 直接绘制; 也可以把原始数据直接交给 geom_bar,由它自动计数

# Two data-frame columns have to be mapped here, which amounts to computing
# the count of each level ourselves before plotting.
with(diamonds, table(cut))
## cut
##      Fair      Good Very Good   Premium     Ideal 
##      1610      4906     12082     13791     21551
cut_df <- as.data.frame(table(diamonds$cut))
ggplot(data = cut_df, mapping = aes(x = Var1, y = Freq)) +
  geom_bar(stat = "identity")

# Pass the raw rows and let geom_bar() tally each cut level itself.
ggplot(data = diamonds, mapping = aes(x = cut)) +
  geom_bar()
频数条形图

图 12.23: 频数条形图

还有另外三种等价的写法,其中后两种(..count.. 和 stat(count))自 ggplot2 3.4.0 起已被弃用,推荐改用 after_stat(count)

# Same frequency chart with the count statistic named explicitly.
ggplot(diamonds, aes(x = cut)) + geom_bar(stat = "count")

# Map y to the computed count with the legacy dot-dot notation.
# Kept for comparison only: it is deprecated, as the warning below reports.
ggplot(diamonds, aes(x = cut, y = ..count..)) + geom_bar()
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Same mapping written with stat(); also deprecated, and the warning names
# after_stat(count) as the replacement.
ggplot(diamonds, aes(x = cut, y = stat(count))) + geom_bar()
## Warning: `stat(count)` was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

我们还可以在图 12.23 的基础上再添加一个分类变量钻石的纯净度 clarity,形成堆积条形图

# Fill by clarity so each cut bar is split into stacked clarity segments.
ggplot(data = diamonds, mapping = aes(x = cut, fill = clarity)) +
  geom_bar()
堆积条形图

图 12.24: 堆积条形图

再添加一个分类变量钻石颜色 color,比较好的做法是分面: 下面以 color 为横轴,按 cut 分面

# Stacked clarity counts per color, one facet column per cut level.
ggplot(data = diamonds, mapping = aes(x = color, fill = clarity)) +
  geom_bar() +
  facet_grid(cols = vars(cut))
分面堆积条形图

图 12.25: 分面堆积条形图

实际上,绘制图 12.25 的过程包含了对分类变量的分组计数,例如按 cut 和 color 分组计数的结果如下

# Two-way contingency table: cut levels in rows, color levels in columns.
table(cut = diamonds$cut, color = diamonds$color)
##            color
## cut            D    E    F    G    H    I    J
##   Fair       163  224  312  314  303  175  119
##   Good       662  933  909  871  702  522  307
##   Very Good 1513 2400 2164 2299 1824 1204  678
##   Premium   1603 2337 2331 2924 2360 1428  808
##   Ideal     2834 3903 3826 4884 3115 2093  896

还有一种堆积的方法是按比例,而不是按数量,如图 12.26 所示

# Same faceted layout, with every bar rescaled so the clarity segments
# show proportions instead of counts.
ggplot(data = diamonds, mapping = aes(x = color, fill = clarity)) +
  geom_bar(position = position_fill()) +
  facet_grid(cols = vars(cut))
比例堆积条形图

图 12.26: 比例堆积条形图

接下来就是复合条形图

# Place the clarity bars side by side within each color instead of stacking.
ggplot(data = diamonds, mapping = aes(x = color, fill = clarity)) +
  geom_bar(position = position_dodge())
复合条形图

图 12.27: 复合条形图

再添加一个分类变量,就需要分面大法了: 图 12.28 以 cut 作为分面的行依据,展示了三个分类变量,其实我们还可以再添加一个分类变量用作分面的列依据

# Side-by-side clarity bars per color, one facet row per cut level.
ggplot(data = diamonds, mapping = aes(x = color, fill = clarity)) +
  geom_bar(position = position_dodge()) +
  facet_grid(cut ~ .)
分面复合条形图

图 12.28: 分面复合条形图

图 12.28 展示的数据如下

# Three-way contingency table: one color-by-clarity slice per cut level.
table(color = diamonds$color, clarity = diamonds$clarity, cut = diamonds$cut)
## , , cut = Fair
## 
##      clarity
## color   I1  SI2  SI1  VS2  VS1 VVS2 VVS1   IF
##     D    4   56   58   25    5    9    3    3
##     E    9   78   65   42   14   13    3    0
##     F   35   89   83   53   33   10    5    4
##     G   53   80   69   45   45   17    3    2
##     H   52   91   75   41   32   11    1    0
##     I   34   45   30   32   25    8    1    0
##     J   23   27   28   23   16    1    1    0
## 
## , , cut = Good
## 
##      clarity
## color   I1  SI2  SI1  VS2  VS1 VVS2 VVS1   IF
##     D    8  223  237  104   43   25   13    9
##     E   23  202  355  160   89   52   43    9
##     F   19  201  273  184  132   50   35   15
##     G   19  163  207  192  152   75   41   22
##     H   14  158  235  138   77   45   31    4
##     I    9   81  165  110  103   26   22    6
##     J    4   53   88   90   52   13    1    6
## 
## , , cut = Very Good
## 
##      clarity
## color   I1  SI2  SI1  VS2  VS1 VVS2 VVS1   IF
##     D    5  314  494  309  175  141   52   23
##     E   22  445  626  503  293  298  170   43
##     F   13  343  559  466  293  249  174   67
##     G   16  327  474  479  432  302  190   79
##     H   12  343  547  376  257  145  115   29
##     I    8  200  358  274  205   71   69   19
##     J    8  128  182  184  120   29   19    8
## 
## , , cut = Premium
## 
##      clarity
## color   I1  SI2  SI1  VS2  VS1 VVS2 VVS1   IF
##     D   12  421  556  339  131   94   40   10
##     E   30  519  614  629  292  121  105   27
##     F   34  523  608  619  290  146   80   31
##     G   46  492  566  721  566  275  171   87
##     H   46  521  655  532  336  118  112   40
##     I   24  312  367  315  221   82   84   23
##     J   13  161  209  202  153   34   24   12
## 
## , , cut = Ideal
## 
##      clarity
## color   I1  SI2  SI1  VS2  VS1 VVS2 VVS1   IF
##     D   13  356  738  920  351  284  144   28
##     E   18  469  766 1136  593  507  335   79
##     F   42  453  608  879  616  520  440  268
##     G   16  486  660  910  953  774  594  491
##     H   38  450  763  556  467  289  326  226
##     I   17  274  504  438  408  178  179   95
##     J    2  110  243  232  201   54   29   25
# Reference: "A discussion about bar graphs",
# https://cosx.org/2017/10/discussion-about-bar-graph
# Simulated data: one row per age (1-30) and gender, with a random count
# drawn from 1:100. The seed is fixed so the draw is reproducible.
set.seed(2020)
dat <- data.frame(
  age = rep(1:30, 2),
  gender = rep(c("man", "woman"), each = 30),
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  num = sample(x = 1:100, size = 60, replace = TRUE)
)
# Shared canvas: every variant maps age, num and gender the same way.
bar_base <- ggplot(data = dat, mapping = aes(x = age, y = num, fill = gender))
# Overlapping bars, half-transparent
p1 <- bar_base + geom_col(position = "identity", alpha = 0.5)
# Stacked bars
p2 <- bar_base + geom_col(position = "stack")
# Side-by-side bars
p3 <- bar_base + geom_col(position = "dodge")
# Percent-stacked bars with percent-formatted y-axis labels
p4 <- bar_base +
  geom_col(position = "fill") +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(y = "%")
# NOTE(review): `+` and `/` between plots presumably come from patchwork,
# loaded outside this excerpt - confirm.
(p1 + p2) / (p3 + p4)
条形图的四种常见形态

图 12.29: 条形图的四种常见形态

以数据集 diamonds 为例,按照纯净度 clarity 和切工 cut 分组统计钻石的数量,再按切工分组统计不同纯净度的钻石数量占比,如表 12.1 所示

library(data.table)
# Convert to a data.table so the grouped summaries can use [i, j, by].
diamonds <- as.data.table(diamonds)
# Count per (cut, clarity), then the share within each cut, then a
# combined "count (percent)" label for display.
dat <- diamonds[, .(cnt = .N), by = .(cut, clarity)][
  , pct := cnt / sum(cnt), by = .(cut)][
  , pct_pp := paste0(cnt, " (", scales::percent(pct, accuracy = 0.01), ")")]
# Grouped counts; compare with(diamonds, table(clarity, cut))
knitr::kable(
  dcast(dat, formula = clarity ~ cut, value.var = "pct_pp"),
  align = "crrrrr", caption = "数值和比例组合呈现"
)
表 12.1: 数值和比例组合呈现
clarity Fair Good Very Good Premium Ideal
I1 210 (13.04%) 96 (1.96%) 84 (0.70%) 205 (1.49%) 146 (0.68%)
SI2 466 (28.94%) 1081 (22.03%) 2100 (17.38%) 2949 (21.38%) 2598 (12.06%)
SI1 408 (25.34%) 1560 (31.80%) 3240 (26.82%) 3575 (25.92%) 4282 (19.87%)
VS2 261 (16.21%) 978 (19.93%) 2591 (21.45%) 3357 (24.34%) 5071 (23.53%)
VS1 170 (10.56%) 648 (13.21%) 1775 (14.69%) 1989 (14.42%) 3589 (16.65%)
VVS2 69 (4.29%) 286 (5.83%) 1235 (10.22%) 870 (6.31%) 2606 (12.09%)
VVS1 17 (1.06%) 186 (3.79%) 789 (6.53%) 616 (4.47%) 2047 (9.50%)
IF 9 (0.56%) 71 (1.45%) 268 (2.22%) 230 (1.67%) 1212 (5.62%)

分别以复合条形图和百分比堆积条形图展示,并将数量和占比作为注释添加到条形图上,见图 12.30

# Dodged bars: diamond count per cut, one bar per clarity level.
# Each bar carries two labels: its count and its share within the cut.
p1 <- ggplot(data = dat, aes(x = cut, y = cnt, fill = clarity)) +
  geom_col(position = "dodge") +
  # Text has no width of its own, so dodge it by the bars' default width
  # (0.9); a width of 1 would push the labels off the bar centres.
  geom_text(
    aes(label = cnt),
    position = position_dodge(width = 0.9), vjust = -0.5
  ) +
  geom_text(
    aes(label = scales::percent(pct, accuracy = 0.1)),
    position = position_dodge(width = 0.9), vjust = 1, hjust = 0.5
  ) +
  scale_fill_brewer(palette = "Spectral") +
  labs(fill = "clarity", y = "", x = "cut") +
  theme_minimal() +
  theme(legend.position = "top")

# Horizontal percent-stacked bars: cut on the y axis, clarity shares on x.
# Each segment carries two labels: its count and its share within the cut.
p2 <- ggplot(data = dat, aes(y = cut, x = cnt, fill = clarity)) +
  geom_col(position = "fill") +
  # position_fill(vjust = 1) anchors each label at the end of its segment
  geom_text(
    aes(label = cnt),
    position = position_fill(vjust = 1), vjust = -0.5
  ) +
  geom_text(
    aes(label = scales::percent(pct, accuracy = 0.1)),
    position = position_fill(vjust = 1), vjust = 1, hjust = 0.5
  ) +
  scale_fill_brewer(palette = "Spectral") +
  scale_x_continuous(labels = scales::percent) +
  # Axes are swapped relative to p1: cut is on y here, so the "cut" title
  # belongs to y and the proportion axis (x) stays untitled.
  labs(fill = "clarity", x = "", y = "cut") +
  theme_minimal() +
  theme(legend.position = "top")

# Stack the two annotated charts vertically.
# NOTE(review): `/` is presumably the patchwork operator - confirm.
p1 / p2
添加注释到条形图

图 12.30: 添加注释到条形图

借助 plotly 制作相应的动态百分比堆积条形图

# Static ggplot2 chart: clarity bars placed side by side within each cut.
ggplot(
  data = diamonds,
  mapping = aes(x = cut, fill = clarity)
) +
  geom_bar(position = "dodge2") +
  scale_fill_brewer(palette = "Spectral")
百分比堆积条形图

图 12.31: 百分比堆积条形图

# Percent-stacked bar chart: clarity shares stacked within each cut
plotly::plot_ly(
  data = dat,
  type = "bar",
  x = ~cut, y = ~pct,
  color = ~clarity, colors = "Spectral",
  # Hover label: the count followed by its percentage share
  text = ~ paste0(
    cnt, "颗 <br>",
    "占比:", scales::percent(pct, accuracy = 0.1), "<br>"
  ),
  hoverinfo = "text"
) %>%
  plotly::layout(barmode = "stack", yaxis = list(tickformat = ".0%")) %>%
  plotly::config(displayModeBar = FALSE)

图 12.31: 百分比堆积条形图

# `type = "histogram"` does the counting by cut and clarity itself
plotly::plot_ly(
  data = diamonds,
  type = "histogram",
  x = ~cut,
  color = ~clarity, colors = "Spectral"
) %>%
  plotly::config(displayModeBar = FALSE)

图 12.31: 百分比堆积条形图

# Stacked chart: the same histogram traces, stacked within each cut
plotly::plot_ly(
  data = diamonds,
  type = "histogram",
  x = ~cut,
  color = ~clarity, colors = "Spectral"
) %>%
  plotly::layout(
    legend = list(title = list(text = "clarity")),
    yaxis = list(title = "cnt"),
    barmode = "stack"
  ) %>%
  plotly::config(displayModeBar = FALSE)

图 12.31: 百分比堆积条形图