7.9 Groupby
# Average departure delay for each group in by_day; NA delays are dropped
# before averaging (na.rm = TRUE).
summarize(
  by_day,
  delay = mean(dep_delay, na.rm = TRUE)
)
## # A tibble: 365 x 4
## # Groups: year, month [?]
## year month day delay
## <int> <int> <int> <dbl>
## 1 2013 1 1 11.5
## 2 2013 1 2 13.9
## 3 2013 1 3 11.0
## 4 2013 1 4 8.95
## 5 2013 1 5 5.73
## 6 2013 1 6 7.15
## 7 2013 1 7 5.42
## 8 2013 1 8 2.55
## 9 2013 1 9 2.28
## 10 2013 1 10 2.84
## # ... with 355 more rows
Group by month and compute several summaries in a single summarize() call:
# Step 1: group the flights by year and month.
by_month <- group_by(flights, year, month)

# Step 2: collapse each group to one row holding the mean and the standard
# deviation of the departure delay; NA delays are dropped from both.
by_month <- summarize(
  by_month,
  delay = mean(dep_delay, na.rm = TRUE),
  delay_std = sd(dep_delay, na.rm = TRUE)
)

# Print the summary table.
by_month
## # A tibble: 12 x 4
## # Groups: year [?]
## year month delay delay_std
## <int> <int> <dbl> <dbl>
## 1 2013 1 10.0 36.4
## 2 2013 2 10.8 36.3
## 3 2013 3 13.2 40.1
## 4 2013 4 13.9 43.0
## 5 2013 5 13.0 39.4
## 6 2013 6 20.8 51.5
## 7 2013 7 21.7 51.6
## 8 2013 8 12.6 37.7
## 9 2013 9 6.72 35.6
## 10 2013 10 6.24 29.7
## 11 2013 11 5.44 27.6
## 12 2013 12 16.6 41.9
The above code can be rewritten using the pipe operator, %>%:
# Same computation as a pipeline: each step's result is passed as the first
# argument of the next call, so the code reads top to bottom in the order it
# runs and needs no intermediate variable.
by_month <- flights %>%
  group_by(year, month) %>%
  summarize(
    delay = mean(dep_delay, na.rm = TRUE),
    delay_std = sd(dep_delay, na.rm = TRUE)
  )

# Print the summary table.
by_month
## # A tibble: 12 x 4
## # Groups: year [?]
## year month delay delay_std
## <int> <int> <dbl> <dbl>
## 1 2013 1 10.0 36.4
## 2 2013 2 10.8 36.3
## 3 2013 3 13.2 40.1
## 4 2013 4 13.9 43.0
## 5 2013 5 13.0 39.4
## 6 2013 6 20.8 51.5
## 7 2013 7 21.7 51.6
## 8 2013 8 12.6 37.7
## 9 2013 9 6.72 35.6
## 10 2013 10 6.24 29.7
## 11 2013 11 5.44 27.6
## 12 2013 12 16.6 41.9
Without the pipe, you have to either create a temporary variable (as in the first version) or write a nested expression:
# Nested form: the inner group_by() call is evaluated first and its result
# becomes the first argument of summarize(), so the code reads inside-out.
summarize(
  group_by(flights, year, month),
  delay = mean(dep_delay, na.rm = TRUE),
  delay_std = sd(dep_delay, na.rm = TRUE)
)
## # A tibble: 12 x 4
## # Groups: year [?]
## year month delay delay_std
## <int> <int> <dbl> <dbl>
## 1 2013 1 10.0 36.4
## 2 2013 2 10.8 36.3
## 3 2013 3 13.2 40.1
## 4 2013 4 13.9 43.0
## 5 2013 5 13.0 39.4
## 6 2013 6 20.8 51.5
## 7 2013 7 21.7 51.6
## 8 2013 8 12.6 37.7
## 9 2013 9 6.72 35.6
## 10 2013 10 6.24 29.7
## 11 2013 11 5.44 27.6
## 12 2013 12 16.6 41.9