24.3 Stop words and Frequencies
# Tally how often each value of `word` occurs in tidy_books and list the
# most frequent first (stop words are still included at this point).
count(tidy_books, word, sort = TRUE)
## # A tibble: 14,520 x 2
## word n
## <chr> <int>
## 1 the 26351
## 2 to 24044
## 3 and 22515
## 4 of 21178
## 5 a 13408
## 6 her 13055
## 7 i 12006
## 8 in 11217
## 9 was 11204
## 10 it 10234
## # ... with 14,510 more rows
# Count word frequencies again, this time with stop words removed, so that
# function words such as "the", "to" and "and" no longer dominate the ranking.
# anti_join() keeps only the rows of tidy_books whose `word` has no match in
# the stop-word lexicon; it changes nothing if those rows were already
# dropped earlier in the script.
# NOTE(review): assumes the tidytext package (which provides `stop_words`)
# is installed -- confirm against the setup code outside this section.
tidy_books %>%
  anti_join(tidytext::stop_words, by = "word") %>%
  count(word, sort = TRUE)
## # A tibble: 13,914 x 2
## word n
## <chr> <int>
## 1 miss 1855
## 2 time 1337
## 3 fanny 862
## 4 dear 822
## 5 lady 817
## 6 sir 806
## 7 day 797
## 8 emma 787
## 9 sister 727
## 10 house 699
## # ... with 13,904 more rows
library(ggplot2)
# Horizontal bar chart of the words that occur more than 600 times.
# Stop words are removed first; otherwise function words ("the", "to", ...)
# would swamp the chart. anti_join() changes nothing if they were already
# dropped earlier in the script.
# NOTE(review): assumes the tidytext package (which provides `stop_words`)
# is installed -- confirm against the setup code outside this section.
tidy_books %>%
  anti_join(tidytext::stop_words, by = "word") %>%
  count(word, sort = TRUE) %>%
  filter(n > 600) %>%
  # Reorder the factor levels by count so the bars are drawn in frequency
  # order rather than alphabetically.
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n)) +
  geom_col() +
  # The words label themselves, so drop the axis title.
  xlab(NULL) +
  # Flip the axes so the word labels run along the vertical axis.
  coord_flip()