Chapter 4 Factor Variables
4.1 Creating Factors
library(tibble)
library(dplyr)
library(ggplot2)
# Build a small example tibble. `gender` and `review` start out as plain
# character columns; they are converted to factors in the next step.
my_dataframe <- tibble(
  age = c(12, 17, 11, 8),
  gender = c("Male", "Male", "Female", "Female"),
  review = c("C", "B", "B", "A")
)
# Convert both character columns to factors. `levels` fixes the level order
# explicitly; `ordered = TRUE` additionally makes `review` an ordered factor
# with C < B < A.
my_dataframe <- my_dataframe %>%
  mutate(gender = factor(gender, levels = c("Male", "Female"))) %>%
  mutate(review = factor(review, levels = c("C", "B", "A"), ordered = TRUE))

head(my_dataframe)
## # A tibble: 4 × 3
## age gender review
## <dbl> <fct> <ord>
## 1 12 Male C
## 2 17 Male B
## 3 11 Female B
## 4 8 Female A
So, we created an ordered as well as an unordered factor. If you want to see the values for the factor levels, use levels().
# Show the levels of each factor column, in their stored order.
levels(my_dataframe[["gender"]])
## [1] "Male" "Female"
levels(my_dataframe[["review"]])
## [1] "C" "B" "A"
4.2 Some other useful functions from forcats
library(forcats)
4.2.1 Modify factor levels
# Rename the levels of `review` with fct_recode(). Each pair is written as
# "new level" = "old level"; the underlying order of the levels is kept.
my_dataframe <- my_dataframe %>%
  mutate(
    review = fct_recode(
      review,
      "Very Good" = "A",
      "Good" = "B",
      "Bad" = "C"
    )
  )

head(my_dataframe)
## # A tibble: 4 × 3
## age gender review
## <dbl> <fct> <ord>
## 1 12 Male Bad
## 2 17 Male Good
## 3 11 Female Good
## 4 8 Female Very Good
No bad people: recoding "Bad" to the already existing level "Good" merges the two levels into one.
# Recoding "Bad" to the already existing level "Good" collapses both levels
# into a single "Good" level.
my_dataframe <- my_dataframe %>%
  mutate(review = fct_recode(review, "Good" = "Bad"))

head(my_dataframe)
## # A tibble: 4 × 3
## age gender review
## <dbl> <fct> <ord>
## 1 12 Male Good
## 2 17 Male Good
## 3 11 Female Good
## 4 8 Female Very Good
# Collapse the data to one row per gender holding its mean age, then reorder
# the levels of `gender` by `mean_age` so the plot axis follows the data
# instead of the original level order.
# Note: this overwrites `my_dataframe` with the two-row summary table.
my_dataframe <- my_dataframe %>%
  group_by(gender) %>%
  summarize(mean_age = mean(age, na.rm = TRUE)) %>%
  mutate(gender = fct_reorder(gender, mean_age))

# One point per gender: mean age on the x axis, reordered factor on the y axis.
ggplot(my_dataframe) +
  geom_point(aes(x = mean_age, y = gender))