Chapter 4 Factor Variables

4.1 Creating Factors

library(tibble)
library(dplyr)
library(ggplot2)

# Toy data set, written row by row: one person per line with their age,
# gender, and a letter-grade review.
my_dataframe <- tribble(
  ~age, ~gender,  ~review,
  12,   "Male",   "C",
  17,   "Male",   "B",
  11,   "Female", "B",
  8,    "Female", "A"
)

# Convert both character columns to factors in a single mutate().
# `gender` becomes a plain (unordered) factor; `review` becomes an ordered
# factor whose levels run C < B < A.
my_dataframe <- my_dataframe %>%
  mutate(
    gender = factor(gender, levels = c("Male", "Female")),
    review = factor(review, levels = c("C", "B", "A"), ordered = TRUE)
  )

head(my_dataframe)
## # A tibble: 4 × 3
##     age gender review
##   <dbl> <fct>  <ord> 
## 1    12 Male   C     
## 2    17 Male   B     
## 3    11 Female B     
## 4     8 Female A

We have now created one unordered factor (`gender`) and one ordered factor (`review`). To see a factor's levels, in the order they are stored, use `levels()`.

# levels() lists a factor's level labels in their stored order.
levels(my_dataframe[["gender"]])
## [1] "Male"   "Female"
levels(my_dataframe[["review"]])
## [1] "C" "B" "A"

4.2 Some other useful functions from forcats

library(forcats)

4.2.1 Modify factor levels

# Swap the letter grades for descriptive labels.
# fct_recode() takes `new label = old level` pairs.
my_dataframe <- mutate(
  my_dataframe,
  review = fct_recode(
    review,
    "Very Good" = "A",
    "Good" = "B",
    "Bad" = "C"
  )
)
head(my_dataframe)
## # A tibble: 4 × 3
##     age gender review   
##   <dbl> <fct>  <ord>    
## 1    12 Male   Bad      
## 2    17 Male   Good     
## 3    11 Female Good     
## 4     8 Female Very Good

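Suppose we do not want a "Bad" category at all. Recoding "Bad" to the already existing level "Good" merges the two levels into one: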

# Recoding "Bad" to a label that already exists ("Good") merges the two
# levels into one.
my_dataframe <- mutate(
  my_dataframe,
  review = fct_recode(review, "Good" = "Bad")
)
head(my_dataframe)
## # A tibble: 4 × 3
##     age gender review   
##   <dbl> <fct>  <ord>    
## 1    12 Male   Good     
## 2    17 Male   Good     
## 3    11 Female Good     
## 4     8 Female Very Good
# Reorder factor levels by another variable: collapse the data to one row
# per gender holding the mean age, then sort the `gender` levels by that mean.
# NOTE: this overwrites `my_dataframe` with the per-gender summary; the
# original rows are no longer available under this name afterwards.
my_dataframe <- my_dataframe %>%
  group_by(gender) %>%
  summarise(mean_age = mean(age, na.rm = TRUE)) %>%
  mutate(gender = fct_reorder(gender, mean_age))

# One point per gender; the discrete y axis follows the factor's level order.
ggplot(my_dataframe, aes(x = mean_age, y = gender)) +
  geom_point()