Examples for 'dplyr::case_when'


A general vectorised if

Aliases: case_when

Keywords:

### ** Examples

x <- 1:50
case_when(
  x %% 35 == 0 ~ "fizz buzz",
  x %% 5 == 0 ~ "fizz",
  x %% 7 == 0 ~ "buzz",
  TRUE ~ as.character(x)
)
 [1] "1"         "2"         "3"         "4"         "fizz"      "6"        
 [7] "buzz"      "8"         "9"         "fizz"      "11"        "12"       
[13] "13"        "buzz"      "fizz"      "16"        "17"        "18"       
[19] "19"        "fizz"      "buzz"      "22"        "23"        "24"       
[25] "fizz"      "26"        "27"        "buzz"      "29"        "fizz"     
[31] "31"        "32"        "33"        "34"        "fizz buzz" "36"       
[37] "37"        "38"        "39"        "fizz"      "41"        "buzz"     
[43] "43"        "44"        "fizz"      "46"        "47"        "48"       
[49] "buzz"      "fizz"     
# Like an if statement, the arguments are evaluated in order, so you must
# proceed from the most specific to the most general. This won't work:
case_when(
  TRUE ~ as.character(x),
  x %%  5 == 0 ~ "fizz",
  x %%  7 == 0 ~ "buzz",
  x %% 35 == 0 ~ "fizz buzz"
)
 [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "13" "14" "15"
[16] "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" "30"
[31] "31" "32" "33" "34" "35" "36" "37" "38" "39" "40" "41" "42" "43" "44" "45"
[46] "46" "47" "48" "49" "50"
# If none of the cases match, NA is used:
case_when(
  x %%  5 == 0 ~ "fizz",
  x %%  7 == 0 ~ "buzz",
  x %% 35 == 0 ~ "fizz buzz"
)
 [1] NA     NA     NA     NA     "fizz" NA     "buzz" NA     NA     "fizz"
[11] NA     NA     NA     "buzz" "fizz" NA     NA     NA     NA     "fizz"
[21] "buzz" NA     NA     NA     "fizz" NA     NA     "buzz" NA     "fizz"
[31] NA     NA     NA     NA     "fizz" NA     NA     NA     NA     "fizz"
[41] NA     "buzz" NA     NA     "fizz" NA     NA     NA     "buzz" "fizz"
# Note that NA values in the vector x do not get special treatment. If you want
# to explicitly handle NA values you can use the `is.na` function:
x[2:4] <- NA_real_
case_when(
  x %% 35 == 0 ~ "fizz buzz",
  x %% 5 == 0 ~ "fizz",
  x %% 7 == 0 ~ "buzz",
  is.na(x) ~ "nope",
  TRUE ~ as.character(x)
)
 [1] "1"         "nope"      "nope"      "nope"      "fizz"      "6"        
 [7] "buzz"      "8"         "9"         "fizz"      "11"        "12"       
[13] "13"        "buzz"      "fizz"      "16"        "17"        "18"       
[19] "19"        "fizz"      "buzz"      "22"        "23"        "24"       
[25] "fizz"      "26"        "27"        "buzz"      "29"        "fizz"     
[31] "31"        "32"        "33"        "34"        "fizz buzz" "36"       
[37] "37"        "38"        "39"        "fizz"      "41"        "buzz"     
[43] "43"        "44"        "fizz"      "46"        "47"        "48"       
[49] "buzz"      "fizz"     
# All RHS values need to be of the same type. Inconsistent types will throw an error.
# This applies also to NA values used in RHS: NA is logical, use
# typed values like NA_real_, NA_complex_, NA_character_, NA_integer_ as appropriate.
case_when(
  x %% 35 == 0 ~ NA_character_,
  x %% 5 == 0 ~ "fizz",
  x %% 7 == 0 ~ "buzz",
  TRUE ~ as.character(x)
)
 [1] "1"    NA     NA     NA     "fizz" "6"    "buzz" "8"    "9"    "fizz"
[11] "11"   "12"   "13"   "buzz" "fizz" "16"   "17"   "18"   "19"   "fizz"
[21] "buzz" "22"   "23"   "24"   "fizz" "26"   "27"   "buzz" "29"   "fizz"
[31] "31"   "32"   "33"   "34"   NA     "36"   "37"   "38"   "39"   "fizz"
[41] "41"   "buzz" "43"   "44"   "fizz" "46"   "47"   "48"   "buzz" "fizz"
case_when(
  x %% 35 == 0 ~ 35,
  x %% 5 == 0 ~ 5,
  x %% 7 == 0 ~ 7,
  TRUE ~ NA_real_
)
 [1] NA NA NA NA  5 NA  7 NA NA  5 NA NA NA  7  5 NA NA NA NA  5  7 NA NA NA  5
[26] NA NA  7 NA  5 NA NA NA NA 35 NA NA NA NA  5 NA  7 NA NA  5 NA NA NA  7  5
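# case_when() evaluates all RHS expressions, and then constructs its
# result by extracting the selected (via the LHS expressions) parts.
# In particular, sqrt(y) is computed for every element of y here, so the
# negative values trigger a "NaNs produced" warning even though none of
# those NaNs end up in the result: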
y <- seq(-2, 2, by = .5)
case_when(
  y >= 0 ~ sqrt(y),
  TRUE   ~ y
)
Warning in sqrt(y): NaNs produced
[1] -2.0000000 -1.5000000 -1.0000000 -0.5000000  0.0000000  0.7071068  1.0000000
[8]  1.2247449  1.4142136
# This throws an error because a bare NA is logical, not numeric:
try(case_when(
  x %% 35 == 0 ~ 35,
  x %% 5 == 0 ~ 5,
  x %% 7 == 0 ~ 7,
  TRUE ~ NA
))
Error in case_when(x%%35 == 0 ~ 35, x%%5 == 0 ~ 5, x%%7 == 0 ~ 7, TRUE ~  : 
  
# case_when is particularly useful inside mutate when you want to
# create a new variable that relies on a complex combination of existing
# variables
starwars %>%
  select(name:mass, gender, species) %>%
  mutate(
    type = case_when(
      height > 200 | mass > 200 ~ "large",
      species == "Droid"        ~ "robot",
      TRUE                      ~ "other"
    )
  )
# A tibble: 87 × 6
   name               height  mass gender    species type 
   <chr>               <int> <dbl> <chr>     <chr>   <chr>
 1 Luke Skywalker        172    77 masculine Human   other
 2 C-3PO                 167    75 masculine Droid   robot
 3 R2-D2                  96    32 masculine Droid   robot
 4 Darth Vader           202   136 masculine Human   large
 5 Leia Organa           150    49 feminine  Human   other
 6 Owen Lars             178   120 masculine Human   other
 7 Beru Whitesun lars    165    75 feminine  Human   other
 8 R5-D4                  97    32 masculine Droid   robot
 9 Biggs Darklighter     183    84 masculine Human   other
10 Obi-Wan Kenobi        182    77 masculine Human   other
# … with 77 more rows
# `case_when()` is not a tidy eval function. If you'd like to reuse
# the same patterns, extract the `case_when()` call into a normal
# function:
case_character_type <- function(height, mass, species) {
  # Name each condition up front so the rules below read as a priority list.
  is_large <- height > 200 | mass > 200
  is_droid <- species == "Droid"

  # Rules are tried in order: "large" takes precedence over "robot", and
  # anything matching neither rule falls through to "other".
  case_when(
    is_large ~ "large",
    is_droid ~ "robot",
    TRUE ~ "other"
  )
}

case_character_type(150, 250, "Droid")
[1] "large"
case_character_type(150, 150, "Droid")
[1] "robot"
# Such functions can be used inside `mutate()` as well:
starwars %>%
  mutate(type = case_character_type(height, mass, species)) %>%
  pull(type)
 [1] "other" "robot" "robot" "large" "other" "other" "other" "robot" "other"
[10] "other" "other" "other" "large" "other" "other" "large" "other" "other"
[19] "other" "other" "other" "robot" "other" "other" "other" "other" "other"
[28] "other" "other" "other" "other" "other" "other" "other" "large" "large"
[37] "other" "other" "other" "other" "other" "other" "other" "other" "other"
[46] "other" "other" "other" "other" "other" "other" "other" "other" "large"
[55] "other" "other" "other" "other" "other" "other" "other" "other" "other"
[64] "other" "other" "other" "other" "other" "large" "large" "other" "other"
[73] "robot" "other" "other" "other" "large" "large" "other" "other" "large"
[82] "other" "other" "other" "robot" "other" "other"
# `case_when()` ignores `NULL` inputs. This is useful when you'd
# like to use a pattern only under certain conditions. Here we'll
# take advantage of the fact that `if` returns `NULL` when there is
# no `else` clause:
case_character_type <- function(height, mass, species, robots = TRUE) {
  # Classify each element as "large", "robot" or "other".
  # `robots` toggles the Droid rule: with robots = FALSE no element is
  # labelled "robot".
  case_when(
    height > 200 | mass > 200      ~ "large",
    # The whole formula is the body of the `if`. When `robots` is FALSE the
    # `if` has no `else` branch and yields NULL, which case_when() ignores.
    if (robots) species == "Droid" ~ "robot",
    # Catch-all for everything not matched by an earlier rule.
    TRUE                           ~ "other"
  )
}

starwars %>%
  mutate(type = case_character_type(height, mass, species, robots = FALSE)) %>%
  pull(type)
 [1] "other" "other" "other" "large" "other" "other" "other" "other" "other"
[10] "other" "other" "other" "large" "other" "other" "large" "other" "other"
[19] "other" "other" "other" "other" "other" "other" "other" "other" "other"
[28] "other" "other" "other" "other" "other" "other" "other" "large" "large"
[37] "other" "other" "other" "other" "other" "other" "other" "other" "other"
[46] "other" "other" "other" "other" "other" "other" "other" "other" "large"
[55] "other" "other" "other" "other" "other" "other" "other" "other" "other"
[64] "other" "other" "other" "other" "other" "large" "large" "other" "other"
[73] "other" "other" "other" "other" "large" "large" "other" "other" "large"
[82] "other" "other" "other" "other" "other" "other"

[Package dplyr version 1.0.9 Index]