Examples for 'forcats::fct_lump'


Lump together factor levels into "other"

Aliases: fct_lump fct_lump_min fct_lump_prop fct_lump_n fct_lump_lowfreq

Keywords:

### ** Examples

x <- factor(rep(LETTERS[1:9], times = c(40, 10, 5, 27, 1, 1, 1, 1, 1)))
x %>% table()
.
 A  B  C  D  E  F  G  H  I 
40 10  5 27  1  1  1  1  1 
x %>% fct_lump_n(3) %>% table()
.
    A     B     D Other 
   40    10    27    10 
x %>% fct_lump_prop(0.10) %>% table()
.
    A     B     D Other 
   40    10    27    10 
x %>% fct_lump_min(5) %>% table()
.
    A     B     C     D Other 
   40    10     5    27     5 
x %>% fct_lump_lowfreq() %>% table()
.
    A     D Other 
   40    27    20 
x <- factor(letters[rpois(100, 5)])
x
  [1] g i c e h d d e e d c c d e f d e b b d b e e d d f c f e c b d e d c g c
 [38] d d c d c d d f d f g a i h f a e e f i h f d c h f e d e i b f c h g h c
 [75] b f h c d f h e c f d b a c i e c d h e d e e d f b
Levels: a b c d e f g h i
table(x)
x
 a  b  c  d  e  f  g  h  i 
 3  8 16 23 18 14  4  9  5 
table(fct_lump_lowfreq(x))
    b     c     d     e     f     g     h     i Other 
    8    16    23    18    14     4     9     5     3 
# Use positive values to collapse the rarest levels
fct_lump_n(x, n = 3)
  [1] Other Other c     e     Other d     d     e     e     d     c     c    
 [13] d     e     Other d     e     Other Other d     Other e     e     d    
 [25] d     Other c     Other e     c     Other d     e     d     c     Other
 [37] c     d     d     c     d     c     d     d     Other d     Other Other
 [49] Other Other Other Other Other e     e     Other Other Other Other d    
 [61] c     Other Other e     d     e     Other Other Other c     Other Other
 [73] Other c     Other Other Other c     d     Other Other e     c     Other
 [85] d     Other Other c     Other e     c     d     Other e     d     e    
 [97] e     d     Other Other
Levels: c d e Other
fct_lump_prop(x, prop = 0.1)
  [1] Other Other c     e     Other d     d     e     e     d     c     c    
 [13] d     e     f     d     e     Other Other d     Other e     e     d    
 [25] d     f     c     f     e     c     Other d     e     d     c     Other
 [37] c     d     d     c     d     c     d     d     f     d     f     Other
 [49] Other Other Other f     Other e     e     f     Other Other f     d    
 [61] c     Other f     e     d     e     Other Other f     c     Other Other
 [73] Other c     Other f     Other c     d     f     Other e     c     f    
 [85] d     Other Other c     Other e     c     d     Other e     d     e    
 [97] e     d     f     Other
Levels: c d e f Other
# Use negative values to collapse the most common levels
fct_lump_n(x, n = -3)
  [1] g     i     Other Other Other Other Other Other Other Other Other Other
 [13] Other Other Other Other Other Other Other Other Other Other Other Other
 [25] Other Other Other Other Other Other Other Other Other Other Other g    
 [37] Other Other Other Other Other Other Other Other Other Other Other g    
 [49] a     i     Other Other a     Other Other Other i     Other Other Other
 [61] Other Other Other Other Other Other i     Other Other Other Other g    
 [73] Other Other Other Other Other Other Other Other Other Other Other Other
 [85] Other Other a     Other i     Other Other Other Other Other Other Other
 [97] Other Other Other Other
Levels: a g i Other
fct_lump_prop(x, prop = -0.1)
  [1] g     i     Other Other h     Other Other Other Other Other Other Other
 [13] Other Other Other Other Other b     b     Other b     Other Other Other
 [25] Other Other Other Other Other Other b     Other Other Other Other g    
 [37] Other Other Other Other Other Other Other Other Other Other Other g    
 [49] a     i     h     Other a     Other Other Other i     h     Other Other
 [61] Other h     Other Other Other Other i     b     Other Other h     g    
 [73] h     Other b     Other h     Other Other Other h     Other Other Other
 [85] Other b     a     Other i     Other Other Other h     Other Other Other
 [97] Other Other Other b    
Levels: a b g h i Other
# Use weighted frequencies
w <- c(rep(2, 50), rep(1, 50))
fct_lump_n(x, n = 5, w = w)
  [1] Other Other c     e     Other d     d     e     e     d     c     c    
 [13] d     e     f     d     e     b     b     d     b     e     e     d    
 [25] d     f     c     f     e     c     b     d     e     d     c     Other
 [37] c     d     d     c     d     c     d     d     f     d     f     Other
 [49] Other Other Other f     Other e     e     f     Other Other f     d    
 [61] c     Other f     e     d     e     Other b     f     c     Other Other
 [73] Other c     b     f     Other c     d     f     Other e     c     f    
 [85] d     b     Other c     Other e     c     d     Other e     d     e    
 [97] e     d     f     b    
Levels: b c d e f Other
# Use ties.method to control how tied levels are collapsed
fct_lump_n(x, n = 6)
  [1] Other Other c     e     h     d     d     e     e     d     c     c    
 [13] d     e     f     d     e     b     b     d     b     e     e     d    
 [25] d     f     c     f     e     c     b     d     e     d     c     Other
 [37] c     d     d     c     d     c     d     d     f     d     f     Other
 [49] Other Other h     f     Other e     e     f     Other h     f     d    
 [61] c     h     f     e     d     e     Other b     f     c     h     Other
 [73] h     c     b     f     h     c     d     f     h     e     c     f    
 [85] d     b     Other c     Other e     c     d     h     e     d     e    
 [97] e     d     f     b    
Levels: b c d e f h Other
fct_lump_n(x, n = 6, ties.method = "max")
  [1] Other Other c     e     h     d     d     e     e     d     c     c    
 [13] d     e     f     d     e     b     b     d     b     e     e     d    
 [25] d     f     c     f     e     c     b     d     e     d     c     Other
 [37] c     d     d     c     d     c     d     d     f     d     f     Other
 [49] Other Other h     f     Other e     e     f     Other h     f     d    
 [61] c     h     f     e     d     e     Other b     f     c     h     Other
 [73] h     c     b     f     h     c     d     f     h     e     c     f    
 [85] d     b     Other c     Other e     c     d     h     e     d     e    
 [97] e     d     f     b    
Levels: b c d e f h Other
# Use fct_lump_min() to lump together all levels with fewer than `min` values
table(fct_lump_min(x, min = 10))
    c     d     e     f Other 
   16    23    18    14    29 
table(fct_lump_min(x, min = 15))
    c     d     e Other 
   16    23    18    43 

[Package forcats version 0.5.1 Index]