Aliases: fct_lump fct_lump_min fct_lump_prop fct_lump_n fct_lump_lowfreq
Keywords:
### ** Examples

x <- factor(rep(LETTERS[1:9], times = c(40, 10, 5, 27, 1, 1, 1, 1, 1)))
x %>% table()
.
 A  B  C  D  E  F  G  H  I
40 10  5 27  1  1  1  1  1
x %>% fct_lump_n(3) %>% table()
.
    A     B     D Other
   40    10    27    10
x %>% fct_lump_prop(0.10) %>% table()
.
    A     B     D Other
   40    10    27    10
x %>% fct_lump_min(5) %>% table()
.
    A     B     C     D Other
   40    10     5    27     5
x %>% fct_lump_lowfreq() %>% table()
.
    A     D Other
   40    27    20
x <- factor(letters[rpois(100, 5)])
x
[1] g i c e h d d e e d c c d e f d e b b d b e e d d f c f e c b d e d c g c [38] d d c d c d d f d f g a i h f a e e f i h f d c h f e d e i b f c h g h c [75] b f h c d f h e c f d b a c i e c d h e d e e d f b Levels: a b c d e f g h i
table(x)
x
 a  b  c  d  e  f  g  h  i
 3  8 16 23 18 14  4  9  5
table(fct_lump_lowfreq(x))
    b     c     d     e     f     g     h     i Other
    8    16    23    18    14     4     9     5     3
# Use positive values to collapse the rarest
fct_lump_n(x, n = 3)
[1] Other Other c e Other d d e e d c c [13] d e Other d e Other Other d Other e e d [25] d Other c Other e c Other d e d c Other [37] c d d c d c d d Other d Other Other [49] Other Other Other Other Other e e Other Other Other Other d [61] c Other Other e d e Other Other Other c Other Other [73] Other c Other Other Other c d Other Other e c Other [85] d Other Other c Other e c d Other e d e [97] e d Other Other Levels: c d e Other
fct_lump_prop(x, prop = 0.1)
[1] Other Other c e Other d d e e d c c [13] d e f d e Other Other d Other e e d [25] d f c f e c Other d e d c Other [37] c d d c d c d d f d f Other [49] Other Other Other f Other e e f Other Other f d [61] c Other f e d e Other Other f c Other Other [73] Other c Other f Other c d f Other e c f [85] d Other Other c Other e c d Other e d e [97] e d f Other Levels: c d e f Other
# Use negative values to collapse the most common
fct_lump_n(x, n = -3)
[1] g i Other Other Other Other Other Other Other Other Other Other [13] Other Other Other Other Other Other Other Other Other Other Other Other [25] Other Other Other Other Other Other Other Other Other Other Other g [37] Other Other Other Other Other Other Other Other Other Other Other g [49] a i Other Other a Other Other Other i Other Other Other [61] Other Other Other Other Other Other i Other Other Other Other g [73] Other Other Other Other Other Other Other Other Other Other Other Other [85] Other Other a Other i Other Other Other Other Other Other Other [97] Other Other Other Other Levels: a g i Other
fct_lump_prop(x, prop = -0.1)
[1] g i Other Other h Other Other Other Other Other Other Other [13] Other Other Other Other Other b b Other b Other Other Other [25] Other Other Other Other Other Other b Other Other Other Other g [37] Other Other Other Other Other Other Other Other Other Other Other g [49] a i h Other a Other Other Other i h Other Other [61] Other h Other Other Other Other i b Other Other h g [73] h Other b Other h Other Other Other h Other Other Other [85] Other b a Other i Other Other Other h Other Other Other [97] Other Other Other b Levels: a b g h i Other
# Use weighted frequencies
w <- c(rep(2, 50), rep(1, 50))
fct_lump_n(x, n = 5, w = w)
[1] Other Other c e Other d d e e d c c [13] d e f d e b b d b e e d [25] d f c f e c b d e d c Other [37] c d d c d c d d f d f Other [49] Other Other Other f Other e e f Other Other f d [61] c Other f e d e Other b f c Other Other [73] Other c b f Other c d f Other e c f [85] d b Other c Other e c d Other e d e [97] e d f b Levels: b c d e f Other
# Use ties.method to control how tied factors are collapsed
fct_lump_n(x, n = 6)
[1] Other Other c e h d d e e d c c [13] d e f d e b b d b e e d [25] d f c f e c b d e d c Other [37] c d d c d c d d f d f Other [49] Other Other h f Other e e f Other h f d [61] c h f e d e Other b f c h Other [73] h c b f h c d f h e c f [85] d b Other c Other e c d h e d e [97] e d f b Levels: b c d e f h Other
fct_lump_n(x, n = 6, ties.method = "max")
[1] Other Other c e h d d e e d c c [13] d e f d e b b d b e e d [25] d f c f e c b d e d c Other [37] c d d c d c d d f d f Other [49] Other Other h f Other e e f Other h f d [61] c h f e d e Other b f c h Other [73] h c b f h c d f h e c f [85] d b Other c Other e c d h e d e [97] e d f b Levels: b c d e f h Other
# Use fct_lump_min() to lump together all levels with fewer than `min` values
table(fct_lump_min(x, min = 10))
    c     d     e     f Other
   16    23    18    14    29
table(fct_lump_min(x, min = 15))
    c     d     e Other
   16    23    18    43