Aliases: cut cut.default
Keywords: category
### ** Examples
Z <- stats::rnorm(10000)
table(cut(Z, breaks = -6:6))
(-6,-5] (-5,-4] (-4,-3] (-3,-2] (-2,-1]  (-1,0]   (0,1]   (1,2]   (2,3]   (3,4]
      0       0      15     210    1308    3413    3418    1426     199      11
  (4,5]   (5,6]
      0       0
sum(table(cut(Z, breaks = -6:6, labels = FALSE)))
[1] 10000
sum(graphics::hist(Z, breaks = -6:6, plot = FALSE)$counts)
[1] 10000
cut(rep(1,5), 4) #-- dummy
[1] (0.9995,1] (0.9995,1] (0.9995,1] (0.9995,1] (0.9995,1]
Levels: (0.999,0.9995] (0.9995,1] (1,1.0005] (1.0005,1.001]
tx0 <- c(9, 4, 6, 5, 3, 10, 5, 3, 5)
x <- rep(0:8, tx0)
stopifnot(table(x) == tx0)
table( cut(x, breaks = 8))
(-0.008,1]      (1,2]      (2,3]      (3,4]      (4,5]      (5,6]      (6,7]
        13          6          5          3         10          5          3
  (7,8.01]
         5
table( cut(x, breaks = 3*(-2:5)))
(-6,-3]  (-3,0]   (0,3]   (3,6]   (6,9]  (9,12] (12,15]
      0       9      15      18       8       0       0
table( cut(x, breaks = 3*(-2:5), right = FALSE))
[-6,-3)  [-3,0)   [0,3)   [3,6)   [6,9)  [9,12) [12,15)
      0       0      19      18      13       0       0
##--- some values OUTSIDE the breaks :
table(cx <- cut(x, breaks = 2*(0:4)))
(0,2] (2,4] (4,6] (6,8]
   10     8    15     8
table(cxl <- cut(x, breaks = 2*(0:4), right = FALSE))
[0,2) [2,4) [4,6) [6,8)
   13    11    13     8
which(is.na(cx)); x[is.na(cx)] #-- the first 9 values 0
[1] 1 2 3 4 5 6 7 8 9
[1] 0 0 0 0 0 0 0 0 0
which(is.na(cxl)); x[is.na(cxl)] #-- the last 5 values 8
[1] 46 47 48 49 50
[1] 8 8 8 8 8
## Label construction:
y <- stats::rnorm(100)
table(cut(y, breaks = pi/3*(-3:3)))
(-3.14,-2.09] (-2.09,-1.05]     (-1.05,0]      (0,1.05]   (1.05,2.09]
            2            12            36            31            18
  (2.09,3.14]
            1
table(cut(y, breaks = pi/3*(-3:3), dig.lab = 4))
(-3.142,-2.094] (-2.094,-1.047]      (-1.047,0]       (0,1.047]   (1.047,2.094]
              2              12              36              31              18
  (2.094,3.142]
              1
table(cut(y, breaks = 1*(-3:3), dig.lab = 4))
(-3,-2] (-2,-1]  (-1,0]   (0,1]   (1,2]   (2,3]
      2      14      34      31      18       1
# extra digits don't "harm" here
table(cut(y, breaks = 1*(-3:3), right = FALSE))
[-3,-2) [-2,-1)  [-1,0)   [0,1)   [1,2)   [2,3)
      2      14      34      31      18       1
#- the same, since no exact INT!
## sometimes the default dig.lab is not enough to avoid confusion:
aaa <- c(1,2,3,4,5,2,3,4,5,6,7)
cut(aaa, 3)
 [1] (0.994,3] (0.994,3] (0.994,3] (3,5]     (3,5]     (0.994,3] (0.994,3]
 [8] (3,5]     (3,5]     (5,7.01]  (5,7.01]
Levels: (0.994,3] (3,5] (5,7.01]
cut(aaa, 3, dig.lab = 4, ordered_result = TRUE)
 [1] (0.994,3] (0.994,3] (0.994,3] (3,5]     (3,5]     (0.994,3] (0.994,3]
 [8] (3,5]     (3,5]     (5,7.006] (5,7.006]
Levels: (0.994,3] < (3,5] < (5,7.006]
## one way to extract the breakpoints
labs <- levels(cut(aaa, 3))
cbind(lower = as.numeric( sub("\\((.+),.*", "\\1", labs) ),
      upper = as.numeric( sub("[^,]*,([^]]*)\\]", "\\1", labs) ))
     lower upper
[1,] 0.994  3.00
[2,] 3.000  5.00
[3,] 5.000  7.01