Examples for 'base::cut'


Convert Numeric to Factor

Aliases: cut cut.default

Keywords: category

### ** Examples

Z <- stats::rnorm(10000)
table(cut(Z, breaks = -6:6))
(-6,-5] (-5,-4] (-4,-3] (-3,-2] (-2,-1]  (-1,0]   (0,1]   (1,2]   (2,3]   (3,4] 
      0       0      15     210    1308    3413    3418    1426     199      11 
  (4,5]   (5,6] 
      0       0 
sum(table(cut(Z, breaks = -6:6, labels = FALSE)))
[1] 10000
sum(graphics::hist(Z, breaks = -6:6, plot = FALSE)$counts)
[1] 10000
cut(rep(1,5), 4) #-- dummy
[1] (0.9995,1] (0.9995,1] (0.9995,1] (0.9995,1] (0.9995,1]
Levels: (0.999,0.9995] (0.9995,1] (1,1.0005] (1.0005,1.001]
tx0 <- c(9, 4, 6, 5, 3, 10, 5, 3, 5)
x <- rep(0:8, tx0)
stopifnot(table(x) == tx0)

table( cut(x, breaks = 8))
(-0.008,1]      (1,2]      (2,3]      (3,4]      (4,5]      (5,6]      (6,7] 
        13          6          5          3         10          5          3 
  (7,8.01] 
         5 
table( cut(x, breaks = 3*(-2:5)))
(-6,-3]  (-3,0]   (0,3]   (3,6]   (6,9]  (9,12] (12,15] 
      0       9      15      18       8       0       0 
table( cut(x, breaks = 3*(-2:5), right = FALSE))
[-6,-3)  [-3,0)   [0,3)   [3,6)   [6,9)  [9,12) [12,15) 
      0       0      19      18      13       0       0 
##--- some values OUTSIDE the breaks :
table(cx  <- cut(x, breaks = 2*(0:4)))
(0,2] (2,4] (4,6] (6,8] 
   10     8    15     8 
table(cxl <- cut(x, breaks = 2*(0:4), right = FALSE))
[0,2) [2,4) [4,6) [6,8) 
   13    11    13     8 
which(is.na(cx));  x[is.na(cx)]  #-- the first 9  values  0
[1] 1 2 3 4 5 6 7 8 9
[1] 0 0 0 0 0 0 0 0 0
which(is.na(cxl)); x[is.na(cxl)] #-- the last  5  values  8
[1] 46 47 48 49 50
[1] 8 8 8 8 8
## Label construction:
y <- stats::rnorm(100)
table(cut(y, breaks = pi/3*(-3:3)))
(-3.14,-2.09] (-2.09,-1.05]     (-1.05,0]      (0,1.05]   (1.05,2.09] 
            2            12            36            31            18 
  (2.09,3.14] 
            1 
table(cut(y, breaks = pi/3*(-3:3), dig.lab = 4))
(-3.142,-2.094] (-2.094,-1.047]      (-1.047,0]       (0,1.047]   (1.047,2.094] 
              2              12              36              31              18 
  (2.094,3.142] 
              1 
table(cut(y, breaks =  1*(-3:3), dig.lab = 4))
(-3,-2] (-2,-1]  (-1,0]   (0,1]   (1,2]   (2,3] 
      2      14      34      31      18       1 
# extra digits don't "harm" here
table(cut(y, breaks =  1*(-3:3), right = FALSE))
[-3,-2) [-2,-1)  [-1,0)   [0,1)   [1,2)   [2,3) 
      2      14      34      31      18       1 
#- the same counts as with right = TRUE, since no value of y is exactly an integer!

## sometimes the default dig.lab is not enough to avoid confusion:
aaa <- c(1,2,3,4,5,2,3,4,5,6,7)
cut(aaa, 3)
 [1] (0.994,3] (0.994,3] (0.994,3] (3,5]     (3,5]     (0.994,3] (0.994,3]
 [8] (3,5]     (3,5]     (5,7.01]  (5,7.01] 
Levels: (0.994,3] (3,5] (5,7.01]
cut(aaa, 3, dig.lab = 4, ordered_result = TRUE)
 [1] (0.994,3] (0.994,3] (0.994,3] (3,5]     (3,5]     (0.994,3] (0.994,3]
 [8] (3,5]     (3,5]     (5,7.006] (5,7.006]
Levels: (0.994,3] < (3,5] < (5,7.006]
## one way to extract the breakpoints
labs <- levels(cut(aaa, 3))
cbind(lower = as.numeric( sub("\\((.+),.*", "\\1", labs) ),
      upper = as.numeric( sub("[^,]*,([^]]*)\\]", "\\1", labs) ))
     lower upper
[1,] 0.994  3.00
[2,] 3.000  5.00
[3,] 5.000  7.01

[Package base version 4.2.3 Index]