Examples for 'data.table::dcast.data.table'


Fast dcast for data.table

Aliases: dcast.data.table dcast

Keywords: data

### ** Examples

ChickWeight = as.data.table(ChickWeight)
setnames(ChickWeight, tolower(names(ChickWeight)))
DT <- melt(as.data.table(ChickWeight), id=2:4) # calls melt.data.table

# dcast is an S3 method in data.table from v1.9.6
dcast(DT, time ~ variable, fun=mean) # using partial matching of argument
    time    weight
 1:    0  41.06000
 2:    2  49.22000
 3:    4  59.95918
 4:    6  74.30612
 5:    8  91.24490
 6:   10 107.83673
 7:   12 129.24490
 8:   14 143.81250
 9:   16 168.08511
10:   18 190.19149
11:   20 209.71739
12:   21 218.68889
dcast(DT, diet ~ variable, fun=mean)
   diet   weight
1:    1 102.6455
2:    2 122.6167
3:    3 142.9500
4:    4 135.2627
dcast(DT, diet+chick ~ time, drop=FALSE)
     diet chick  0  2  4  6   8  10  12  14  16  18  20  21
  1:    1    18 39 35 NA NA  NA  NA  NA  NA  NA  NA  NA  NA
  2:    1    16 41 45 49 51  57  51  54  NA  NA  NA  NA  NA
  3:    1    15 41 49 56 64  68  68  67  68  NA  NA  NA  NA
  4:    1    13 41 48 53 60  65  67  71  70  71  81  91  96
  5:    1     9 42 51 59 68  85  96  90  92  93 100 100  98
 ---                                                       
196:    4    49 40 53 64 85 108 128 152 166 184 203 233 237
197:    4    46 40 52 62 82 101 120 144 156 173 210 231 238
198:    4    50 41 54 67 84 105 122 155 175 205 234 264 264
199:    4    42 42 49 63 84 103 126 160 174 204 234 269 281
200:    4    48 39 50 62 80 104 125 154 170 222 261 303 322
dcast(DT, diet+chick ~ time, drop=FALSE, fill=0)
     diet chick  0  2  4  6   8  10  12  14  16  18  20  21
  1:    1    18 39 35  0  0   0   0   0   0   0   0   0   0
  2:    1    16 41 45 49 51  57  51  54   0   0   0   0   0
  3:    1    15 41 49 56 64  68  68  67  68   0   0   0   0
  4:    1    13 41 48 53 60  65  67  71  70  71  81  91  96
  5:    1     9 42 51 59 68  85  96  90  92  93 100 100  98
 ---                                                       
196:    4    49 40 53 64 85 108 128 152 166 184 203 233 237
197:    4    46 40 52 62 82 101 120 144 156 173 210 231 238
198:    4    50 41 54 67 84 105 122 155 175 205 234 264 264
199:    4    42 42 49 63 84 103 126 160 174 204 234 269 281
200:    4    48 39 50 62 80 104 125 154 170 222 261 303 322
# using subset
dcast(DT, chick ~ time, fun=mean, subset=.(time < 10 & chick < 20))
   chick  0  2   4   6   8
1:    18 39 35 NaN NaN NaN
2:    16 41 45  49  51  57
3:    15 41 49  56  64  68
4:    13 41 48  53  60  65
5:     9 42 51  59  68  85
# drop argument, #1512
DT <- data.table(v1 = c(1.1, 1.1, 1.1, 2.2, 2.2, 2.2),
                 v2 = factor(c(1L, 1L, 1L, 3L, 3L, 3L), levels=1:3),
                 v3 = factor(c(2L, 3L, 5L, 1L, 2L, 6L), levels=1:6),
                 v4 = c(3L, 2L, 2L, 5L, 4L, 3L))
# drop=TRUE
dcast(DT, v1 + v2 ~ v3)                      # default is drop=TRUE
Using 'v4' as value column. Use 'value.var' to override
    v1 v2  1 2  3  5  6
1: 1.1  1 NA 3  2  2 NA
2: 2.2  3  5 4 NA NA  3
dcast(DT, v1 + v2 ~ v3, drop=FALSE)          # all missing combinations of both LHS and RHS
Using 'v4' as value column. Use 'value.var' to override
    v1 v2  1  2  3  4  5  6
1: 1.1  1 NA  3  2 NA  2 NA
2: 1.1  2 NA NA NA NA NA NA
3: 1.1  3 NA NA NA NA NA NA
4: 2.2  1 NA NA NA NA NA NA
5: 2.2  2 NA NA NA NA NA NA
6: 2.2  3  5  4 NA NA NA  3
dcast(DT, v1 + v2 ~ v3, drop=c(FALSE, TRUE)) # all missing combinations of only LHS
Using 'v4' as value column. Use 'value.var' to override
    v1 v2  1  2  3  5  6
1: 1.1  1 NA  3  2  2 NA
2: 1.1  2 NA NA NA NA NA
3: 1.1  3 NA NA NA NA NA
4: 2.2  1 NA NA NA NA NA
5: 2.2  2 NA NA NA NA NA
6: 2.2  3  5  4 NA NA  3
dcast(DT, v1 + v2 ~ v3, drop=c(TRUE, FALSE)) # all missing combinations of only RHS
Using 'v4' as value column. Use 'value.var' to override
    v1 v2  1 2  3  4  5  6
1: 1.1  1 NA 3  2 NA  2 NA
2: 2.2  3  5 4 NA NA NA  3
# using . and ...
DT <- data.table(v1 = rep(1:2, each = 6),
                 v2 = rep(rep(1:3, 2), each = 2),
                 v3 = rep(1:2, 6),
                 v4 = rnorm(6))
dcast(DT, ... ~ v3, value.var = "v4") #same as v1 + v2 ~ v3, value.var = "v4"
   v1 v2           1          2
1:  1  1  0.05155202 -0.2758148
2:  1  2 -0.11155089 -0.7805347
3:  1  3 -1.01369590  1.1762403
4:  2  1  0.05155202 -0.2758148
5:  2  2 -0.11155089 -0.7805347
6:  2  3 -1.01369590  1.1762403
dcast(DT, v1 + v2 + v3 ~ ., value.var = "v4")
    v1 v2 v3           .
 1:  1  1  1  0.05155202
 2:  1  1  2 -0.27581481
 3:  1  2  1 -0.11155089
 4:  1  2  2 -0.78053469
 5:  1  3  1 -1.01369590
 6:  1  3  2  1.17624028
 7:  2  1  1  0.05155202
 8:  2  1  2 -0.27581481
 9:  2  2  1 -0.11155089
10:  2  2  2 -0.78053469
11:  2  3  1 -1.01369590
12:  2  3  2  1.17624028
## for each combination of (v1, v2), add up all values of v4
dcast(DT, v1 + v2 ~ ., value.var = "v4", fun.aggregate = sum)
   v1 v2          .
1:  1  1 -0.2242628
2:  1  2 -0.8920856
3:  1  3  0.1625444
4:  2  1 -0.2242628
5:  2  2 -0.8920856
6:  2  3  0.1625444
# fill and types
dcast(DT, v2 ~ v3, value.var = 'v1', fill = 0L)  #  0L --> 0
Aggregate function missing, defaulting to 'length'
   v2 1 2
1:  1 2 2
2:  2 2 2
3:  3 2 2
dcast(DT, v2 ~ v3, value.var = 'v4', fill = 1.1) # 1.1 --> 1L
Aggregate function missing, defaulting to 'length'
   v2 1 2
1:  1 2 2
2:  2 2 2
3:  3 2 2
# multiple value.var and multiple fun.aggregate
DT = data.table(x=sample(5,20,TRUE), y=sample(2,20,TRUE),
                z=sample(letters[1:2], 20,TRUE), d1 = runif(20), d2=1L)
# multiple value.var
dcast(DT, x + y ~ z, fun=sum, value.var=c("d1","d2"))
    x y      d1_a      d1_b d2_a d2_b
 1: 1 1 0.5629049 0.7229967    1    1
 2: 1 2 0.3269525 1.6225416    1    2
 3: 2 1 0.1451689 0.3708269    1    1
 4: 2 2 0.0000000 0.5857566    0    1
 5: 3 1 0.3415015 0.0000000    1    0
 6: 3 2 0.0000000 0.1599543    0    1
 7: 4 1 0.2328277 1.2163449    1    3
 8: 4 2 0.0000000 0.7405870    0    1
 9: 5 1 2.8513121 0.0000000    4    0
10: 5 2 0.6390181 0.0000000    1    0
# multiple fun.aggregate
dcast(DT, x + y ~ z, fun=list(sum, mean), value.var="d1")
    x y  d1_sum_a  d1_sum_b d1_mean_a d1_mean_b
 1: 1 1 0.5629049 0.7229967 0.5629049 0.7229967
 2: 1 2 0.3269525 1.6225416 0.3269525 0.8112708
 3: 2 1 0.1451689 0.3708269 0.1451689 0.3708269
 4: 2 2 0.0000000 0.5857566       NaN 0.5857566
 5: 3 1 0.3415015 0.0000000 0.3415015       NaN
 6: 3 2 0.0000000 0.1599543       NaN 0.1599543
 7: 4 1 0.2328277 1.2163449 0.2328277 0.4054483
 8: 4 2 0.0000000 0.7405870       NaN 0.7405870
 9: 5 1 2.8513121 0.0000000 0.7128280       NaN
10: 5 2 0.6390181 0.0000000 0.6390181       NaN
# multiple fun.agg and value.var (all combinations)
dcast(DT, x + y ~ z, fun=list(sum, mean), value.var=c("d1", "d2"))
    x y  d1_sum_a  d1_sum_b d2_sum_a d2_sum_b d1_mean_a d1_mean_b d2_mean_a
 1: 1 1 0.5629049 0.7229967        1        1 0.5629049 0.7229967         1
 2: 1 2 0.3269525 1.6225416        1        2 0.3269525 0.8112708         1
 3: 2 1 0.1451689 0.3708269        1        1 0.1451689 0.3708269         1
 4: 2 2 0.0000000 0.5857566        0        1       NaN 0.5857566       NaN
 5: 3 1 0.3415015 0.0000000        1        0 0.3415015       NaN         1
 6: 3 2 0.0000000 0.1599543        0        1       NaN 0.1599543       NaN
 7: 4 1 0.2328277 1.2163449        1        3 0.2328277 0.4054483         1
 8: 4 2 0.0000000 0.7405870        0        1       NaN 0.7405870       NaN
 9: 5 1 2.8513121 0.0000000        4        0 0.7128280       NaN         1
10: 5 2 0.6390181 0.0000000        1        0 0.6390181       NaN         1
    d2_mean_b
 1:         1
 2:         1
 3:         1
 4:         1
 5:       NaN
 6:         1
 7:         1
 8:         1
 9:       NaN
10:       NaN
# multiple fun.agg and value.var (one-to-one)
dcast(DT, x + y ~ z, fun=list(sum, mean), value.var=list("d1", "d2"))
    x y  d1_sum_a  d1_sum_b d2_mean_a d2_mean_b
 1: 1 1 0.5629049 0.7229967         1         1
 2: 1 2 0.3269525 1.6225416         1         1
 3: 2 1 0.1451689 0.3708269         1         1
 4: 2 2 0.0000000 0.5857566       NaN         1
 5: 3 1 0.3415015 0.0000000         1       NaN
 6: 3 2 0.0000000 0.1599543       NaN         1
 7: 4 1 0.2328277 1.2163449         1         1
 8: 4 2 0.0000000 0.7405870       NaN         1
 9: 5 1 2.8513121 0.0000000         1       NaN
10: 5 2 0.6390181 0.0000000         1       NaN

[Package data.table version 1.14.2 Index]