Aliases: dcast.data.table dcast
Keywords: data
### ** Examples ChickWeight = as.data.table(ChickWeight) setnames(ChickWeight, tolower(names(ChickWeight))) DT <- melt(as.data.table(ChickWeight), id=2:4) # calls melt.data.table # dcast is an S3 method in data.table from v1.9.6 dcast(DT, time ~ variable, fun=mean) # using partial matching of argument
time weight 1: 0 41.06000 2: 2 49.22000 3: 4 59.95918 4: 6 74.30612 5: 8 91.24490 6: 10 107.83673 7: 12 129.24490 8: 14 143.81250 9: 16 168.08511 10: 18 190.19149 11: 20 209.71739 12: 21 218.68889
dcast(DT, diet ~ variable, fun=mean)
diet weight 1: 1 102.6455 2: 2 122.6167 3: 3 142.9500 4: 4 135.2627
dcast(DT, diet+chick ~ time, drop=FALSE)
diet chick 0 2 4 6 8 10 12 14 16 18 20 21 1: 1 18 39 35 NA NA NA NA NA NA NA NA NA NA 2: 1 16 41 45 49 51 57 51 54 NA NA NA NA NA 3: 1 15 41 49 56 64 68 68 67 68 NA NA NA NA 4: 1 13 41 48 53 60 65 67 71 70 71 81 91 96 5: 1 9 42 51 59 68 85 96 90 92 93 100 100 98 --- 196: 4 49 40 53 64 85 108 128 152 166 184 203 233 237 197: 4 46 40 52 62 82 101 120 144 156 173 210 231 238 198: 4 50 41 54 67 84 105 122 155 175 205 234 264 264 199: 4 42 42 49 63 84 103 126 160 174 204 234 269 281 200: 4 48 39 50 62 80 104 125 154 170 222 261 303 322
dcast(DT, diet+chick ~ time, drop=FALSE, fill=0)
diet chick 0 2 4 6 8 10 12 14 16 18 20 21 1: 1 18 39 35 0 0 0 0 0 0 0 0 0 0 2: 1 16 41 45 49 51 57 51 54 0 0 0 0 0 3: 1 15 41 49 56 64 68 68 67 68 0 0 0 0 4: 1 13 41 48 53 60 65 67 71 70 71 81 91 96 5: 1 9 42 51 59 68 85 96 90 92 93 100 100 98 --- 196: 4 49 40 53 64 85 108 128 152 166 184 203 233 237 197: 4 46 40 52 62 82 101 120 144 156 173 210 231 238 198: 4 50 41 54 67 84 105 122 155 175 205 234 264 264 199: 4 42 42 49 63 84 103 126 160 174 204 234 269 281 200: 4 48 39 50 62 80 104 125 154 170 222 261 303 322
# using subset dcast(DT, chick ~ time, fun=mean, subset=.(time < 10 & chick < 20))
chick 0 2 4 6 8 1: 18 39 35 NaN NaN NaN 2: 16 41 45 49 51 57 3: 15 41 49 56 64 68 4: 13 41 48 53 60 65 5: 9 42 51 59 68 85
# drop argument, #1512 DT <- data.table(v1 = c(1.1, 1.1, 1.1, 2.2, 2.2, 2.2), v2 = factor(c(1L, 1L, 1L, 3L, 3L, 3L), levels=1:3), v3 = factor(c(2L, 3L, 5L, 1L, 2L, 6L), levels=1:6), v4 = c(3L, 2L, 2L, 5L, 4L, 3L)) # drop=TRUE dcast(DT, v1 + v2 ~ v3) # default is drop=TRUE
v1 v2 1 2 3 5 6 1: 1.1 1 NA 3 2 2 NA 2: 2.2 3 5 4 NA NA 3
dcast(DT, v1 + v2 ~ v3, drop=FALSE) # all missing combinations of both LHS and RHS
v1 v2 1 2 3 4 5 6 1: 1.1 1 NA 3 2 NA 2 NA 2: 1.1 2 NA NA NA NA NA NA 3: 1.1 3 NA NA NA NA NA NA 4: 2.2 1 NA NA NA NA NA NA 5: 2.2 2 NA NA NA NA NA NA 6: 2.2 3 5 4 NA NA NA 3
dcast(DT, v1 + v2 ~ v3, drop=c(FALSE, TRUE)) # all missing combinations of only LHS
v1 v2 1 2 3 5 6 1: 1.1 1 NA 3 2 2 NA 2: 1.1 2 NA NA NA NA NA 3: 1.1 3 NA NA NA NA NA 4: 2.2 1 NA NA NA NA NA 5: 2.2 2 NA NA NA NA NA 6: 2.2 3 5 4 NA NA 3
dcast(DT, v1 + v2 ~ v3, drop=c(TRUE, FALSE)) # all missing combinations of only RHS
v1 v2 1 2 3 4 5 6 1: 1.1 1 NA 3 2 NA 2 NA 2: 2.2 3 5 4 NA NA NA 3
# using . and ... DT <- data.table(v1 = rep(1:2, each = 6), v2 = rep(rep(1:3, 2), each = 2), v3 = rep(1:2, 6), v4 = rnorm(6)) dcast(DT, ... ~ v3, value.var = "v4") #same as v1 + v2 ~ v3, value.var = "v4"
v1 v2 1 2 1: 1 1 0.05155202 -0.2758148 2: 1 2 -0.11155089 -0.7805347 3: 1 3 -1.01369590 1.1762403 4: 2 1 0.05155202 -0.2758148 5: 2 2 -0.11155089 -0.7805347 6: 2 3 -1.01369590 1.1762403
dcast(DT, v1 + v2 + v3 ~ ., value.var = "v4")
v1 v2 v3 . 1: 1 1 1 0.05155202 2: 1 1 2 -0.27581481 3: 1 2 1 -0.11155089 4: 1 2 2 -0.78053469 5: 1 3 1 -1.01369590 6: 1 3 2 1.17624028 7: 2 1 1 0.05155202 8: 2 1 2 -0.27581481 9: 2 2 1 -0.11155089 10: 2 2 2 -0.78053469 11: 2 3 1 -1.01369590 12: 2 3 2 1.17624028
## for each combination of (v1, v2), add up all values of v4 dcast(DT, v1 + v2 ~ ., value.var = "v4", fun.aggregate = sum)
v1 v2 . 1: 1 1 -0.2242628 2: 1 2 -0.8920856 3: 1 3 0.1625444 4: 2 1 -0.2242628 5: 2 2 -0.8920856 6: 2 3 0.1625444
# fill and types dcast(DT, v2 ~ v3, value.var = 'v1', fill = 0L) # 0L --> 0
v2 1 2 1: 1 2 2 2: 2 2 2 3: 3 2 2
dcast(DT, v2 ~ v3, value.var = 'v4', fill = 1.1) # 1.1 --> 1L
v2 1 2 1: 1 2 2 2: 2 2 2 3: 3 2 2
# multiple value.var and multiple fun.aggregate DT = data.table(x=sample(5,20,TRUE), y=sample(2,20,TRUE), z=sample(letters[1:2], 20,TRUE), d1 = runif(20), d2=1L) # multiple value.var dcast(DT, x + y ~ z, fun=sum, value.var=c("d1","d2"))
x y d1_a d1_b d2_a d2_b 1: 1 1 0.5629049 0.7229967 1 1 2: 1 2 0.3269525 1.6225416 1 2 3: 2 1 0.1451689 0.3708269 1 1 4: 2 2 0.0000000 0.5857566 0 1 5: 3 1 0.3415015 0.0000000 1 0 6: 3 2 0.0000000 0.1599543 0 1 7: 4 1 0.2328277 1.2163449 1 3 8: 4 2 0.0000000 0.7405870 0 1 9: 5 1 2.8513121 0.0000000 4 0 10: 5 2 0.6390181 0.0000000 1 0
# multiple fun.aggregate dcast(DT, x + y ~ z, fun=list(sum, mean), value.var="d1")
x y d1_sum_a d1_sum_b d1_mean_a d1_mean_b 1: 1 1 0.5629049 0.7229967 0.5629049 0.7229967 2: 1 2 0.3269525 1.6225416 0.3269525 0.8112708 3: 2 1 0.1451689 0.3708269 0.1451689 0.3708269 4: 2 2 0.0000000 0.5857566 NaN 0.5857566 5: 3 1 0.3415015 0.0000000 0.3415015 NaN 6: 3 2 0.0000000 0.1599543 NaN 0.1599543 7: 4 1 0.2328277 1.2163449 0.2328277 0.4054483 8: 4 2 0.0000000 0.7405870 NaN 0.7405870 9: 5 1 2.8513121 0.0000000 0.7128280 NaN 10: 5 2 0.6390181 0.0000000 0.6390181 NaN
# multiple fun.agg and value.var (all combinations) dcast(DT, x + y ~ z, fun=list(sum, mean), value.var=c("d1", "d2"))
x y d1_sum_a d1_sum_b d2_sum_a d2_sum_b d1_mean_a d1_mean_b d2_mean_a 1: 1 1 0.5629049 0.7229967 1 1 0.5629049 0.7229967 1 2: 1 2 0.3269525 1.6225416 1 2 0.3269525 0.8112708 1 3: 2 1 0.1451689 0.3708269 1 1 0.1451689 0.3708269 1 4: 2 2 0.0000000 0.5857566 0 1 NaN 0.5857566 NaN 5: 3 1 0.3415015 0.0000000 1 0 0.3415015 NaN 1 6: 3 2 0.0000000 0.1599543 0 1 NaN 0.1599543 NaN 7: 4 1 0.2328277 1.2163449 1 3 0.2328277 0.4054483 1 8: 4 2 0.0000000 0.7405870 0 1 NaN 0.7405870 NaN 9: 5 1 2.8513121 0.0000000 4 0 0.7128280 NaN 1 10: 5 2 0.6390181 0.0000000 1 0 0.6390181 NaN 1 d2_mean_b 1: 1 2: 1 3: 1 4: 1 5: NaN 6: 1 7: 1 8: 1 9: NaN 10: NaN
# multiple fun.agg and value.var (one-to-one) dcast(DT, x + y ~ z, fun=list(sum, mean), value.var=list("d1", "d2"))
x y d1_sum_a d1_sum_b d2_mean_a d2_mean_b 1: 1 1 0.5629049 0.7229967 1 1 2: 1 2 0.3269525 1.6225416 1 1 3: 2 1 0.1451689 0.3708269 1 1 4: 2 2 0.0000000 0.5857566 NaN 1 5: 3 1 0.3415015 0.0000000 1 NaN 6: 3 2 0.0000000 0.1599543 NaN 1 7: 4 1 0.2328277 1.2163449 1 1 8: 4 2 0.0000000 0.7405870 NaN 1 9: 5 1 2.8513121 0.0000000 1 NaN 10: 5 2 0.6390181 0.0000000 1 NaN