Aliases: duplicated duplicated.data.table unique unique.data.table anyDuplicated anyDuplicated.data.table uniqueN
Keywords: data
### ** Examples DT <- data.table(A = rep(1:3, each=4), B = rep(1:4, each=3), C = rep(1:2, 6), key = "A,B") duplicated(DT)
[1] FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
unique(DT)
A B C 1: 1 1 1 2: 1 1 2 3: 1 2 2 4: 2 2 1 5: 2 2 2 6: 2 3 1 7: 2 3 2 8: 3 3 1 9: 3 4 2 10: 3 4 1
duplicated(DT, by="B")
[1] FALSE TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE
unique(DT, by="B")
A B C 1: 1 1 1 2: 1 2 2 3: 2 3 1 4: 3 4 2
duplicated(DT, by=c("A", "C"))
[1] FALSE FALSE TRUE TRUE FALSE FALSE TRUE TRUE FALSE FALSE TRUE TRUE
unique(DT, by=c("A", "C"))
A B C 1: 1 1 1 2: 1 1 2 3: 2 2 1 4: 2 2 2 5: 3 3 1 6: 3 4 2
DT = data.table(a=c(2L,1L,2L), b=c(1L,2L,1L)) # no key unique(DT) # rows 1 and 2 (row 3 is a duplicate of row 1)
a b 1: 2 1 2: 1 2
DT = data.table(a=c(3.142, 4.2, 4.2, 3.142, 1.223, 1.223), b=rep(1,6)) unique(DT) # rows 1,2 and 5
a b 1: 3.142 1 2: 4.200 1 3: 1.223 1
DT = data.table(a=tan(pi*(1/4 + 1:10)), b=rep(1,10)) # example from ?all.equal length(unique(DT$a)) # 10 strictly unique floating point values
[1] 10
all.equal(DT$a,rep(1,10)) # TRUE, all within tolerance of 1.0
[1] TRUE
DT[,which.min(a)] # row 10, the strictly smallest floating point value
[1] 10
identical(unique(DT),DT[1]) # FALSE, the 10 values are not collapsed within tolerance, so unique(DT) is not just row 1
[1] FALSE
identical(unique(DT),DT[10]) # FALSE
[1] FALSE
# fromLast=TRUE DT <- data.table(A = rep(1:3, each=4), B = rep(1:4, each=3), C = rep(1:2, 6), key = "A,B") duplicated(DT, by="B", fromLast=TRUE)
[1] TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE FALSE
unique(DT, by="B", fromLast=TRUE)
A B C 1: 1 1 1 2: 2 2 2 3: 3 3 1 4: 3 4 2
# anyDuplicated anyDuplicated(DT, by=c("A", "B")) # 2L, row 2 is the first row that repeats an earlier (A, B) pair
[1] 2
any(duplicated(DT, by=c("A", "B"))) # TRUE
[1] TRUE
# uniqueN, unique rows on key columns uniqueN(DT, by = key(DT))
[1] 6
# uniqueN, unique rows on all columns uniqueN(DT)
[1] 10
# uniqueN while grouped by "A" DT[, .(uN=uniqueN(.SD)), by=A]
A uN 1: 1 3 2: 2 4 3: 3 3
# uniqueN's na.rm=TRUE x = sample(c(NA, NaN, runif(3)), 10, TRUE) uniqueN(x, na.rm = FALSE) # 5, default
[1] 5
uniqueN(x, na.rm=TRUE) # 3
[1] 3