Examples for 'data.table::duplicated'


Determine Duplicate Rows

Aliases: duplicated duplicated.data.table unique unique.data.table anyDuplicated anyDuplicated.data.table uniqueN

Keywords: data

### ** Examples

DT <- data.table(A = rep(1:3, each=4), B = rep(1:4, each=3),
                  C = rep(1:2, 6), key = "A,B")
duplicated(DT)
 [1] FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
unique(DT)
    A B C
 1: 1 1 1
 2: 1 1 2
 3: 1 2 2
 4: 2 2 1
 5: 2 2 2
 6: 2 3 1
 7: 2 3 2
 8: 3 3 1
 9: 3 4 2
10: 3 4 1
duplicated(DT, by="B")
 [1] FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE
unique(DT, by="B")
   A B C
1: 1 1 1
2: 1 2 2
3: 2 3 1
4: 3 4 2
duplicated(DT, by=c("A", "C"))
 [1] FALSE FALSE  TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE  TRUE  TRUE
unique(DT, by=c("A", "C"))
   A B C
1: 1 1 1
2: 1 1 2
3: 2 2 1
4: 2 2 2
5: 3 3 1
6: 3 4 2
DT = data.table(a=c(2L,1L,2L), b=c(1L,2L,1L))   # no key
unique(DT)                   # rows 1 and 2 (row 3 is a duplicate of row 1)
   a b
1: 2 1
2: 1 2
DT = data.table(a=c(3.142, 4.2, 4.2, 3.142, 1.223, 1.223), b=rep(1,6))
unique(DT)                   # rows 1,2 and 5
       a b
1: 3.142 1
2: 4.200 1
3: 1.223 1
DT = data.table(a=tan(pi*(1/4 + 1:10)), b=rep(1,10))   # example from ?all.equal
length(unique(DT$a))         # 10 strictly unique floating point values
[1] 10
all.equal(DT$a,rep(1,10))    # TRUE, all within tolerance of 1.0
[1] TRUE
DT[,which.min(a)]            # row 10, the strictly smallest floating point value
[1] 10
identical(unique(DT),DT[1])  # FALSE, doubles are compared exactly (no tolerance), so all 10 rows are kept
[1] FALSE
identical(unique(DT),DT[10]) # FALSE
[1] FALSE
# fromLast=TRUE
DT <- data.table(A = rep(1:3, each=4), B = rep(1:4, each=3),
                 C = rep(1:2, 6), key = "A,B")
duplicated(DT, by="B", fromLast=TRUE)
 [1]  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE
unique(DT, by="B", fromLast=TRUE)
   A B C
1: 1 1 1
2: 2 2 2
3: 3 3 1
4: 3 4 2
# anyDuplicated
anyDuplicated(DT, by=c("A", "B"))    # 2L, index of the first duplicated row
[1] 2
any(duplicated(DT, by=c("A", "B")))  # TRUE
[1] TRUE
# uniqueN, unique rows on key columns
uniqueN(DT, by = key(DT))
[1] 6
# uniqueN, unique rows on all columns
uniqueN(DT)
[1] 10
# uniqueN while grouped by "A"
DT[, .(uN=uniqueN(.SD)), by=A]
   A uN
1: 1  3
2: 2  4
3: 3  3
# uniqueN's na.rm=TRUE
x = sample(c(NA, NaN, runif(3)), 10, TRUE)
uniqueN(x, na.rm = FALSE) # at most 5 (x is a random sample); na.rm = FALSE is the default
[1] 5
uniqueN(x, na.rm=TRUE) # at most 3; NA and NaN are not counted
[1] 3

[Package data.table version 1.14.2 Index]