Examples for 'proxy::pr_DB'


Registry of proximities

Aliases: pr_DB registry summary.pr_DB

Keywords: cluster

### ** Examples

## create a new distance measure
mydist <- function(x,y) x * y

## create a new entry in the registry with two aliases
pr_DB$set_entry(FUN = mydist, names = c("test", "mydist"))

## look it up (index is case insensitive):
pr_DB$get_entry("TEST")
      names test, mydist
        FUN function (x, y) x * y
   distance TRUE
     PREFUN NA
    POSTFUN NA
    convert NA
       type other
       loop TRUE
      C_FUN FALSE
    PACKAGE proxy
       abcd FALSE
    formula NA
  reference NA
description NA
## modify the content of the description field in the new entry
pr_DB$modify_entry(names = "test", description = "foo function")

## create a new field
pr_DB$set_field("New")

## look up the test entry again (two ways)
pr_DB$get_entry("test")
      names test, mydist
        FUN function (x, y) x * y
   distance TRUE
     PREFUN NA
    POSTFUN NA
    convert NA
       type other
       loop TRUE
      C_FUN FALSE
    PACKAGE proxy
       abcd FALSE
    formula NA
  reference NA
description foo function
        New NA
pr_DB[["test"]]
      names test, mydist
        FUN function (x, y) x * y
   distance TRUE
     PREFUN NA
    POSTFUN NA
    convert NA
       type other
       loop TRUE
      C_FUN FALSE
    PACKAGE proxy
       abcd FALSE
    formula NA
  reference NA
description foo function
        New NA
## show total number of entries
length(pr_DB)
[1] 51
## show only the entries matching the pattern "foo" (here: the test entry, via its description)
pr_DB$get_entries(pattern = "foo")
$test
      names test, mydist
        FUN function (x, y) x * y
   distance TRUE
     PREFUN NA
    POSTFUN NA
    convert NA
       type other
       loop TRUE
      C_FUN FALSE
    PACKAGE proxy
       abcd FALSE
    formula NA
  reference NA
description foo function
        New NA
## show more details
summary(pr_DB, "long")
* Similarity measures:
     Jaccard/binary/Reyssac/Roux (binary) = a / (a + b + c)
     Kulczynski1 (binary) = a / (b + c)
     Kulczynski2 (binary) = [a / (a + b) + a / (a + c)] / 2
     Mountford (binary) = 2a / (ab + ac + 2bc)
     Fager/McGowan (binary) = a / sqrt((a + b)(a + c)) - sqrt(a + c) / 2
     Russel/Rao (binary) = a / n
     simple matching/Sokal/Michener (binary) = (a + d) / n
     Hamman (binary) = ([a + d] - [b + c]) / n
     Faith (binary) = (a + d/2) / n
     Tanimoto/Rogers (binary) = (a + d) / (a + 2b + 2c + d)
     Dice/Czekanowski/Sorensen (binary) = 2a / (2a + b + c)
     Phi (binary) = (ad - bc) / sqrt[(a + b)(c + d)(a + c)(b + d)]
     Stiles (binary) = log(n(|ad-bc| - 0.5n)^2 / [(a + b)(c + d)(a + c)(b + d)])
     Michael (binary) = 4(ad - bc) / [(a + d)^2 + (b + c)^2]
     Mozley/Margalef (binary) = an / (a + b)(a + c)
     Yule (binary) = (ad - bc) / (ad + bc)
     Yule2 (binary) = (sqrt(ad) - sqrt(bc)) / (sqrt(ad) + sqrt(bc))
     Ochiai (binary) = a / sqrt[(a + b)(a + c)]
     Simpson (binary) = a / min{(a + b), (a + c)}
     Braun-Blanquet (binary) = a / max{(a + b), (a + c)}
     cosine (metric) = xy / sqrt(xx * yy)
     angular (metric) = 1 - acos(xy / sqrt(xx * yy)) / pi
     eJaccard/extended_Jaccard (metric) = xy / (xx + yy - xy)
     eDice/extended_Dice/eSorensen (metric) = 2xy / (xx + yy)
     correlation (metric) = xy / sqrt(xx * yy) for centered x,y
     Chi-squared (nominal) = sum_ij (o_i - e_i)^2 / e_i
     Phi-squared (nominal) = [sum_ij (o_i - e_i)^2 / e_i] / n
     Tschuprow (nominal) = sqrt{[sum_ij (o_i - e_i)^2 / e_i] / n / sqrt((p - 1)(q - 1))}
     Cramer (nominal) = sqrt{[Chi / n)] / min[(p - 1), (q - 1)]}
     Pearson/contingency (nominal) = sqrt{Chi / (n + Chi)}
     Gower (other) = Sum_k (s_ijk * w_k) / Sum_k (d_ijk * w_k)

* Distance measures:
     Euclidean/L2 (metric) = sqrt(sum_i (x_i - y_i)^2))
     Mahalanobis (metric) = sqrt((x - y) Sigma^(-1) (x - y))
     Bhjattacharyya (metric) = sqrt(sum_i (sqrt(x_i) - sqrt(y_i))^2))
     Manhattan/City-Block/L1/taxi (metric) = sum_i |x_i - y_i|
     supremum/max/maximum/Tschebyscheff/Chebyshev (metric) = max_i |x_i - y_i|
     Minkowski/Lp (metric) = (sum_i (x_i - y_i)^p)^(1/p)
     Canberra (metric) = sum_i |x_i - y_i| / |x_i + y_i|
     Wave/Hedges (metric) = sum_i (1 - min(x_i, y_i) / max(x_i, y_i))
     divergence (metric) = sum_i (x_i - y_i)^2 / (x_i + y_i)^2
     Kullback/Leibler (metric) = sum_i [x_i * log((x_i / sum_j x_j) / (y_i / sum_j y_j)) / sum_j x_j)]
     Bray/Curtis (metric) = sum_i |x_i - y_i| / sum_i (x_i + y_i)
     Soergel (metric) = sum_i |x_i - y_i| / sum_i max{x_i, y_i}
     Levenshtein (other) = Number of insertions, edits, and deletions between to strings
     Podani/discordance (metric) = 1 - 2 * (a - b + c - d) / (n * (n - 1))
     Chord (metric) = sqrt(2 * (1 - xy / sqrt(xx * yy)))
     Geodesic (metric) = arccos(xy / sqrt(xx * yy))
     Whittaker (metric) = sum_i |x_i / sum_i x - y_i / sum_i y| / 2
     Hellinger (metric) = sqrt(sum_i (sqrt(x_i / sum_i x) - sqrt(y_i / sum_i y)) ^ 2)
     fJaccard/fuzzy_Jaccard (metric) = sum_i (min{x_i, y_i} / max{x_i, y_i})
     test/mydist (other) = NA
## get all entries as a list (and extract the first two)
pr_DB$get_entries()[1:2]
$Jaccard
      names Jaccard, binary, Reyssac, Roux
        FUN R_bjaccard
   distance FALSE
     PREFUN pr_Jaccard_prefun
    POSTFUN NA
    convert pr_simil2dist
       type binary
       loop FALSE
      C_FUN TRUE
    PACKAGE proxy
       abcd FALSE
    formula a / (a + b + c)
  reference Jaccard, P. (1908). Nouvelles recherches sur la
            distribution florale. Bull. Soc. Vaud. Sci. Nat., 44, pp.
            223--270.
description The Jaccard Similarity (C implementation) for binary data.
            It is the proportion of (TRUE, TRUE) pairs, but not
            considering (FALSE, FALSE) pairs. So it compares the
            intersection with the union of object sets.
        New NA

$Kulczynski1
      names Kulczynski1
        FUN pr_Kulczynski1
   distance FALSE
     PREFUN NA
    POSTFUN NA
    convert pr_simil2dist
       type binary
       loop TRUE
      C_FUN FALSE
    PACKAGE proxy
       abcd TRUE
    formula a / (b + c)
  reference Kurzcynski, T.W. (1970). Generalized distance and discrete
            variables. Biometrics, 26, pp. 525--534.
description Kulczynski Similarity for binary data. Relates the (TRUE,
            TRUE) pairs to discordant pairs.
        New NA
## get all entries as a data frame (select first 3 fields)
as.data.frame(pr_DB)[,1:3]
                                                                                              FUN
Jaccard                                                                                R_bjaccard
Kulczynski1                                                                        pr_Kulczynski1
Kulczynski2                                                                        pr_Kulczynski2
Mountford                                                                            pr_Mountford
Fager                                                                             pr_fagerMcgowan
Russel                                                                               pr_RusselRao
simple matching                                                                 pr_SimpleMatching
Hamman                                                                                  pr_Hamman
Faith                                                                                    pr_Faith
Tanimoto                                                                        pr_RogersTanimoto
Dice                                                                                      pr_Dice
Phi                                                                                        pr_Phi
Stiles                                                                                  pr_Stiles
Michael                                                                                pr_Michael
Mozley                                                                          pr_MozleyMargalef
Yule                                                                                      pr_Yule
Yule2                                                                                    pr_Yule2
Ochiai                                                                                  pr_Ochiai
Simpson                                                                                pr_Simpson
Braun-Blanquet                                                                   pr_BraunBlanquet
cosine                                                                                   R_cosine
angular            function (x, y) 1 - acos(crossprod(x, y)/sqrt(crossprod(x) * crossprod(y)))/pi
eJaccard                                                                               R_ejaccard
eDice                                                                                     R_edice
correlation                                                                                pr_cor
Chi-squared                                                                         pr_ChiSquared
Phi-squared                                                                         pr_PhiSquared
Tschuprow                                                                            pr_Tschuprow
Cramer                                                                                  pr_Cramer
Pearson                                                                                pr_Pearson
Gower                                                                                    pr_Gower
Euclidean                                                                        R_euclidean_dist
Mahalanobis                                                                        pr_Mahalanobis
Bhjattacharyya                                                                  pr_Bhjattacharyya
Manhattan                                                                        R_manhattan_dist
supremum                                                                           R_maximum_dist
Minkowski                                                                        R_minkowski_dist
Canberra                                                                          R_canberra_dist
Wave                                                                                pr_WaveHedges
divergence                                                                          pr_Divergence
Kullback                                                                       pr_KullbackLeibler
Bray                                                                                pr_BrayCurtis
Soergel                                                                                pr_Soergel
Levenshtein                                                                                sdists
Podani                                                                                  pr_Podani
Chord           function (x, y) sqrt(2 * (1 - crossprod(x, y)/sqrt(crossprod(x) * crossprod(y))))
Geodesic                  function (x, y) acos(crossprod(x, y)/sqrt(crossprod(x) * crossprod(y)))
Whittaker                                         function (x, y) sum(abs(x/sum(x) - y/sum(y)))/2
Hellinger                        function (x, y) sqrt(crossprod(sqrt(x/sum(x)) - sqrt(y/sum(y))))
fJaccard                                                                             R_fuzzy_dist
test                                                                        function (x, y) x * y
                distance                PREFUN
Jaccard            FALSE     pr_Jaccard_prefun
Kulczynski1        FALSE                  <NA>
Kulczynski2        FALSE                  <NA>
Mountford          FALSE                  <NA>
Fager              FALSE                  <NA>
Russel             FALSE                  <NA>
simple matching    FALSE                  <NA>
Hamman             FALSE                  <NA>
Faith              FALSE                  <NA>
Tanimoto           FALSE                  <NA>
Dice               FALSE                  <NA>
Phi                FALSE                  <NA>
Stiles             FALSE                  <NA>
Michael            FALSE                  <NA>
Mozley             FALSE                  <NA>
Yule               FALSE                  <NA>
Yule2              FALSE                  <NA>
Ochiai             FALSE                  <NA>
Simpson            FALSE                  <NA>
Braun-Blanquet     FALSE                  <NA>
cosine             FALSE         pr_cos_prefun
angular            FALSE                  <NA>
eJaccard           FALSE    pr_eJaccard_prefun
eDice              FALSE       pr_eDice_prefun
correlation        FALSE                  <NA>
Chi-squared        FALSE                  <NA>
Phi-squared        FALSE                  <NA>
Tschuprow          FALSE                  <NA>
Cramer             FALSE                  <NA>
Pearson            FALSE                  <NA>
Gower              FALSE       pr_Gower_prefun
Euclidean           TRUE   pr_Euclidean_prefun
Mahalanobis         TRUE pr_Mahalanobis_prefun
Bhjattacharyya      TRUE                  <NA>
Manhattan           TRUE   pr_Manhattan_prefun
supremum            TRUE    pr_supremum_prefun
Minkowski           TRUE   pr_Minkowski_prefun
Canberra            TRUE    pr_Canberra_prefun
Wave                TRUE                  <NA>
divergence          TRUE                  <NA>
Kullback            TRUE                  <NA>
Bray                TRUE                  <NA>
Soergel             TRUE                  <NA>
Levenshtein         TRUE pr_Levenshtein_prefun
Podani              TRUE                  <NA>
Chord               TRUE                  <NA>
Geodesic            TRUE                  <NA>
Whittaker           TRUE                  <NA>
Hellinger           TRUE                  <NA>
fJaccard            TRUE    pr_fJaccard_prefun
test                TRUE                  <NA>
## delete test entry
pr_DB$delete_entry("test")

## check if it is really gone
pr_DB$entry_exists("test")
[1] FALSE

[Package proxy version 0.4-27 Index]