Aliases: pr_DB registry summary.pr_DB
Keywords: cluster
### ** Examples
## create a new distance measure
mydist <- function(x,y) x * y
## create a new entry in the registry with two aliases
pr_DB$set_entry(FUN = mydist, names = c("test", "mydist"))
## look it up (index is case insensitive):
pr_DB$get_entry("TEST")
names test, mydist FUN function (x, y) x * y distance TRUE PREFUN NA POSTFUN NA convert NA type other loop TRUE C_FUN FALSE PACKAGE proxy abcd FALSE formula NA reference NA description NA
## modify the content of the description field in the new entry
pr_DB$modify_entry(names = "test", description = "foo function")
## create a new field
pr_DB$set_field("New")
## look up the test entry again (two ways)
pr_DB$get_entry("test")
names test, mydist FUN function (x, y) x * y distance TRUE PREFUN NA POSTFUN NA convert NA type other loop TRUE C_FUN FALSE PACKAGE proxy abcd FALSE formula NA reference NA description foo function New NA
pr_DB[["test"]]
names test, mydist FUN function (x, y) x * y distance TRUE PREFUN NA POSTFUN NA convert NA type other loop TRUE C_FUN FALSE PACKAGE proxy abcd FALSE formula NA reference NA description foo function New NA
## show total number of entries
length(pr_DB)
[1] 51
## show only the entries matching a pattern (here "foo", which occurs in the description of the test entry)
pr_DB$get_entries(pattern = "foo")
$test names test, mydist FUN function (x, y) x * y distance TRUE PREFUN NA POSTFUN NA convert NA type other loop TRUE C_FUN FALSE PACKAGE proxy abcd FALSE formula NA reference NA description foo function New NA
## show more details
summary(pr_DB, "long")
* Similarity measures: Jaccard/binary/Reyssac/Roux (binary) = a / (a + b + c) Kulczynski1 (binary) = a / (b + c) Kulczynski2 (binary) = [a / (a + b) + a / (a + c)] / 2 Mountford (binary) = 2a / (ab + ac + 2bc) Fager/McGowan (binary) = a / sqrt((a + b)(a + c)) - sqrt(a + c) / 2 Russel/Rao (binary) = a / n simple matching/Sokal/Michener (binary) = (a + d) / n Hamman (binary) = ([a + d] - [b + c]) / n Faith (binary) = (a + d/2) / n Tanimoto/Rogers (binary) = (a + d) / (a + 2b + 2c + d) Dice/Czekanowski/Sorensen (binary) = 2a / (2a + b + c) Phi (binary) = (ad - bc) / sqrt[(a + b)(c + d)(a + c)(b + d)] Stiles (binary) = log(n(|ad-bc| - 0.5n)^2 / [(a + b)(c + d)(a + c)(b + d)]) Michael (binary) = 4(ad - bc) / [(a + d)^2 + (b + c)^2] Mozley/Margalef (binary) = an / (a + b)(a + c) Yule (binary) = (ad - bc) / (ad + bc) Yule2 (binary) = (sqrt(ad) - sqrt(bc)) / (sqrt(ad) + sqrt(bc)) Ochiai (binary) = a / sqrt[(a + b)(a + c)] Simpson (binary) = a / min{(a + b), (a + c)} Braun-Blanquet (binary) = a / max{(a + b), (a + c)} cosine (metric) = xy / sqrt(xx * yy) angular (metric) = 1 - acos(xy / sqrt(xx * yy)) / pi eJaccard/extended_Jaccard (metric) = xy / (xx + yy - xy) eDice/extended_Dice/eSorensen (metric) = 2xy / (xx + yy) correlation (metric) = xy / sqrt(xx * yy) for centered x,y Chi-squared (nominal) = sum_ij (o_i - e_i)^2 / e_i Phi-squared (nominal) = [sum_ij (o_i - e_i)^2 / e_i] / n Tschuprow (nominal) = sqrt{[sum_ij (o_i - e_i)^2 / e_i] / n / sqrt((p - 1)(q - 1))} Cramer (nominal) = sqrt{[Chi / n)] / min[(p - 1), (q - 1)]} Pearson/contingency (nominal) = sqrt{Chi / (n + Chi)} Gower (other) = Sum_k (s_ijk * w_k) / Sum_k (d_ijk * w_k) * Distance measures: Euclidean/L2 (metric) = sqrt(sum_i (x_i - y_i)^2)) Mahalanobis (metric) = sqrt((x - y) Sigma^(-1) (x - y)) Bhjattacharyya (metric) = sqrt(sum_i (sqrt(x_i) - sqrt(y_i))^2)) Manhattan/City-Block/L1/taxi (metric) = sum_i |x_i - y_i| supremum/max/maximum/Tschebyscheff/Chebyshev (metric) = max_i |x_i - y_i| Minkowski/Lp (metric) 
= (sum_i (x_i - y_i)^p)^(1/p) Canberra (metric) = sum_i |x_i - y_i| / |x_i + y_i| Wave/Hedges (metric) = sum_i (1 - min(x_i, y_i) / max(x_i, y_i)) divergence (metric) = sum_i (x_i - y_i)^2 / (x_i + y_i)^2 Kullback/Leibler (metric) = sum_i [x_i * log((x_i / sum_j x_j) / (y_i / sum_j y_j)) / sum_j x_j)] Bray/Curtis (metric) = sum_i |x_i - y_i| / sum_i (x_i + y_i) Soergel (metric) = sum_i |x_i - y_i| / sum_i max{x_i, y_i} Levenshtein (other) = Number of insertions, edits, and deletions between to strings Podani/discordance (metric) = 1 - 2 * (a - b + c - d) / (n * (n - 1)) Chord (metric) = sqrt(2 * (1 - xy / sqrt(xx * yy))) Geodesic (metric) = arccos(xy / sqrt(xx * yy)) Whittaker (metric) = sum_i |x_i / sum_i x - y_i / sum_i y| / 2 Hellinger (metric) = sqrt(sum_i (sqrt(x_i / sum_i x) - sqrt(y_i / sum_i y)) ^ 2) fJaccard/fuzzy_Jaccard (metric) = sum_i (min{x_i, y_i} / max{x_i, y_i}) test/mydist (other) = NA
## get all entries in a list (and extract the first two)
pr_DB$get_entries()[1:2]
$Jaccard names Jaccard, binary, Reyssac, Roux FUN R_bjaccard distance FALSE PREFUN pr_Jaccard_prefun POSTFUN NA convert pr_simil2dist type binary loop FALSE C_FUN TRUE PACKAGE proxy abcd FALSE formula a / (a + b + c) reference Jaccard, P. (1908). Nouvelles recherches sur la distribution florale. Bull. Soc. Vaud. Sci. Nat., 44, pp. 223--270. description The Jaccard Similarity (C implementation) for binary data. It is the proportion of (TRUE, TRUE) pairs, but not considering (FALSE, FALSE) pairs. So it compares the intersection with the union of object sets. New NA $Kulczynski1 names Kulczynski1 FUN pr_Kulczynski1 distance FALSE PREFUN NA POSTFUN NA convert pr_simil2dist type binary loop TRUE C_FUN FALSE PACKAGE proxy abcd TRUE formula a / (b + c) reference Kurzcynski, T.W. (1970). Generalized distance and discrete variables. Biometrics, 26, pp. 525--534. description Kulczynski Similarity for binary data. Relates the (TRUE, TRUE) pairs to discordant pairs. New NA
## get all entries as a data frame (select first 3 fields)
as.data.frame(pr_DB)[,1:3]
FUN Jaccard R_bjaccard Kulczynski1 pr_Kulczynski1 Kulczynski2 pr_Kulczynski2 Mountford pr_Mountford Fager pr_fagerMcgowan Russel pr_RusselRao simple matching pr_SimpleMatching Hamman pr_Hamman Faith pr_Faith Tanimoto pr_RogersTanimoto Dice pr_Dice Phi pr_Phi Stiles pr_Stiles Michael pr_Michael Mozley pr_MozleyMargalef Yule pr_Yule Yule2 pr_Yule2 Ochiai pr_Ochiai Simpson pr_Simpson Braun-Blanquet pr_BraunBlanquet cosine R_cosine angular function (x, y) 1 - acos(crossprod(x, y)/sqrt(crossprod(x) * crossprod(y)))/pi eJaccard R_ejaccard eDice R_edice correlation pr_cor Chi-squared pr_ChiSquared Phi-squared pr_PhiSquared Tschuprow pr_Tschuprow Cramer pr_Cramer Pearson pr_Pearson Gower pr_Gower Euclidean R_euclidean_dist Mahalanobis pr_Mahalanobis Bhjattacharyya pr_Bhjattacharyya Manhattan R_manhattan_dist supremum R_maximum_dist Minkowski R_minkowski_dist Canberra R_canberra_dist Wave pr_WaveHedges divergence pr_Divergence Kullback pr_KullbackLeibler Bray pr_BrayCurtis Soergel pr_Soergel Levenshtein sdists Podani pr_Podani Chord function (x, y) sqrt(2 * (1 - crossprod(x, y)/sqrt(crossprod(x) * crossprod(y)))) Geodesic function (x, y) acos(crossprod(x, y)/sqrt(crossprod(x) * crossprod(y))) Whittaker function (x, y) sum(abs(x/sum(x) - y/sum(y)))/2 Hellinger function (x, y) sqrt(crossprod(sqrt(x/sum(x)) - sqrt(y/sum(y)))) fJaccard R_fuzzy_dist test function (x, y) x * y distance PREFUN Jaccard FALSE pr_Jaccard_prefun Kulczynski1 FALSE <NA> Kulczynski2 FALSE <NA> Mountford FALSE <NA> Fager FALSE <NA> Russel FALSE <NA> simple matching FALSE <NA> Hamman FALSE <NA> Faith FALSE <NA> Tanimoto FALSE <NA> Dice FALSE <NA> Phi FALSE <NA> Stiles FALSE <NA> Michael FALSE <NA> Mozley FALSE <NA> Yule FALSE <NA> Yule2 FALSE <NA> Ochiai FALSE <NA> Simpson FALSE <NA> Braun-Blanquet FALSE <NA> cosine FALSE pr_cos_prefun angular FALSE <NA> eJaccard FALSE pr_eJaccard_prefun eDice FALSE pr_eDice_prefun correlation FALSE <NA> Chi-squared FALSE <NA> Phi-squared FALSE <NA> Tschuprow FALSE <NA> 
Cramer FALSE <NA> Pearson FALSE <NA> Gower FALSE pr_Gower_prefun Euclidean TRUE pr_Euclidean_prefun Mahalanobis TRUE pr_Mahalanobis_prefun Bhjattacharyya TRUE <NA> Manhattan TRUE pr_Manhattan_prefun supremum TRUE pr_supremum_prefun Minkowski TRUE pr_Minkowski_prefun Canberra TRUE pr_Canberra_prefun Wave TRUE <NA> divergence TRUE <NA> Kullback TRUE <NA> Bray TRUE <NA> Soergel TRUE <NA> Levenshtein TRUE pr_Levenshtein_prefun Podani TRUE <NA> Chord TRUE <NA> Geodesic TRUE <NA> Whittaker TRUE <NA> Hellinger TRUE <NA> fJaccard TRUE pr_fJaccard_prefun test TRUE <NA>
## delete test entry
pr_DB$delete_entry("test")
## check if it is really gone
pr_DB$entry_exists("test")
[1] FALSE