Skip to contents

K-Centroids Cluster Analysis - a unified framework for partitional clustering with selectable distance / centroid families: standard k-means, k-medians, spherical k-means ("angle"), Jaccard, and extended Jaccard. Calls flexclust::kcca() from package flexclust.

The k parameter is set to 2 by default since flexclust::kcca() has no default value for the number of clusters. Predictions are dispatched to flexclust's S4 predict method via methods::getMethod("predict", "kccasimple") rather than by calling predict() directly. Both flexclust and kernlab define an S4 class named "kcca", and the resulting class-cache collision can break S4 dispatch when both packages are loaded.

Dictionary

This mlr3::Learner can be instantiated via the dictionary mlr3::mlr_learners or with the associated sugar function mlr3::lrn():

mlr_learners$get("clust.kcca")
lrn("clust.kcca")

Meta Information

  • Task type: “clust”

  • Predict Types: “partition”

  • Feature Types: “logical”, “integer”, “numeric”

  • Required Packages: mlr3, mlr3cluster, flexclust

Parameters

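| Id | Type | Default | Levels | Range |
|---|---|---|---|---|
| k | integer | - | | \([1, \infty)\) |
| family | character | kmeans | kmeans, kmedians, angle, jaccard, ejaccard | - |
| weights | untyped | - | | - |
| group | untyped | - | | - |
| simple | logical | FALSE | TRUE, FALSE | - |
| save.data | logical | FALSE | TRUE, FALSE | - |
| iter.max | integer | 200 | | \([1, \infty)\) |
| tolerance | numeric | 1e-06 | | \([0, \infty)\) |
| verbose | integer | 0 | | \([0, \infty)\) |
| classify | character | auto | auto, weighted, hard | - |
| initcent | untyped | - | | - |
| gamma | numeric | 1 | | \([0, \infty)\) |
| ntry | integer | 5 | | \([1, \infty)\) |
| min.size | integer | 2 | | \([1, \infty)\) |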

References

Leisch, Friedrich (2006). “A Toolbox for K-Centroids Cluster Analysis.” Computational Statistics & Data Analysis, 51(2), 526–544. doi:10.1016/j.csda.2005.10.006.

See also

Other Learner: mlr_learners_clust.MBatchKMeans, mlr_learners_clust.SimpleKMeans, mlr_learners_clust.agnes, mlr_learners_clust.ap, mlr_learners_clust.bico, mlr_learners_clust.birch, mlr_learners_clust.clara, mlr_learners_clust.cmeans, mlr_learners_clust.cobweb, mlr_learners_clust.dbscan, mlr_learners_clust.dbscan_fpc, mlr_learners_clust.diana, mlr_learners_clust.em, mlr_learners_clust.fanny, mlr_learners_clust.featureless, mlr_learners_clust.ff, mlr_learners_clust.flexmix, mlr_learners_clust.genie, mlr_learners_clust.hclust, mlr_learners_clust.hdbscan, mlr_learners_clust.kkmeans, mlr_learners_clust.kmeans, mlr_learners_clust.kproto, mlr_learners_clust.mclust, mlr_learners_clust.meanshift, mlr_learners_clust.movMF, mlr_learners_clust.optics, mlr_learners_clust.pam, mlr_learners_clust.protoclust, mlr_learners_clust.skmeans, mlr_learners_clust.som, mlr_learners_clust.specc, mlr_learners_clust.stdbscan, mlr_learners_clust.tclust, mlr_learners_clust.xmeans

Super classes

mlr3::Learner -> LearnerClust -> LearnerClustKCCA

Methods

Inherited methods


LearnerClustKCCA$new()

Creates a new instance of this R6 class.

Usage

LearnerClustKCCA$new()

LearnerClustKCCA$clone()

The objects of this class are cloneable with this method.

Usage

LearnerClustKCCA$clone(deep = FALSE)

Arguments

deep

Whether to make a deep clone.

Examples

# Define the Learner and set parameter values
learner = lrn("clust.kcca")
print(learner)
#> 
#> ── <LearnerClustKCCA> (clust.kcca): K-Centroids Cluster Analysis ───────────────
#> • Model: -
#> • Parameters: k=2
#> • Packages: mlr3, mlr3cluster, and flexclust
#> • Predict Types: [partition]
#> • Feature Types: logical, integer, and numeric
#> • Encapsulation: none (fallback: -)
#> • Properties: complete, exclusive, and partitional
#> • Other settings: use_weights = 'error', predict_raw = 'FALSE'

# Define a Task
task = tsk("usarrests")

# Train the learner on the task
learner$train(task)
#> Found more than one class "kcca" in cache; using the first, from namespace 'kernlab'
#> Also defined by ‘flexclust’
#> Found more than one class "kcca" in cache; using the first, from namespace 'kernlab'
#> Also defined by ‘flexclust’

# Print the model
print(learner$model)
#> kcca object of family ‘kmeans’ 
#> 
#> call:
#> flexclust::kcca(x = as.matrix(task$data()), k = 2L, family = new("kccaFamily", 
#>     name = "kmeans", dist = function (x, centers) 
#>     {
#>         if (ncol(x) != ncol(centers)) 
#>             stop(sQuote("x"), " and ", sQuote("centers"), " must have the same number of columns")
#>         z <- matrix(0, nrow = nrow(x), ncol = nrow(centers))
#>         for (k in 1:nrow(centers)) {
#>             z[, k] <- sqrt(colSums((t(x) - centers[k, ])^2))
#>         }
#>         z
#>     }, cent = function (x) 
#>     colMeans(x), allcent = function (x, cluster, k = max(cluster, 
#>         na.rm = TRUE)) 
#>     {
#>         centers <- matrix(NA, nrow = k, ncol = ncol(x))
#>         for (n in 1:k) {
#>             if (sum(cluster == n, na.rm = TRUE) > 0) {
#>                 centers[n, ] <- z@cent(x[cluster == n, , drop = FALSE])
#>             }
#>         }
#>         centers
#>     }, wcent = function (x, weights) 
#>     colMeans(x * normWeights(weights)), weighted = TRUE, cluster = function (x, 
#>         centers, n = 1, distmat = NULL) 
#>     {
#>         if (is.null(distmat)) 
#>             distmat <- z@dist(x, centers)
#>         if (n == 1) {
#>             return(max.col(-distmat))
#>         }
#>         else {
#>             r <- t(matrix(apply(distmat, 1, rank, ties.method = "random"), 
#>                 nrow = ncol(distmat)))
#>             z <- list()
#>             for (k in 1:n) z[[k]] <- apply(r, 1, function(x) which(x == 
#>                 k))
#>         }
#>         return(z)
#>     }, preproc = function (x) 
#>     x, groupFun = function (cluster, group, distmat) 
#>     {
#>         G <- levels(group)
#>         x <- matrix(0, ncol = ncol(distmat), nrow = length(G))
#>         for (n in 1:length(G)) {
#>             x[n, ] <- colSums(distmat[group == G[n], , drop = FALSE])
#>         }
#>         m <- max.col(-x)
#>         names(m) <- G
#>         z <- m[group]
#>         names(z) <- NULL
#>         if (is.list(cluster)) {
#>             x[cbind(1:nrow(x), m)] <- Inf
#>             m <- max.col(-x)
#>             names(m) <- G
#>             z1 <- m[group]
#>             names(z1) <- NULL
#>             z <- list(z, z1)
#>         }
#>         z
#>     }, genDist = function () 
#>     NULL))
#> 
#> cluster sizes:
#> 
#>  1  2 
#> 21 29 
#> 

# Make predictions for the task
prediction = learner$predict(task)

# Score the predictions
prediction$score(task = task)
#> clust.dunn 
#>  0.1033191