How to use the elbow method to evaluate K-Medoids clustering

I use this code as my reference:

Expand|Select|Wrap|Line Numbers

 import pylab as plt

import numpy as np

from scipy.spatial.distance import cdist, pdist

from sklearn.cluster import KMeans

from sklearn.datasets import load_iris
 
iris = load_iris()
 
k = range(1,11)
 
clusters = [KMeans(n_clusters = c,init = 'k-means++').fit(iris.data) for c in k]

centr_lst = [cc.cluster_centers_ for cc in clusters]
 
k_distance = [cdist(iris.data, cent, 'euclidean') for cent in centr_lst]

clust_indx = [np.argmin(kd,axis=1) for kd in k_distance]

distances = [np.min(kd,axis=1) for kd in k_distance]

avg_within = [np.sum(dist)/iris.data.shape[0] for dist in distances]
 
with_in_sum_square = [np.sum(dist ** 2) for dist in distances]

to_sum_square = np.sum(pdist(iris.data) ** 2)/iris.data.shape[0]

bet_sum_square = to_sum_square - with_in_sum_square
 
kidx = 2
 
fig = plt.figure()

ax = fig.add_subplot(111)

ax.plot(k, avg_within, 'g*-')

ax.plot(k[kidx], avg_within[kidx], marker='o', markersize=12, \

markeredgewidth=2, markeredgecolor='r', markerfacecolor='None')

plt.grid(True)

plt.xlabel('Number of clusters')

plt.ylabel('Average within-cluster sum of squares')

plt.title('Elbow for KMeans clustering (IRIS Data)')

I want to replace K-Means with K-Medoids.
This is my K-Medoids code:

Expand|Select|Wrap|Line Numbers

 
import numpy as np

import matplotlib.pyplot as plt

import matplotlib.cm as cm

from copy import deepcopy

from IPython import embed

import time
 
def _get_init_centers(n_clusters, n_samples):
    '''Return distinct random sample indices to use as initial centers.

    Parameters
    --------
        n_clusters: number of indices to draw
        n_samples : indices are drawn from range(n_samples)

    Returns
    --------
        list of n_clusters distinct integers in [0, n_samples)

    Raises
    --------
        ValueError: if n_clusters is larger than n_samples; there are not
                    enough distinct indices and the rejection loop below
                    would never terminate.
    '''
    if n_clusters > n_samples:
        raise ValueError(
            'n_clusters (%d) cannot exceed n_samples (%d)'
            % (n_clusters, n_samples))

    init_ids = []
    # Rejection sampling: keep drawing until enough distinct ids are found.
    while len(init_ids) < n_clusters:
        candidate = np.random.randint(0, n_samples)
        if candidate not in init_ids:
            init_ids.append(candidate)
    return init_ids
 
def _get_distance(data1, data2):
    '''Default distance function: Euclidean distance between two points.

    Both arguments must support element-wise subtraction (e.g. numpy
    arrays of equal shape). Returns the square root of the summed
    squared differences.
    '''
    difference = data1 - data2
    squared = difference ** 2
    return np.sqrt(np.sum(squared))
 
def _get_cost(X, centers_id, dist_func):
    '''Assign every sample to its nearest center and total up the cost.

    Parameters
    --------
        X         : 2-D numpy array, one sample per row
        centers_id: sequence of row indices of X used as cluster centers
        dist_func : callable(sample, center) returning a distance

    Returns
    --------
        members : float array, index (into centers_id) of the nearest
                  center of each sample; ties go to the lowest index
        costs   : array with, per center, the summed distance of its members
        tot_cost: sum of costs
        dist_mat: (len(X), len(centers_id)) sample-to-center distances
    '''
    n_samples = len(X)
    n_centers = len(centers_id)

    # compute distance matrix
    dist_mat = np.zeros((n_samples, n_centers))
    for j, center_id in enumerate(centers_id):
        center = X[center_id, :]
        for i in range(n_samples):
            # A center is at distance zero from itself; the entry is
            # already 0, so dist_func is never called for that pair.
            if i != center_id:
                dist_mat[i, j] = dist_func(X[i, :], center)

    nearest = np.argmin(dist_mat, axis=1)
    # Labels are kept as floats, matching what callers have always received.
    members = nearest.astype(float)

    costs = np.zeros(n_centers)
    for j in range(n_centers):
        costs[j] = np.sum(dist_mat[nearest == j, j])
    return members, costs, np.sum(costs), dist_mat
 
def _kmedoids_run(X, n_clusters, dist_func, max_iter=3, tol=0.000001, verbose=True):
    '''Run the K-Medoids swap search and return the final clustering.

    Starting from random centers, every (non-center sample, center) swap is
    tried; a swap is kept only when it lowers the total cost by more than
    tol. The search stops after a full pass without any swap, or after
    max_iter passes.

    Parameters
    --------
        X         : 2-D numpy array, size = (n_sample, n_features)
        n_clusters: number of clusters
        dist_func : callable(sample, center) returning a distance
        max_iter  : maximum number of full passes over the samples
        tol       : minimum cost decrease required to accept a swap
        verbose   : print progress messages when True

    Returns
    --------
        centers, members, costs, tot_cost, dist_mat
        centers is the list of row indices of X chosen as medoids; the
        other four values are what _get_cost returns for those centers.
    '''
    # Get initial centers
    # Unpacking also enforces that X is two-dimensional.
    n_samples, _n_features = X.shape
    init_ids = _get_init_centers(n_clusters, n_samples)
    if verbose:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Initial centers are  %s' % (init_ids,))
    centers = init_ids
    members, costs, tot_cost, dist_mat = _get_cost(X, init_ids, dist_func)

    n_passes = 0
    while True:
        if n_passes >= max_iter:
            if verbose:
                print('End Searching by reaching maximum iteration %s' % (max_iter,))
            break

        swapped = False
        for i in range(n_samples):
            if i in centers:
                continue
            for j in range(len(centers)):
                # Candidate configuration: center j replaced by sample i.
                candidate = list(centers)
                candidate[j] = i
                members_, costs_, tot_cost_, dist_mat_ = _get_cost(X, candidate, dist_func)
                # Accept only a real improvement. Accepting ties (or
                # slightly worse swaps) would keep the search swapping
                # forever between equally good configurations.
                if tot_cost - tot_cost_ > tol:
                    members, costs, tot_cost, dist_mat = members_, costs_, tot_cost_, dist_mat_
                    centers = candidate
                    swapped = True
                    if verbose:
                        print('Change centers to  %s' % (centers,))
        n_passes += 1

        if not swapped:
            if verbose:
                print('End Searching by no swaps')
            break
    return centers, members, costs, tot_cost, dist_mat
 
class KMedoids(object):
    '''
    Main API of KMedoids Clustering

    Parameters
    --------
        n_clusters: number of clusters
        dist_func : distance function, callable(sample, center)
        max_iter: maximum number of iterations
        tol: tolerance

    Attributes (set by fit)
    --------
        labels_    :  cluster labels for each data item
        centers_   :  cluster centers id (row indices of the fitted X)
        costs_     :  array of costs for each cluster
        total_cost_:  sum of costs_; the value to plot against n_clusters
                      when looking for an elbow
        dist_mat_  :  distances from every data item to every center

    Methods
    -------
        fit(X): fit the model
            - X: 2-D numpy array, size = (n_sample, n_features)

        predict(X): predict cluster id given a test dataset.
                    Not implemented; raises NotImplementedError.
    '''

    def __init__(self, n_clusters, dist_func=_get_distance, max_iter=3, tol=0.000001):
        self.n_clusters = n_clusters
        self.dist_func = dist_func
        self.max_iter = max_iter
        self.tol = tol

    def fit(self, X, plotit=True, verbose=True):
        '''Cluster X and store the result on the instance.

        Parameters
        --------
            X      : 2-D numpy array, size = (n_sample, n_features)
            plotit : when True, scatter-plot the first two columns of X,
                     coloured per cluster, with the medoids drawn as stars
                     (X therefore needs at least two columns)
            verbose: forwarded to the search to print progress messages

        Returns
        --------
            self, so that calls can be chained
        '''
        centers, members, costs, tot_cost, dist_mat = _kmedoids_run(
            X, self.n_clusters, self.dist_func,
            max_iter=self.max_iter, tol=self.tol, verbose=verbose)

        # Keep the outcome; without this the clustering is computed and
        # then thrown away, leaving nothing for the caller to evaluate.
        self.centers_ = centers
        self.labels_ = members
        self.costs_ = costs
        self.total_cost_ = tot_cost
        self.dist_mat_ = dist_mat

        if plotit:
            fig = plt.figure()
            ax = fig.add_subplot(111)

            for i in range(len(centers)):
                X_c = X[members == i, :]
                ax.scatter(X_c[:, 0], X_c[:, 1], label=i + 1, alpha=0.5, s=30)
                ax.scatter(X[centers[i], 0], X[centers[i], 1], alpha=1., s=250, marker='*')

            # Recolour the collections in creation order, so each cluster
            # and its medoid star get neighbouring colours from the map.
            colormap = plt.cm.gist_ncar  # nipy_spectral, Set1,Paired
            colorst = [colormap(i) for i in np.linspace(0, 0.9, len(ax.collections))]
            for t, j1 in enumerate(ax.collections):
                j1.set_color(colorst[t])

        return self

    def predict(self, X):
        '''Predict cluster ids for new data. Not implemented.'''
        raise NotImplementedError()

Could you help me with how to do it?
Thanks.

Oct 24 '16 #1

Subscribe Post Reply

2221

Similar topics

Delayed evaluation and setdefault()

by: Leo Breebaart | last post by:

Hi all, I have a question about Python and delayed evaluation. Short-circuiting of Boolean expressions implies that in: >>> if a() and b(): any possible side-effects the call to b() might...

Python

can not access clustering SQL Server after relocation

by: willie | last post by:

Hi all: I have a clustering SQL Server on Node1 and Node2, the Node1 has named Instance1 and Node2 has named Instance2, no default instance. We tested it that everthing is OK, then we decide to...

Microsoft SQL Server

Unused method is acting up

by: Martin Magnusson | last post by:

Hi, I have defined a method of a base class which writes to a C-style array without initializing it. (No, I don't like C arrays either, but I have to use it here because of another library I'm...

C / C++

Order of evaluation of function arguments

by: dragoncoder | last post by:

Consider the following code. #include <stdio.h> int main() { int i =1; printf("%d ,%d ,%d\n",i,++i,i++); return 0; }

C / C++

DB2 Clustering

by: chmmr | last post by:

Hi, I am currently in the process of gathering info/experiences for an incoming Linux DB2 clustering phase we actually know nothing about (since we are doing it for the first time ever), so I...

DB2 Database

Software clustering for Asp.net 2.0

by: dejavue82 | last post by:

Hi, Does anybody know of a software package that allows for several servers, running asp.net 2.0 to be clustered, regardless of where they are located (ie. without a hardware load balancer)....

ASP.NET

K-mean clustering algorithm!

by: Shum | last post by:

Hi! I need help ragarding the k-mean clustering algo. Any one have a source code in c#.. or a dll file that could be used in the project.... I cannot seem to identify the objects that could be...

C# / C Sharp

Lazy evaluation: overloading the assignment operator?

by: sturlamolden | last post by:

Python allows the binding behaviour to be defined for descriptors, using the __set__ and __get__ methods. I think it would be a major advantage if this could be generalized to any object, by...

Python

A simple question about "clustering" ...

by: Lakesider | last post by:

Hi NG, I have a question about data: I have travel-times from A to B like this from | to | sec. A B 17 A B 18 A B 30 A B 32

C# / C Sharp

Clustering, Security, Performance, Load Balance

by: Manish | last post by:

I think this question has been asked number of times. However, I am looking for some specific information. Perhaps some of you can help close the gap. Or perhaps you can point me towards right...

Microsoft SQL Server

Looking to do Android software development, any suggestions? Is flutter better?

by: nemocccc | last post by:

hello, everyone, I want to develop a software for my android phone for daily needs, any suggestions?

General

How to build RAID in BIOS?

by: Hystou | last post by:

There are some requirements for setting up RAID: 1. The motherboard and BIOS support RAID configuration. 2. The motherboard has 2 or more available SATA protocol SSD/HDD slots (including MSATA, M.2...

Computer Hardware

Changing the language in Windows 10

by: Hystou | last post by:

Most computers default to English, but sometimes we require a different language, especially when relocating. Forgot to request a specific language before your computer shipped? No problem! You can...

Windows Server

Problem With Comparison Operator <=> in G++

by: Oralloy | last post by:

Hello folks, I am unable to find appropriate documentation on the type promotion of bit-fields when using the generalised comparison operator "<=>". The problem is that using the GNU compilers,...

C / C++

Maximizing Business Potential: The Nexus of Website Design and Digital Marketing

by: jinu1996 | last post by:

In today's digital age, having a compelling online presence is paramount for businesses aiming to thrive in a competitive landscape. At the heart of this digital strategy lies an intricately woven...

Online Marketing

The easy way to turn off automatic updates for Windows 10/11

by: Hystou | last post by:

Overview: Windows 11 and 10 have less user interface control over operating system update behaviour than previous versions of Windows. In Windows 11 and 10, there is no way to turn off the Windows...

Windows Server

Discussion: How does Zigbee compare with other wireless protocols in smart home applications?

by: tracyyun | last post by:

Dear forum friends, With the development of smart home technology, a variety of wireless communication protocols have appeared on the market, such as Zigbee, Z-Wave, Wi-Fi, Bluetooth, etc. Each...

General

AI Job Threat for Devs

by: agi2029 | last post by:

Let's talk about the concept of autonomous AI software engineers and no-code agents. These AIs are designed to manage the entire lifecycle of a software development project—planning, coding, testing,...

Career Advice

Access Europe - Using VBA to create a class based on a table - Wed 1 May

by: isladogs | last post by:

The next Access Europe User Group meeting will be on Wednesday 1 May 2024 starting at 18:00 UK time (6PM UTC+1) and finishing by 19:30 (7.30PM). In this session, we are pleased to welcome a new...

Microsoft Access / VBA