첨부 실행 코드는 나눔고딕코딩 폰트를 사용합니다.
------------------------------------------------------------------------------------------------------------------------------------------------------
728x90
728x170

■ K-평균 군집화(K-Means Clustering) 알고리즘을 사용하는 방법을 보여준다.

▶ kmean.py

import matplotlib.pyplot as pp
import numpy as np
import pandas as pd
import tensorflow as tf

def DisplayPartition(xValueList, yValueList, assignmentValueNDArray):
    """Show a scatter plot of the points, colored by cluster assignment.

    Args:
        xValueList: x coordinates of the points.
        yValueList: y coordinates of the points.
        assignmentValueNDArray: one cluster index per point; each index
            selects an entry of the fixed four-color palette below.
    """
    palette = ["red", "blue", "green", "yellow"]
    # Translate every cluster index into its display color.
    pointColorList = [palette[clusterIndex] for clusterIndex in assignmentValueNDArray]
    frame = pd.DataFrame({"x": xValueList, "y": yValueList, "color": pointColorList})
    _, axes = pp.subplots()
    axes.scatter(frame["x"], frame["y"], c=frame["color"])
    pp.show()

# Experiment parameters.
vectorCount = 2000
clusterCount = 4
sampleCountPerCluster = 500  # not referenced by the code visible in this script
stepCount = 1000

xValueList = []
yValueList = []

# Generate random data: each point is drawn from one of two normal
# distributions, chosen by comparing a uniform random draw against 0.5.
for _ in range(vectorCount):
    if np.random.random() > 0.5:
        xMean, xSigma, yMean, ySigma = 0.4, 0.7, 0.2, 0.8
    else:
        xMean, xSigma, yMean, ySigma = 0.6, 0.4, 0.8, 0.5
    xValueList.append(np.random.normal(xMean, xSigma))
    yValueList.append(np.random.normal(yMean, ySigma))

# Pair the coordinates up as (x, y) tuples.
vectorList = list(zip(xValueList, yValueList))

# Build the TensorFlow 1.x graph for one K-means iteration.

# vectorList holds (x, y) pairs, so this constant has one row per input vector.
vectorTensor = tf.constant(vectorList)

# Number of input vectors, taken from the tensor that already holds the data
# rather than converting the Python list a second time.
vectorListCount = tf.shape(vectorTensor)[0]

# Shuffle the row indices and keep the first clusterCount of them; the rows at
# those indices become the initial centroids.
randomIndexTensor = tf.random_shuffle(tf.range(0, vectorListCount))

begin = [0,]
size  = [clusterCount,]

centroidIndexTensor     = tf.slice(randomIndexTensor, begin, size)
centroidVariable        = tf.Variable(tf.gather(vectorTensor, centroidIndexTensor))

# Add a leading axis to the vectors and a middle axis to the centroids so the
# subtraction broadcasts to every (centroid, vector) pair.
expandedVectorTensor    = tf.expand_dims(vectorTensor, 0)
expandedCentroidVector  = tf.expand_dims(centroidVariable, 1)
subtractedVectorTensor  = tf.subtract(expandedVectorTensor, expandedCentroidVector)

# Sum of squared coordinate differences, i.e. the squared Euclidean distance
# (no square root is taken; argmin is unaffected by that).
euclideanDistanceTensor = tf.reduce_sum(tf.square(subtractedVectorTensor), 2)

# For every vector, the index of the closest centroid.
assignmentTensor        = tf.to_int32(tf.argmin(euclideanDistanceTensor, 0))

# Split the vectors into clusterCount groups according to their assignment.
partitionList = tf.dynamic_partition(vectorTensor, assignmentTensor, clusterCount)

# Compute the mean of every partition, stack the means into one tensor with a
# row per cluster, and assign it to centroidVariable. Evaluating
# updatedCentroidTensor therefore moves the centroids; previously only the
# last partition's mean was kept and it was never written to the variable.
# NOTE(review): a cluster with no assigned vectors would presumably produce a
# NaN mean here -- confirm whether that case needs handling.
updatedCentroidTensor = tf.assign(
    centroidVariable,
    tf.concat(
        [tf.expand_dims(tf.reduce_mean(partition, 0), 0) for partition in partitionList],
        0,
    ),
)

initializeOperation = tf.global_variables_initializer()

# Run the session as a context manager so it is closed when the loop finishes
# or raises, instead of being left open for the rest of the script.
with tf.Session() as sess:
    sess.run(initializeOperation)

    # Each step evaluates updatedCentroidTensor (its value is discarded) and
    # fetches the current centroids and per-vector cluster assignments; only
    # the values from the final step are used below.
    for step in range(stepCount):
        _, centroidValueNDArray, assignmentValueNDArray = sess.run(
            [updatedCentroidTensor, centroidVariable, assignmentTensor]
        )

# Scatter plot of the points colored by their final cluster assignment.
DisplayPartition(xValueList, yValueList, assignmentValueNDArray)

# Plot of the raw input data.
pp.plot(xValueList, yValueList, "o", label = "Input Data")
pp.legend()
pp.show()
728x90
그리드형(광고전용)
Posted by icodebroker
,