반응형

K-means Clustering 테스트 자료 


import tensorflow as tf
from tensorflow.contrib.factorization import KMeans
from tensorflow.python.framework import ops


k = 3 # number of clusters to fit
num_features = 3 # values per sample; NOTE(review): the original comment listed 2 features (category code, title), but the value is 3 and the loader parses three fields per line

# Load the samples.
# Each non-empty line is expected to look like "<label> 1:<v1> 2:<v2> 3:<v3>":
# the first whitespace-separated token is skipped and the numeric part after
# the ":" of each following token becomes one feature value.
Data_X = []
with open("C:/Users/N3815/Desktop/sample_kmeans_data.txt", 'r') as f:
    for line in f:
        fields = line.split()
        if not fields:
            # Blank lines (e.g. a trailing newline at end of file) carry no sample.
            continue
        # Split the line once and take exactly num_features "index:value" tokens.
        Data_X.append(
            [float(token.split(":")[1]) for token in fields[1:1 + num_features]]
        )

print(Data_X)

# Placeholder for the input samples: float32, any number of rows, num_features columns.
X = tf.placeholder(tf.float32, shape = [None, num_features])

# K-means over X with k clusters, squared Euclidean distance and mini-batch mode enabled.
kmeans = KMeans(inputs=X, num_clusters=k, distance_metric='squared_euclidean', use_mini_batch=True)

# training_graph() is unpacked into six values here.
# NOTE(review): the length of this tuple may differ between TensorFlow 1.x releases — confirm against the installed version.
(all_scores, cluster_idx, scores, cluster_centers_initialized, init_op, train_op) = kmeans.training_graph()
# Keep only the first element of cluster_idx.
# NOTE(review): presumably the op returns one entry per input tensor — confirm.
cluster_idx = cluster_idx[0]
# Mean of the scores tensor; reported as the distance after training.
avg_distance = tf.reduce_mean(scores)

# Order matters: run the global variable initializer first, then the
# K-means init op, which needs the data fed through X.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
sess.run(init_op, feed_dict={X: Data_X})

# Training: run the training op 99 times (range(1, 100)) on the full data set.
# d and idx keep the average distance and the per-sample cluster assignment
# from the most recent step.
for step in range(1, 100):
    _, d, idx = sess.run([train_op, avg_distance, cluster_idx], feed_dict={X: Data_X})

# Nothing is evaluated after training, so release the session's resources.
sess.close()

# Report: the final assignments and distance, then the samples in each cluster.
print(idx, d)
for cluster in range(k):
    # idx holds one cluster id per sample, in the same order as Data_X.
    members = [sample for sample, assigned in zip(Data_X, idx) if assigned == cluster]
    print(cluster, '에 속한 데이터 :', members)

0 1:0.0 2:0.0 3:0.0
1 1:0.1 2:0.1 3:0.1
2 1:0.2 2:0.2 3:0.2
3 1:9.0 2:9.0 3:9.0
4 1:9.1 2:9.1 3:9.1
5 1:9.2 2:9.2 3:9.2
6 1:5.5 2:2.5 3:5.7
7 1:5.2 2:2.5 3:5.3
8 1:5.4 2:5.9 3:5.9
9 1:0.1 2:9.0 3:9.1
10 1:9.1 2:9.2 3:9.3

참고 : http://iamksu.tistory.com/84

+ Recent posts