#!/usr/bin/env ruby k = 3 # k clusters re = /^(\d+)\s+(\d+)/ INFINITY = 0x7fffffff # read data nodes = Array.new # array of array for data points: [x, y, cluster_index] centroids = Array.new # array of array for centroids: [x, y] ARGF.each_line do |line| if re.match(line) c = rand(k) # randomly assign initial cluster nodes.push [$1.to_i, $2.to_i, c] end end round = 0 begin updated = false # assignment step: assign each node to the closest centroid if round != 0 # skip assignment for the 1st round nodes.each do |node| dist2 = INFINITY # square of dsistance to the closest centroid cluster = 0 # closest cluster index for i in (0 .. k - 1) d2 = (node[0] - centroids[i][0])**2 + (node[1] - centroids[i][1])**2 if d2 < dist2 dist2 = d2 cluster = i end end node[2] = cluster end end # update step: compute new centroids sums = Array.new(k) clsize = Array.new(k) for i in (0 .. k - 1) sums[i] = [0, 0] clsize[i] = 0 end nodes.each do |node| i = node[2] sums[i][0] += node[0] sums[i][1] += node[1] clsize[i] += 1 end for i in (0 .. k - 1) newcenter = [Float(sums[i][0]) / clsize[i], Float(sums[i][1]) / clsize[i]] if round == 0 || newcenter[0] != centroids[i][0] || newcenter[1] != centroids[i][1] centroids[i] = newcenter updated = true end end round += 1 end while updated == true # print the results nodes.each do |node| puts "#{node[0]}\t#{node[1]}\t#{node[2]}" end