1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
|
def _nearest_center(features, centers):
    """Return, for each row of ``features``, the index of its closest center.

    Distance is Euclidean.  ``np.argmin`` returns the first minimum, so ties
    resolve to the lowest center index.
    """
    distances = np.array(
        [np.sqrt(np.sum(np.square(features - c), axis=1)) for c in centers]
    )
    return np.argmin(distances, axis=0)


def PAM(iteration, k, data):
    """Cluster ``data`` around ``k`` medoids with a greedy swap search (PAM).

    Layout of ``data`` as used by this function: a 2-D array whose first
    ``data_dim - 2`` columns are the features used for distances and whose
    last column is overwritten (in a copy) with the assigned cluster index.
    Column ``data_dim - 2`` is never read or written here (presumably a
    ground-truth label; confirm against the caller).

    Each iteration evaluates every (sample, cluster) swap, i.e. replacing
    medoid ``m`` by sample ``l``, and applies the single swap that lowers the
    total cost the most.  The search stops early once no swap lowers the cost.

    Args:
        iteration: Maximum number of swap rounds.
        k: Number of clusters; must satisfy ``1 <= k <= number of rows``.
        data: 2-D NumPy array laid out as described above.  Not modified.

    Returns:
        ``(center, data_new)``: the ``k`` medoid feature rows, and a copy of
        ``data`` whose last column holds each row's cluster index.

    Raises:
        ValueError: If ``k`` is outside ``1..number of rows``.

    Note:
        Relies on ``caclEucDistance(a, b)`` defined elsewhere in this file;
        it is assumed to be a pure function returning per-row distances
        between two equally shaped arrays.
    """
    data_numSamples, data_dim = data.shape
    if not 1 <= k <= data_numSamples:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r"
            % (data_numSamples, k)
        )

    n_features = data_dim - 2
    label_col = data_dim - 1

    data_new = copy.deepcopy(data)
    # View onto the feature columns; only the label column is ever written,
    # so this view stays equal to the input features throughout.
    features = data_new[:, :n_features]

    # Initial medoids: k distinct rows chosen at random (fancy indexing copies).
    center_point = np.random.choice(data_numSamples, k, replace=False)
    center = data_new[center_point, :n_features]

    labels = _nearest_center(features, center)
    data_new[:, label_col] = labels

    # lost[l, m] = change in total cost if medoid m were replaced by sample l.
    lost = np.ones([data_numSamples, k]) * float('inf')

    for _ in range(iteration):
        # Per-sample cost under the current medoids; invariant across the
        # whole swap search of this iteration, so compute it once.
        current_cost = caclEucDistance(features, center[labels])

        for m in range(k):
            for l in range(data_numSamples):
                center_now = center.copy()
                center_now[m] = features[l]
                # Re-assign every sample against the *trial* medoids.  The
                # original code mistakenly reused the distances to the
                # current medoids here, so trial swaps were mis-scored.
                labels_now = _nearest_center(features, center_now)
                lost[l, m] = (
                    caclEucDistance(features, center_now[labels_now])
                    - current_cost
                ).sum()

        best = np.min(lost)
        # Written as "not <" so that a NaN cost also means "no swap".
        if not best < 0:
            # Converged: further iterations would repeat the same search.
            break

        # First (row-major) position of the most negative cost change.
        index = np.where(best == lost)
        index_l = index[0][0]
        index_m = index[1][0]
        center[index_m] = features[index_l]

        labels = _nearest_center(features, center)
        data_new[:, label_col] = labels

    return center, data_new
|