#!/usr/bin/python from pygraphviz import * import random import os class Node: def __init__(self, data, disc): self.data = data self.disc = disc self.L = None self.R = None self.elem_count = 1 def _update_elem_count(self): self.elem_count = 1 if self.L: self.elem_count += self.L.elem_count if self.R: self.elem_count += self.R.elem_count def set_r_child(self, node): self.R = node self._update_elem_count() def set_l_child(self, node): self.L = node self._update_elem_count() def set_children(self, node_l, node_r): self.L = node_l self.R = node_r self._update_elem_count() class RKDTree: def __init__(self, k): self.k = k def join(self, A, B, i): print "entering join" if A == None: # both empty if B == None: return None # only A is empty else: return B # only B is empty elif B == None: return A T = None # A becomes root (probability of n/(n+m)) if random.randint(0, A.elem_count+B.elem_count) < A.elem_count: T = A j_a = A.disc if i == j_a: A.set_r_child(self.join(A.R, B, i)) else: B_l, B_r = self.split(B, A.data, j_a) A.set_children(self.join(A.L, B_l, i), self.join(A.R, B_r, i)) # B becomes root else: T = B j_b = B.disc if i == j_b: B.set_l_child(self.join(A, B.L, i)) else: A_l, A_r = self.split(A, B.data, j_b) B.set_children(self.join(A_l, B.L, i), self.join(A_r, B.R, i)) return T def split(self, T, x, i): print "entering split" L = R = None # Empty tree if T == None: return None, None else: y = T.data j = T.disc if i == j: if y[i] < x[i]: print "i = j and y(i) < x(i)" L = T tmp, R = self.split(T.R, x, i) L.set_r_child(tmp) else: print "i = j and y(i) > x(i)" R = T L, tmp = self.split(T.L, x, i) R.set_l_child(tmp) # Different discriminants else: L_l, L_r = self.split(T.L, x, i) R_l, R_r = self.split(T.R, x, i) if y[i] < x[i]: print "i != j and y(i) < x(i)" L = T L.set_children(L_l, R_l) R = self.join(L_r, R_r, j) else: print "i != j and y(i) > x(i)" R = T R.set_children(L_r, R_r) L = self.join(L_l, R_l, j) return L, R def remove(self, T, x): print "entering remove" if T == None: return None; y = T.data j = T.disc if x[j] < y[j]: T.set_l_child(self.remove(T.L, x)) elif y[j] < x[j]: T.set_r_child(self.remove(T.R, x)) else: # free T here T = self.join(T.L, T.R, j) return T def insert(self, T, x): print "entering insert" i = random.randint(0, self.k-1) N = Node(x, i) return self.insert_at_root(T, N) def insert_at_root(self, T, N): print "entering insert_at_root" # Empty tree if T == None: print ">> Insert", N.data, "as leaf" T = N else: T.elem_count += 1 # 1/1+n -> become root (+1 ?) if random.randint(0, T.elem_count+1) == 0: N_l, N_r = self.split(T, N.data, N.disc) N.set_children(N_l, N_r) T = N else: y = T.data j = T.disc x = N.data i = N.disc if x[j] < y[j]: print x, "goes left at", y, "( i=", i, ", j=", j, ")" T.set_l_child(self.insert_at_root(T.L, N)) else: print x,"goes right at", y, "( i=", i, ", j=", j, ")" T.set_r_child(self.insert_at_root(T.R, N)) return T def graph(self, T): return self._graph(T, AGraph(directed=True)) def _graph(self, T, G): def labelize(T): return str(T.data)+","+str(T.disc)+", nodes#="+str(T.elem_count) if T == None: return G.add_node(labelize(T)) if T.L != None: G.add_edge(labelize(T), labelize(T.L), "L") G = self._graph(T.L, G) if T.R != None: G.add_edge(labelize(T), labelize(T.R), "R") G = self._graph(T.R, G) return G def dump(self, T): if T == None: return print T.data self.dump(T.L) self.dump(T.R) if __name__ == "__main__": rkd = RKDTree(2) data = [] for i in range(0, 10000): data.append((random.randint(0, 200), random.randint(0, 200))) print data T = None i = 0 for d in data: print "* Sending", d T = rkd.insert(T, d) #G = rkd.graph(T) #G.layout() #G.draw("prog/graph%2d.png"%i, prog='dot') #os.system("gimv 'prog/graph%2d.png'"%i) i += 1 print T.elem_count rkd.dump(T) for d in data: print "* Removing", d T = rkd.remove(T, d) #G = rkd.graph(T) #G.layout() #G.draw("prog/graph%2d.png"%i, prog='dot') #os.system("gimv 'prog/graph%2d.png'"%i) i += 1 print T.elem_count #rkd.dump(T)