The BSTree data structure

Agenda

  • API
  • Implementation
    • Search
    • Addition
    • Removal
    • Iteration / Traversal
  • Runtime complexity

API

In [1]:
class BSTree:
    class Node:
        def __init__(self, val, left=None, right=None):
            self.val = val
            self.left = left
            self.right = right
            
    def __init__(self):
        self.size = 0
        self.root = None
    
    def __contains__(self, val):
        """Returns `True` if val is in this tree and `False` otherwise."""
        pass
    
    def add(self, val):
        """Adds `val` to this tree while maintaining BSTree properties."""
        assert(val not in self)
        pass    

    def __delitem__(self, val):
        """Removes `val` from this tree while maintaining BSTree properties."""
        assert(val in self)
        pass
    
    def __iter__(self):
        """Returns an iterator over all the values in the tree, in ascending order."""
        pass

    def __len__(self):
        return self.size
    
    def height(self):
        """Returns the height of the root of the tree."""
        def height_rec(t):
            if not t:
                return 0
            else:
                return 1 + max(height_rec(t.left), height_rec(t.right))
        return height_rec(self.root)

    def pprint(self, width=64):
        """Attempts to pretty-print this tree's contents."""
        height = self.height()
        nodes  = [(self.root, 0)]
        prev_level = 0
        repr_str = ''
        while nodes:
            n,level = nodes.pop(0)
            if prev_level != level:
                prev_level = level
                repr_str += '\n'
            if not n:
                if level < height-1:
                    nodes.extend([(None, level+1), (None, level+1)])
                repr_str += '{val:^{width}}'.format(val='-', width=width//2**level)
            elif n:
                if n.left or level < height-1:
                    nodes.append((n.left, level+1))
                if n.right or level < height-1:
                    nodes.append((n.right, level+1))
                repr_str += '{val:^{width}}'.format(val=n.val, width=width//2**level)
        print(repr_str)
In [2]:
t = BSTree()
t.root = BSTree.Node(5,
                    left=BSTree.Node(2),
                    right=BSTree.Node(10))
t.size = 3
In [3]:
t.height()
Out[3]:
2
In [4]:
t.pprint()
                               5                                
               2                               10               

Implementation

In [5]:
class BSTree(BSTree):
    def __contains__(self, val):
        def contains_rec(node):
            if not node:
                return False
            elif val < node.val:
                return contains_rec(node.left)
            elif val > node.val:
                return contains_rec(node.right)
            else:
                return True
        return contains_rec(self.root)
In [6]:
t = BSTree()
t.root = BSTree.Node(5,
                    left=BSTree.Node(2),
                    right=BSTree.Node(10))
t.size = 3
In [7]:
5 in t
Out[7]:
True

Addition

In [8]:
class BSTree(BSTree):
    def add(self, val):
        assert(val not in self)
        def add_rec(node):
            if not node:
                return BSTree.Node(val)
            elif val < node.val:
                return BSTree.Node(node.val, left=add_rec(node.left), right=node.right)
            else:
                return BSTree.Node(node.val, left=node.left, right=add_rec(node.right))
        self.root = add_rec(self.root)
        self.size += 1
In [9]:
import random
t = BSTree()
vals = list(range(5))
random.shuffle(vals)
for x in vals:
    t.add(x)
t.pprint()
                               4                                
               0                               -                
       -               2               -               -        
   -       -       1       3       -       -       -       -    
In [10]:
import random
t = BSTree()
vals = list(range(1, 10, 2))
random.shuffle(vals)
for x in vals:
    t.add(x)

assert(all(x in t for x in range(1, 10, 2)))
assert(all(x not in t for x in range(0, 12, 2)))

Removal

In [11]:
class BSTree(BSTree):
    def __delitem__(self, val):
        assert(val in self)
        def delitem_rec(node):
            if val < node.val:
                node.left = delitem_rec(node.left)
                return node
            elif val > node.val:
                node.right = delitem_rec(node.right)
                return node
            else:
                # only deal with simple cases first
                if not node.left and not node.right:
                    return None
                elif node.left and not node.right:
                    return node.left
                elif node.right and not node.left:
                    return node.right
                else:
                    pass #?!
                
        self.root = delitem_rec(self.root)
        self.size -= 1
In [12]:
t = BSTree()
for x in [10, 5, 15, 2, 17]:
    t.add(x)
t.pprint()
                               10                               
               5                               15               
       2               -               -               17       
In [13]:
del t[2]
t.pprint()
                               10                               
               5                               15               
       -               -               -               17       
In [14]:
t = BSTree()
for x in [10, 5, 15, 2, 17]:
    t.add(x)
del t[5]
t.pprint()
                               10                               
               2                               15               
       -               -               -               17       
In [15]:
t = BSTree()
for x in [10, 5, 15, 2, 17]:
    t.add(x)
del t[15]
t.pprint()
                               10                               
               5                               17               
       2               -               -               -        
In [16]:
t = BSTree()
for x in [10, 5, 15, 2, 17]:
    t.add(x)
del t[10]
t.pprint()
                               -                                
In [17]:
class BSTree(BSTree):
    def __delitem__(self, val):
        assert(val in self)
        def delitem_rec(node):
            if val < node.val:
                node.left = delitem_rec(node.left)
                return node
            elif val > node.val:
                node.right = delitem_rec(node.right)
                return node
            else:
                if not node.left and not node.right:
                    return None
                elif node.left and not node.right:
                    return node.left
                elif node.right and not node.left:
                    return node.right
                else:
                    # remove the largest value from the left subtree as a replacement
                    # for the root value of this tree
                    t = node.left # refers to the candidate for removal
                    if not t.right:
                        node.val = t.val
                        node.left = t.left
                    else:
                        n = t
                        while n.right.right:
                            n = n.right
                        t = n.right
                        n.right = t.left
                        node.val = t.val
                    return node
                        
        self.root = delitem_rec(self.root)
        self.size -= 1
In [18]:
t = BSTree()
for x in [10, 5, 2, 7, 9, 8, 1, 15, 12, 18]:
    t.add(x)
t.pprint()
                               10                               
               5                               15               
       2               7               12              18       
   1       -       -       9       -       -       -       -    
 -   -   -   -   -   -   8   -   -   -   -   -   -   -   -   -  
In [19]:
del t[15]
t.pprint()
                               10                               
               5                               12               
       2               7               -               18       
   1       -       -       9       -       -       -       -    
 -   -   -   -   -   -   8   -   -   -   -   -   -   -   -   -  
In [20]:
t = BSTree()
for x in [10, 5, 2, 7, 9, 8, 1, 15, 12, 18]:
    t.add(x)
del t[5]
t.pprint()
                               10                               
               2                               15               
       1               7               12              18       
   -       -       -       9       -       -       -       -    
 -   -   -   -   -   -   8   -   -   -   -   -   -   -   -   -  
In [21]:
t = BSTree()
for x in [10, 5, 2, 7, 9, 8, 1, 15, 12, 18]:
    t.add(x)
del t[10]
t.pprint()
                               9                                
               5                               15               
       2               7               12              18       
   1       -       -       8       -       -       -       -    

Iteration / Traversal

In [22]:
class BSTree(BSTree):
    def __iter__(self):
        def iter_rec(node):
            if node:
                for x in iter_rec(node.left):
                    yield x
                yield node.val
                for x in iter_rec(node.right):
                    yield x
        for x in iter_rec(self.root):
            yield x
In [23]:
import random
t = BSTree()
vals = list(range(20))
random.shuffle(vals)
for x in vals:
    t.add(x)
for x in t:
    print(x)
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19

Iteration by Level

In [24]:
class BSTree(BSTree):
    def __iter__(self):
        nodes = [self.root]
        while nodes:
            n = nodes.pop(0)
            yield n.val
            if n.left:
                nodes.append(n.left)
            if n.right:
                nodes.append(n.right)
In [30]:
import random
t = BSTree()
vals = list(range(10))
random.shuffle(vals)
for x in vals:
    t.add(x)
t.pprint()
                               5                                
               1                               7                
       0               3               6               8        
   -       -       2       4       -       -       -       9    
In [31]:
for x in t:
    print(x)
5
1
7
0
3
6
8
2
4
9

Runtime Complexity

The runtime complexity of the search, add, and delete methods of the binary search tree are dependent, ultimately, on the depth of their recursive implementation. The depth of recursion is in turn dependent on the height of the binary search tree.

Given $N$ nodes, the height of a binary search tree is, in the worst case = $N$

This gives us the following worst-case runtime complexities:

  • Search = O(N)
  • Add = O(N)
  • Delete = O(N)

How can we improve this runtime complexity? What should be our target runtime complexity?