For this assignment you will update and complete the implementation of the hashtable data structure presented in class, which exposes an API mirroring that of the built-in Python dict
. When iterating over its contents (supported by the __iter__
, keys
, values
, and items
methods), your updated implementation will also reflect the order in which key/value pairs were originally inserted into the hashtable. This will require that you implement the two-tiered list system described during lecture.
The operations you will implement are listed alongside their descriptions below (h
refers to a hashtable):
Operation | Description |
---|---|
h[k] = v |
If h does not contain key k , a new k →v mapping is added, else the value for key k is updated to v . |
h[k] |
If h contains key k , the corresponding value is returned, else a KeyError is raised. |
del h[k] |
If h contains key k , it is removed along with its value, else a KeyError is raised. Note that if k is re-inserted at some later point it is considered a new key (for ordering purposes). |
k in h |
Returns True if key k is in h . |
len(h) |
Returns the number of keys in h . |
iter(h) |
Returns an iterator over all the keys in h , in the order they were added. |
h.keys() |
(Same as above) |
h.values() |
Returns an iterator over all the values in h , in the order they were added. |
h.items() |
Returns an iterator over all the key/value pairs (as tuples) in h , in the order they were added. |
Your hashtable will be provided with the initial number of buckets on creation (i.e., in __init__
); your implementation must heed this value, as there may be performance ramifications if it does not.
class OrderedHashtable:
class Node:
"""This class is used to create nodes in the singly linked "chains" in
each hashtable bucket."""
def __init__(self, index, next=None):
# don't rename the following attributes!
self.index = index
self.next = next
def __init__(self, n_buckets=1000):
# the following two variables should be used to implement the "two-tiered"
# ordered hashtable described in class -- don't rename them!
self.indices = [None] * n_buckets
self.entries = []
self.count = 0
def __getitem__(self, key):
# YOUR CODE HERE
raise NotImplementedError()
def __setitem__(self, key, val):
# YOUR CODE HERE
raise NotImplementedError()
def __delitem__(self, key):
# YOUR CODE HERE
raise NotImplementedError()
def __contains__(self, key):
try:
_ = self[key]
return True
except:
return False
def __len__(self):
return self.count
def __iter__(self):
# YOUR CODE HERE
raise NotImplementedError()
def keys(self):
return iter(self)
def values(self):
# YOUR CODE HERE
raise NotImplementedError()
def items(self):
# YOUR CODE HERE
raise NotImplementedError()
def __str__(self):
return '{ ' + ', '.join(str(k) + ': ' + str(v) for k, v in self.items()) + ' }'
def __repr__(self):
return str(self)
# (3 tests) Short tests
from unittest import TestCase
import random
tc = TestCase()
ht = OrderedHashtable(2)
for k, v in (('batman', 'bruce wayne'), ('superman', 'clark kent'), ('spiderman', 'peter parker')):
ht[k] = v
tc.assertEqual(len(ht), 3)
tc.assertEqual(ht['superman'], 'clark kent')
tc.assertTrue('spiderman' in ht)
tc.assertFalse('iron man' in ht)
with tc.assertRaises(KeyError):
ht['iron man']
# (3 points) Basic tests (insertion, fetch, count, chain-lengths)
from unittest import TestCase
import random
tc = TestCase()
class MyInt(int):
def __hash__(self):
"""MyInts hash to themselves — already current Python default,
but just to ensure consistency."""
return self
def ll_len(l):
"""Returns the length of a linked list with head `l` (assuming no sentinel)"""
c = 0
while l:
c += 1
l = l.next
return c
ht = OrderedHashtable(10)
for i in range(25):
ht[MyInt(i)] = i*2
tc.assertEqual(len(ht), 25)
for i in range(5):
tc.assertEqual(ll_len(ht.indices[i]), 3)
for i in range(5, 10):
tc.assertEqual(ll_len(ht.indices[i]), 2)
for i in range(25):
tc.assertTrue(MyInt(i) in ht)
tc.assertEqual(ht[MyInt(i)], i*2)
# (3 points) Update testing
from unittest import TestCase
import random
tc = TestCase()
ht = OrderedHashtable(100)
d = {}
for i in range(100):
k, v = str(i), str(i*2)
d[k] = v
ht[k] = v
for j in range(0, 100, 2):
k, v = str(i), str(i*3)
d[k] = v
ht[k] = v
for j in range(0, 100, 4):
k, v = str(i), str(i*4)
d[k] = v
ht[k] = v
for i in range(100):
tc.assertTrue(k in ht)
tc.assertEqual(d[k], ht[k])
# (3 points) Deletion testing
from unittest import TestCase
import random
tc = TestCase()
ht = OrderedHashtable(100)
d = {}
for i in range(100):
k, v = str(i), str(random.randrange(10000000, 99999999))
d[k] = v
ht[k] = v
for _ in range(50):
k = str(random.randrange(100))
if k in d:
del d[k]
del ht[k]
tc.assertEqual(len(ht), len(d))
for k,v in ht.items():
tc.assertEqual(d[k], v)
# (4 points) Iteration order testing
from unittest import TestCase
import random
tc = TestCase()
ht = OrderedHashtable(1000)
l = [str(i) for i in range(0, 1000)]
random.shuffle(l)
for x in l:
ht[x] = x
for _ in range(50):
idx_to_del = random.randrange(len(l))
val_to_del = l[idx_to_del]
del ht[val_to_del]
del l[idx_to_del]
if random.randrange(2) == 0:
l.append(val_to_del)
ht[val_to_del] = val_to_del
for x, y in zip(l, ht):
tc.assertEqual(x, y)
# (4 points) Stress testing
from unittest import TestCase
from time import time
import random
tc = TestCase()
ht = OrderedHashtable(100000)
d = {}
start = time()
for _ in range(100000):
k, v = str(random.randrange(100000)), str(random.randrange(10000000, 99999999))
d[k] = v
ht[k] = v
for k,v in d.items():
tc.assertTrue(k in ht)
tc.assertEqual(d[k], ht[k])
end = time()
print(end-start)
tc.assertLess(end-start, 1.5, 'Your implementation ran too slow!')