# by default, only the result of the last expression in a cell is displayed after evaluation.
# the following forces display of *all* self-standing expressions in a cell.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
class PriorityQueue: # priority is assumed to be the value of the item (or a function of the value of the item)
def __init__(self, key=lambda x:x): # default is max value is highest priority
# key=lambda x : -1*x min valu is highest priorty
# key=lambda x : len(x) longest value has highest priorty
self.data = []
self.key=key # key is a function
def add(self, x): # when add an item, sort the list so last item is the max
# append the item, then sort this list based on the key (self.data[-1] is the max)
self.data.append(x) # amortized O(1)
self.data.sort(key=self.key) # O(n lg n) NOT HAPPY, worse than linear (O(n))
# increasing order is default python sort
def max(self): #O(1)
return self.data[-1] # no error checking on empty PriorityQueue
def pop_max(self): # O(1)
val=self.data[-1]
del self.data[-1] # self.data.pop()
return val
def __bool__(self):
return len(self.data) > 0
def __len__(self):
return len(self.data)
def __repr__(self):
return repr(self.data)
pq = PriorityQueue()
import random
for _ in range(10):
pq.add(random.randrange(100))
pq
[7, 21, 27, 31, 60, 80, 90, 93, 94, 94]
while pq:
print(pq.pop_max())
94 94 93 90 80 60 31 27 21 7
pq1 = PriorityQueue(key=lambda x:-x) # min queue does not chaneg values, just changes sort order
import random
for _ in range(10):
pq1.add(random.randrange(100))
pq1
[86, 81, 81, 72, 70, 62, 50, 42, 33, 20]
pq2 = PriorityQueue(key=lambda s:len(s)) # max queue by string length does not chaneg values, just changes sort order
pq2.add("matt")
pq2.add("bauer")
pq2.add("bob")
pq2
['bob', 'matt', 'bauer']
class Heap:
def __init__(self):
self.data = []
@staticmethod # don't need to call on an object self. need to call on Heap.
def _parent(idx): #single underscore method start means private
return (idx-1)//2
@staticmethod
def _left(idx):
return 2*idx+1
@staticmethod
def _right(idx):
return 2*idx+2
# len(self.data) is the next available index in the python list
# we can just append on self.data to add a value on the end of the list amortized O(1)
def add(self, x):
#append on the end of the python list
#"walk it up"
# if value at parent is > value of new node, done
# else swap the values, and continue with "walk it up" UNTIL ROOT
self.data.append(x)
i = len(self.data)-1 #index of the newly appended value
parentI =Heap._parent(i)
# if just added the first item (or if I walked all the way to the root)
# I do not need to do anything, i=0, loop not entered
while i>0 and self.data[i] > self.data[parentI]: # key function not implemented yet
#swap
self.data[i],self.data[parentI]=self.data[parentI],self.data[i]
# move i and parentI up the heap
i=parentI
parentI =Heap._parent(i)
def max(self): # error checking for empty heap not required
return self.data[0]
def _heapify(self): #_ start means private helper method
i=0 # the only possible incorrect value is at the root
# the left and right subtrees of the root are still themselves valid heap
# while Heap._left(i)<len(self.data): # while ith element in the array has left
while True:
leftI=Heap._left(i)
rightI=Heap._right(i) # no error checking on leftI and rightI yet
# check if those leftI and rightI indexes are in the heap,
#then do the three way compares
maxI=i
maxValue=self.data[maxI]
if leftI<len(self.data) and self.data[leftI]>maxValue:
maxI=leftI
maxValue=self.data[leftI]
if rightI<len(self.data) and self.data[rightI]>maxValue:
maxI=rightI
maxValue=self.data[rightI]
# swap if necessary
if maxI!=i:
self.data[i],self.data[maxI]=self.data[maxI],self.data[i]
i=maxI
else:
break
def pop_max(self): # assuming heap is not empty
# 1. save self.data[0]
# move the value in rightmost bottom node (len(self.data)-1) to index 0
#delete that last position in the array
# "walk it down" heapify
# return the saved value from step 1
savedValue = self.data[0]
# error checking is heap empty?
self.data[0]=self.data[len(self.data)-1]
del self.data[len(self.data)-1]
if (len(self.data)>0):
self._heapify()
return savedValue
def __bool__(self):
return len(self.data) > 0
def __len__(self):
return len(self.data)
def __repr__(self):
return repr(self.data)
-1//2
-1
h = Heap()
import random
for _ in range(10):
h.add(random.randrange(100))
h
[98, 71, 45, 18, 33, 12, 42, 9, 16, 18]
h.add(50)
h
[98, 71, 45, 18, 50, 12, 42, 9, 16, 18, 33]
h.add(47)
h
[98, 71, 47, 18, 50, 45, 42, 9, 16, 18, 33, 12]
print(h.pop_max())
h
98
[71, 50, 47, 18, 33, 45, 42, 9, 16, 18, 12]
while h:
print(h.pop_max())
71 50 47 45 42 33 18 18 16 12 9
def heapsort(iterable):
heap = Heap()
for x in iterable:
heap.add(x)
sortedList=[None]*len(heap)
i=len(sortedList)-1
while heap:
sortedList[i] = heap.pop_max()
i=i-1 # i-=1
return sortedList
l=heapsort([71, 50, 47, 18, 33, 45, 42, 9, 16, 18, 12])
l
[9, 12, 16, 18, 18, 33, 42, 45, 47, 50, 71]
import random
def pairs(iterable):
it = iter(iterable)
a = next(it)
while True:
b = next(it)
yield a,b
a = b
lst = heapsort(random.random() for _ in range(1000))
all((a <= b) for a, b in pairs(lst))
import timeit
def time_heapsort(n):
return timeit.timeit('heapsort(rlst)',
'from __main__ import heapsort; '
'import random; '
'rlst = (random.random() for _ in range({}))'.format(n),
number=1000)
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
ns = np.linspace(100, 10000, 50, dtype=np.int_)
plt.plot(ns, [time_heapsort(n) for n in ns], 'r+')
plt.show()
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
ns = np.linspace(100, 10000, 50, dtype=np.int_)
plt.plot(ns, [time_heapsort(n) for n in ns], 'r+')
plt.plot(ns, ns*np.log2(ns)*0.01/10000, 'b') # O(n log n) plot
plt.show()