# by default, only the result of the last expression in a cell is displayed after evaluation.
# the following forces display of *all* self-standing expressions in a cell.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
class PriorityQueue:
def __init__(self, keyFunc=lambda x:x): # default keyFunc is just to return the key
# default is max heap
# if I want a minheap keyFunc=lambda x : -1*x
# if I want the heap priority based on length of key (max first) keyFunc=lambda x : len(x)
# if I want the heap priority based on length of key (min first) keyFunc=lambda x : -1*len(x)
self.data = [] # python list assume append is O(1) and pop() O(1)
self.keyFunc=keyFunc
def add(self, x): #O(n) If instead of "walk it down" you sort after each append O(n lgn)
i = len(self.data)-1 # last valid index position
self.data.append(x)
#walk it down (maintining self.data in sorted order)
while i>=0 and self.keyFunc(self.data[i+1]) < self.keyFunc(self.data[i]):
self.data[i+1],self.data[i]=self.data[i],self.data[i+1] #swap
i-=1
def max(self): # O(1)
return self.data[-1]
def pop_max(self): # O(1)
temp=self.data.pop()
return temp
def __bool__(self):
return len(self.data) > 0
def __len__(self):
return len(self.data)
def __repr__(self):
return repr(self.data)
pq = PriorityQueue()
import random
for _ in range(10):
pq.add(random.randrange(100))
pq
[10, 11, 18, 22, 58, 58, 63, 69, 87, 97]
while pq:
print(pq.pop_max())
97 87 69 63 58 58 22 18 11 10
pq = PriorityQueue(keyFunc=lambda x:-1*x) #minqueue
import random
for _ in range(10):
pq.add(random.randrange(100))
pq
[94, 91, 90, 89, 86, 58, 52, 33, 7, 5]
'''
# what if l is not in the heap, what if r is not in the heap
if l<len(self.data) and r<len(self.data) and self.data[l]>self.data[r]:
#left is larger
larger=l
else:
#right is larger
larger=r
if self.data[idx]>self.data[larger]:
# DONE
break
else:
self.data[idx],self.data[larger]= self.data[larger],self.data[idx]
# I need to do it again
idx = larger
'''
class Heap:
def __init__(self):
self.data = []
# need some methods that given an index return the parentIndex, or leftChildIndex or rightChildIndex
# class methods, helper methods, they do not use "self"
@staticmethod # decorator @staticmethod means class method
def _parent(idx): #method starts with _ means internal (private)
return (idx-1)//2
@staticmethod
def _left(idx):
return (idx*2)+1
@staticmethod
def _right(idx):
return (idx*2)+2
def add(self, x):
# append on the array (python list) and then "walk it up"
self.data.append(x)
i=len(self.data)-1 # the index of the item just appended
p=Heap._parent(i)
# O(height of almost complete binary tree) O(lg n)
while i>0 and self.data[p] < self.data[i]: # assumed max heap only based on item values
self.data[p],self.data[i]=self.data[i],self.data[p]
i=p
p=Heap._parent(i)
def max(self): #
if self:
return self.data[0]
else:
return None
def pop_max(self):
# save the root value
# move the value from last node in the heap up to the root, set last node to None
# decremnt heapsize
# heapify from root down (3 way compare, walking largest of 3 up)
if self:
maxVal = self.data[0]
# self.data[0]=self.data.pop() # remove last item in list and decrement heapsize
self.data[0]=self.data[len(self.data)-1]
del self.data[-1]
self._heapify()
return maxVal
else:
return None
def _heapify(self, idx=0): # privatge helper instance method
# assuming the left and right subtrees from index idx are themselves valid heaps
# 3 way compare? find the index of the max of three nodes
#node of interest is at idx index
while True:
l=Heap._left(idx)
r=Heap._right(idx)
larger=idx
if l<len(self.data) and self.data[l]>self.data[larger]:
larger=l
if r<len(self.data) and self.data[r]>self.data[larger]:
larger=r
if larger==idx:
break
else:
self.data[idx],self.data[larger]= self.data[larger],self.data[idx]
idx = larger
def __bool__(self):
return len(self.data) > 0
def __len__(self):
return len(self.data)
def __repr__(self):
return repr(self.data)
h = Heap()
import random
for _ in range(10):
h.add(random.randrange(100))
h
[72, 64, 43, 31, 48, 3, 36, 3, 8, 3]
while h:
print(h.pop_max())
72 64 48 43 36 31 8 3 3 3
def heapsort(iterable): # O(n lg n) sorting algorithm
# put all the elements into a heap
# repeatedly popmax from heap and place in new array(list) in reverse order
heap = Heap()
for x in iterable: #O (nlg n) THERE IS A FASTER WAY TO BUILD HEAP, CS430
heap.add(x)
sortedList = [None]*len(heap)
i=len(sortedList)-1
while heap: #O (nlg n)
val=heap.pop_max()
sortedList[i]=val
i-=1
return sortedList
import random
def pairs(iterable):
it = iter(iterable)
a = next(it)
for i in range(len(iterable)-1):
b = next(it)
yield a,b
a = b
lst = heapsort(random.random() for _ in range(1000))
all((a <= b) for a, b in pairs(lst))
True
import timeit
def time_heapsort(n):
return timeit.timeit('heapsort(rlst)',
'from __main__ import heapsort; '
'import random; '
'rlst = (random.random() for _ in range({}))'.format(n),
number=1000)
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
ns = np.linspace(100, 10000, 50, dtype=np.int_)
plt.plot(ns, [time_heapsort(n) for n in ns], 'r+')
plt.show()
[<matplotlib.lines.Line2D at 0x29b0322de50>]
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
ns = np.linspace(100, 10000, 50, dtype=np.int_)
plt.plot(ns, [time_heapsort(n) for n in ns], 'r+')
plt.plot(ns, ns*np.log2(ns)*0.01/10000, 'b') # O(n log n) plot
plt.show()
[<matplotlib.lines.Line2D at 0x29b03591070>]
[<matplotlib.lines.Line2D at 0x29b03591460>]