# by default, only the result of the last expression in a cell is displayed after evaluation.
# the following forces display of *all* self-standing expressions in a cell.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
class PriorityQueue:
def __init__(self, key=lambda x:x): # max queue default
# key is a function how to assign priorities to values inserted
# we want the ability of user who creates the queue to
#specify how to assign priorities and compare
# key=lambda x:-x min value has high priority
self.data = [] # python list, back of list is for dequeue
# rear of list for enqueue
self.key=key # function
def add(self, x):
# add x to the list and get it into correct slot (priority order)
self.data.append(x) #O(1)
# self.data.sort(key=self.key) # python sort has "key" argument for how to compare items
# O(n^2)
# search/walk it down O(n) start at len(self.data)-1
i=len(self.data)-1
while (i>0 and self.key(self.data[i])<self.key(self.data[i-1])):
self.data[i], self.data[i-1]= self.data[i-1], self.data[i]
i-=1
def max(self):
return self.data[-1]
def pop_max(self):
return self.data.pop(-1) #O(1)
def __bool__(self):
return len(self.data) > 0
def __len__(self):
return len(self.data)
def __repr__(self):
return repr(self.data)
pq = PriorityQueue()
import random
for _ in range(10):
pq.add(random.randrange(100))
pq
[6, 10, 12, 24, 27, 46, 50, 67, 85, 99]
while pq:
print(pq.pop_max())
99 85 67 50 46 27 24 12 10 6
pq1 = PriorityQueue(key=lambda x:-x)
import random
for _ in range(10):
pq1.add(random.randrange(100))
pq1
[97, 88, 77, 54, 42, 38, 34, 28, 23, 17]
pq2 = PriorityQueue(key=lambda x:len(x))
pq2.add("matt")
pq2.add("bob")
pq2.add("james")
pq2.add("mary")
pq2.add("matto")
pq2
['bob', 'matt', 'mary', 'james', 'matto']
class Heap:
def __init__(self):
self.data = []
# we need to support index arithmetic to move from parent to left
# parent to right, child to parent
# given indexParent, indexRight=(2* indexParent)+2
# indexLeft=(2* indexParent)+2
# given an index of child (left or right), what is index of parent?
# parent(indx) = (indx-1)//2
# python a private method starts with a single underscore
#python we can make class methods (no access to object attributes)
# use a "decorator" @staticmethod
@staticmethod
def _parent(idx):
return (idx-1)//2
@staticmethod
def _right(idx):
return (2*idx)+2
@staticmethod
def _left(idx):
return (2*idx)+1
def add(self, x): #O(lg n)
# add the value x to the next position in the heap (python list)
# "walk it up", series of compare/swaps
#len(self.data) is the next avaialble index
#self.data[len(self.data)]=x
self.data.append(x) # its index position? len(self.data)-1
idx=len(self.data)-1
parentIndex=Heap._parent(idx)
#if self.data[idx]> self.data[parentIndex] # swap needed
while idx>0 and self.data[idx]> self.data[parentIndex]:
self.data[idx],self.data[parentIndex]=self.data[parentIndex],self.data[idx]
idx=parentIndex
parentIndex=Heap._parent(idx)
def max(self):
if len(self.data)>0:
return self.data[0]
else:
return None
def pop_max(self):
# assuming heap is not empty
# save the self.data[0]
#value for return later
# swap the last position in heap to self.data[0], make the self.data one smaller
# "walk it down" write a seperate fucntion called heapify
valToReturn=self.data[0]
# self.data[0]=self.data.pop()
self.data[0]=self.data[len(self.data)-1]
del self.data[len(self.data)-1]
self._heapify(0)
return valToReturn
# private since starts with underscore, user cannot call
# instance method, it does work on the heap
def _heapify(self, idx=0): # O(lg n) when called on root
# LOOP
while True:
# three way compare with idx, rightIdx leftIdx values IF EXIST!!
leftIdx = Heap._left(idx)
rightIdx = Heap._right(idx)
maxIdx=idx
if leftIdx<len(self.data) and self.data[leftIdx]>self.data[maxIdx]:
maxIdx=leftIdx
if rightIdx<len(self.data) and self.data[rightIdx]>self.data[maxIdx]:
maxIdx=rightIdx
if maxIdx != idx:
self.data[idx],self.data[maxIdx]=self.data[maxIdx],self.data[idx]
idx=maxIdx
else:
break
def __bool__(self):
return len(self.data) > 0
def __len__(self):
return len(self.data)
def __repr__(self):
return repr(self.data)
h = Heap()
import random
for _ in range(10):
h.add(random.randrange(100))
h
[96, 62, 87, 48, 20, 29, 41, 33, 47, 1]
while h:
print(h.pop_max())
96 87 62 48 47 41 33 29 20 1
def heapsort(iterable): # O(n lg n)
heap = Heap()
for x in iterable:
heap.add(x)
sortedList=[None]*len(heap)
i=len(heap)-1
while heap:
sortedList[i]=heap.pop_max()
i-=1
return sortedList
lst = heapsort(random.random() for _ in range(10))
lst
[0.12523080632115047, 0.15553022024290897, 0.19674745808124916, 0.42777658521510353, 0.5626636377133741, 0.7249588964768504, 0.742917340437266, 0.7804899199420483, 0.9347752766091569, 0.9815484247524765]
import random
def pairs(iterable):
it = iter(iterable)
a = next(it)
while True:
b = next(it)
yield a,b
a = b
lst = heapsort(random.random() for _ in range(9))
#all((a <= b) for a, b in pairs(lst))
for i in ((a <= b) for a, b in pairs(lst)):
print(i)
True True True True True True True True
--------------------------------------------------------------------------- StopIteration Traceback (most recent call last) <ipython-input-30-1a5950f8f79e> in pairs(iterable) 6 while True: ----> 7 b = next(it) 8 yield a,b StopIteration: The above exception was the direct cause of the following exception: RuntimeError Traceback (most recent call last) <ipython-input-30-1a5950f8f79e> in <module> 11 lst = heapsort(random.random() for _ in range(9)) 12 #all((a <= b) for a, b in pairs(lst)) ---> 13 for i in ((a <= b) for a, b in pairs(lst)): 14 print(i) <ipython-input-30-1a5950f8f79e> in <genexpr>(.0) 11 lst = heapsort(random.random() for _ in range(9)) 12 #all((a <= b) for a, b in pairs(lst)) ---> 13 for i in ((a <= b) for a, b in pairs(lst)): 14 print(i) RuntimeError: generator raised StopIteration
import timeit
def time_heapsort(n):
return timeit.timeit('heapsort(rlst)',
'from __main__ import heapsort; '
'import random; '
'rlst = (random.random() for _ in range({}))'.format(n),
number=1000)
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
ns = np.linspace(100, 10000, 50, dtype=np.int_)
plt.plot(ns, [time_heapsort(n) for n in ns], 'r+')
plt.show()
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
ns = np.linspace(100, 10000, 50, dtype=np.int_)
plt.plot(ns, [time_heapsort(n) for n in ns], 'r+')
plt.plot(ns, ns*np.log2(ns)*0.01/10000, 'b') # O(n log n) plot
plt.show()