Searching, Sorting, and Timing¶

Agenda¶

1. Timing
2. Prelude: Timing list indexing
3. Linear search
4. Binary search
5. Insertion sort

1. Timing¶

In [1]:
import time
time.time()

Out[1]:
1473271526.532128
In [2]:
from time import time
time()

Out[2]:
1473271526.54549
In [3]:
from time import time, sleep
start = time()
sleep(1)
end = time()
print(end - start)

1.005336046218872

In [4]:
start = time()
sum(range(10**6))
end = time()
print(end - start)

0.029382944107055664

In [5]:
def timeit(f):
start = time()
f()
end = time()
return end - start

In [6]:
timeit(lambda: sum(range(10**6)))

Out[6]:
0.027124881744384766
In [7]:
def timeit(f):
duration = 0
for _ in range(10):
start = time()
f()
end = time()
duration += end - start
return duration / 10

In [8]:
timeit(lambda: sum(range(10**6)))

Out[8]:
0.027736496925354005
In [9]:
import timeit
timeit.timeit('sum(range(10**6))', number=100)

Out[9]:
2.737777608000215
In [10]:
timeit.timeit(stmt='sorted(lst)',
setup='import random ; lst = [random.random() for _ in range(100)]',
number=100)

Out[10]:
0.001243830000021262
In [11]:
import random
def to_time(lst):
# do something interesting with the list
pass

timeit.timeit(stmt='to_time(lst)',
setup='lst = [random.random() for _ in range(100)]',
globals=globals(), # allows timeit to access functions we've defined in this notebook
number=1000)

Out[11]:
9.794600009627175e-05

2. Prelude: Timing list indexing¶

In [12]:
timeit.timeit(stmt='lst[0]',
setup='import random; lst=[0] * 10**6')

Out[12]:
0.03286254399972677
In [13]:
timeit.timeit(stmt='lst[10**6-1]',
setup='import random; lst=[0] * 10**6')

Out[13]:
0.032995364000271366
In [14]:
import random
size = 10**3
times = [0] * size
lst   = [0] * size
for _ in range(100):
for i in range(size):
times[i] += timeit.timeit(stmt='lst[{}]'.format(i),
globals=globals(),
number=10)

In [15]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.plot(times, 'ro')
plt.show()


Accessing an element in a list by index always takes the same amount of time, regardless of position. I.e., indexing incurs a constant time delay.

How? A Python list uses an array as its underlying data storage mechanism. To access an element in an array, the interpreter:

1. Computes an offset into the array by multiplying the element's index by the size of each array entry (which are uniformly sized, since they are merely references to the actual elements)

Task: to locate an element with a given value in a list (array).

In [16]:
def index(lst, x):
for i in range(len(lst)):
if lst[i] == x:
return i
return None

In [17]:
lst = list(range(100))

In [18]:
index(lst, 10)

Out[18]:
10
In [19]:
index(lst, 99)

Out[19]:
99
In [20]:
index(lst, -1)

In [21]:
def index(lst, x):
for i in range(len(lst)):
if lst[i] == x:
return i
raise ValueError(x)

In [22]:
index(lst, 10)

Out[22]:
10
In [23]:
index(lst, -1)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-23-58f872d52287> in <module>()
----> 1 index(lst, -1)

<ipython-input-21-1cf466be2781> in index(lst, x)
3         if lst[i] == x:
4             return i
----> 5     raise ValueError(x)

ValueError: -1
In [24]:
try:
print('Value found at', index(lst, -1))
except ValueError as e:

Value not found: -1

In [25]:
import timeit
times = []
lst = list(range(1000))
for x in lst:
times.append(timeit.timeit(stmt='index(lst, {})'.format(x),
globals=globals(),
number=100))

In [26]:
import matplotlib.pyplot as plt
plt.plot(times, 'ro')
plt.show()

In [27]:
def index(lst, x):
# assume that lst is sorted!!!
low = 0
hi  = len(lst)
mid = (low + hi) // 2
while lst[mid] != x and low <= hi:
if lst[mid] < x:
low = mid + 1
else:
hi  = mid - 1
mid = (low + hi) // 2
if lst[mid] == x:
return mid
else:
raise ValueError(x)

In [28]:
lst = list(range(1000))
index(lst, 10)

Out[28]:
10
In [29]:
index(lst, 999)

Out[29]:
999
In [30]:
index(lst, -1)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-30-58f872d52287> in <module>()
----> 1 index(lst, -1)

<ipython-input-27-017ffb928b46> in index(lst, x)
13         return mid
14     else:
---> 15         raise ValueError(x)

ValueError: -1
In [31]:
for i in range(len(lst)):
assert(i == index(lst, i))

In [32]:
import timeit
times = []
lst = list(range(1000))
for x in lst:
times.append(timeit.timeit(stmt='index(lst, {})'.format(x),
globals=globals(),
number=1000))

In [33]:
import matplotlib.pyplot as plt
plt.plot(times, 'ro')
plt.show()

In [34]:
def index(lst, x):
# assume that lst is sorted!!!
low = 0
hi  = len(lst)
mid = (low + hi) // 2
while lst[mid] != x and low <= hi:
if lst[mid] < x:
low = mid + 1
else:
hi  = mid - 1
mid = (low + hi) // 2
if lst[mid] == x:
return mid
else:
return None # for testing!

In [35]:
import timeit
import random
times = []
for size in range(100, 10000, 100):
lst = list(range(size))
times.append(timeit.timeit(stmt='index(lst, -1)'.format(random.randrange(size)),
globals=globals(),
number=10000))

In [36]:
import matplotlib.pyplot as plt
plt.plot(times, 'ro')
plt.show()

In [37]:
import timeit
import random
times = []
for e in range(5, 20):
lst = list(range(2**e))
times.append(timeit.timeit(stmt='index(lst, -1)',
globals=globals(),
number=100000))

In [38]:
import matplotlib.pyplot as plt
plt.plot(times, 'ro')
plt.show()


5. Insertion sort¶

In [39]:
import random
lst = list(range(1000))
random.shuffle(lst)

In [40]:
plt.plot(lst, 'ro')
plt.show()

In [41]:
def insertion_sort(lst):
for i in range(1, len(lst)):
for j in range(i, 0, -1):
if lst[j-1] > lst[j]:
lst[j-1], lst[j] = lst[j], lst[j-1] # swap
else:
break

In [42]:
insertion_sort(lst)

In [43]:
plt.plot(lst, 'ro')
plt.show()

In [44]:
import timeit
import random
times = []
for size in range(100, 5000, 100):
lst = list(range(size))
times.append(timeit.timeit(stmt='insertion_sort(lst)',
setup='random.shuffle(lst)',
globals=globals(),
number=1))

In [45]:
plt.plot(times, 'ro')
plt.show()

In [ ]: