import urllib.request
# Plain-text book hosted on gutenberg.org (presumably "Moby Dick", given the
# `md_` prefix and the 'Call me Ishmael' lookup below).
md_url = 'https://www.gutenberg.org/files/2701/2701-0.txt'

# Use the response as a context manager so the connection is closed as soon
# as the body has been read. 'utf-8-sig' decodes UTF-8 exactly like the
# default codec but also drops a leading byte-order mark when one is present,
# so it cannot end up glued to the first word once the text is split.
with urllib.request.urlopen(md_url) as response:
    md_text = response.read().decode('utf-8-sig')

# Peek at the 100 characters starting at the famous opening line, first as
# raw text and then split into whitespace-separated tokens.
idx = md_text.index('Call me Ishmael')
md_text[idx:idx+100]
md_text[idx:idx+100].split()

# Lower-case the whole text and split on whitespace. Punctuation is not
# stripped, so it stays attached to the neighbouring word.
md_words = md_text.lower().split()
len(md_words)

# Distinct tokens.
md_words_uniq = set(md_words)
len(md_words_uniq)
from collections import Counter

# Counter tallies every token in a single pass. It is a dict subclass, so
# the indexing, len() and items() calls below work as on a plain dict, and
# keys appear in first-seen order.
md_word_counts = Counter(md_words)
md_word_counts['the']
md_word_counts['moby']
len(md_word_counts)
list(md_word_counts.items())[:10]

# most_common() with no argument returns every (word, count) pair ordered
# from the highest count to the lowest; ties keep first-seen order.
md_word_counts_sorted = md_word_counts.most_common()
md_word_counts_sorted[:10]
%matplotlib inline
import matplotlib.pyplot as plt
# Bar chart of the n most frequent words.
n = 10
top_pairs = md_word_counts_sorted[:n]

# Split the (word, count) pairs into labels and bar heights.
words = [word for word, _ in top_pairs]
counts = [count for _, count in top_pairs]

# One bar per rank position, labelled with the corresponding word.
y_pos = range(n)
plt.bar(y_pos, counts)
plt.xticks(y_pos, words)
plt.show()
# Every pair of adjacent words in the text, each kept as a two-element list.
phrases = [[first, second] for first, second in zip(md_words, md_words[1:])]
phrases[:10]

# Map each word to the list of words that directly follow it somewhere in
# the text. Repeats are kept, so frequent followers appear more often.
phrase_dict = {}
for lead, follower in phrases:
    phrase_dict.setdefault(lead, []).append(follower)
phrase_dict['starboard']
import random
# Build a short random word chain: start from a seed word and keep appending
# a randomly chosen word that followed the current last word in the text.
gen_words = ['the']
for _ in range(10):
    # A word with no recorded follower (for example one that only occurs as
    # the very last token) would raise KeyError on a direct lookup; end the
    # chain early instead.
    words = phrase_dict.get(gen_words[-1])
    if not words:
        break
    gen_words.append(random.choice(words))
' '.join(gen_words)
# Sample puzzle: nine lines of nine cells, '.' marking an empty cell and
# spaces separating groups of three for readability.
puzzle = '''..3 .2. 6..
9.. 3.5 ..1
..1 8.6 4..
..8 1.2 9..
7.. ... ..8
..6 7.8 2..
..2 6.9 5..
8.. 2.3 ..9
..5 .1. 3..
'''

# Row letters and column digits; a square is named by row letter followed
# by column digit, e.g. 'A1'.
rows = 'ABCDEFGHI'
cols = '123456789'

# All 81 square names in row-major order ('A1', 'A2', ..., 'I9').
squares = [row + col for row in rows for col in cols]
def parse_puzzle(puz_str):
    '''Convert a puzzle string into a dict keyed by square name.

    Spaces and newlines in `puz_str` are skipped. Each remaining
    character fills the next square in the order given by `squares`:
    a digit 1-9 is kept as a one-character string, anything else
    becomes None.
    '''
    cells = []
    for ch in puz_str:
        if ch in ' \n':
            continue
        cells.append(ch if ch in '123456789' else None)
    # Index by position so that a string with too few cells still fails
    # loudly with IndexError.
    return {sq: cells[i] for i, sq in enumerate(squares)}
parse_puzzle(puzzle)

# The three families of units: the nine columns, the nine rows and the
# nine 3x3 boxes. Each unit is a list of nine square names.
vert_units = [[row + col for row in rows] for col in cols]
horiz_units = [[row + col for col in cols] for row in rows]
box_units = [
    [row + col for row in band for col in stack]
    for band in ('ABC', 'DEF', 'GHI')
    for stack in ('123', '456', '789')
]
all_units = vert_units + horiz_units + box_units

# For every square, the units that contain it.
units = {
    sq: [unit for unit in all_units if sq in unit]
    for sq in squares
}
units['A1']

# For every square, the set of other squares sharing at least one unit
# with it.
peers = {
    sq: {other for unit in units[sq] for other in unit} - {sq}
    for sq in squares
}
peers['A1']

# A blank solution grid: every square may still hold any digit.
sol = dict.fromkeys(squares, '123456789')
def assign(sol, sq, val):
    '''Fix square `sq` to `val` by eliminating every other candidate
    digit it currently holds.

    `sol` is modified in place; nothing is returned.
    '''
    # Take the list of digits to remove once, up front, because
    # eliminate() rewrites sol[sq] as it goes.
    others = sol[sq].replace(val, '')
    for digit in others:
        eliminate(sol, sq, digit)
def eliminate(sol, sq, val):
    '''Remove candidate `val` from square `sq` and propagate the
    consequences through the grid.

    Two follow-up rules are applied after the removal:
    - if `sq` is left with a single candidate, that digit is
      eliminated from all of its peers;
    - for each unit containing `sq`, if exactly one square can still
      hold `val`, `val` is assigned to that square.

    `sol` is modified in place; nothing is returned. Does nothing when
    `val` is already absent from `sq`.
    '''
    current = sol[sq]
    if val not in current:
        return

    narrowed = current.replace(val, '')
    sol[sq] = narrowed

    if len(narrowed) == 1:
        # `narrowed` is the one digit left in this square; it is captured
        # before recursing, so later changes to sol[sq] do not affect it.
        for peer in peers[sq]:
            eliminate(sol, peer, narrowed)

    for unit in units[sq]:
        holders = [s for s in unit if val in sol[s]]
        if len(holders) == 1:
            assign(sol, holders[0], val)
def solve_puzzle(puzz_str):
    '''Parse `puzz_str` and propagate its given digits through a fresh
    grid.

    Returns a dict mapping each square name to the string of candidate
    digits that remain for it after all givens have been assigned.
    '''
    givens = parse_puzzle(puzz_str)
    grid = dict.fromkeys(squares, '123456789')
    for square, digit in givens.items():
        # Empty cells are parsed as None and impose no constraint.
        if not digit:
            continue
        assign(grid, square, digit)
    return grid
# Show the candidate strings left in each square after propagating the
# given digits of `puzzle`.
solve_puzzle(puzzle)
def print_sol(sol):
    '''Print the grid `sol` as nine lines of nine cells.

    Each cell shows the square's candidate string centred in a field
    six characters wide, followed by a single space.
    '''
    for row in rows:
        line = ''.join('{:^6} '.format(sol[row + col]) for col in cols)
        print(line)
# Solve and display the sample puzzle.
print_sol(solve_puzzle(puzzle))

# A second puzzle, run through the same solver and printer.
second_puzzle = '''
.51 8.. 3.6
.3. ... ...
..9 .42 .15
..4 .75 ...
3.. ... ...
.8. 9.. ...
... ... 8..
.1. ..6 .9.
..7 ... ..4
'''
print_sol(solve_puzzle(second_puzzle))