import urllib.request

# Plain-text book hosted by Project Gutenberg (the text is expected to
# contain the sentence 'Call me Ishmael', which is looked up below).
md_url = 'https://www.gutenberg.org/files/2701/2701-0.txt'

# Open the response as a context manager so the underlying HTTP
# connection is released as soon as the body has been read.
with urllib.request.urlopen(md_url) as response:
    md_text = response.read().decode()

# Locate a known sentence and peek at the 100 characters starting there,
# first as raw text and then split on whitespace.
idx = md_text.index('Call me Ishmael')
md_text[idx:idx+100]
md_text[idx:idx+100].split()

# Lower-case the whole text and split on whitespace; punctuation stays
# attached to the tokens.
md_words = md_text.lower().split()
len(md_words)

# Distinct tokens.
md_words_uniq = set(md_words)
len(md_words_uniq)
# Tally how often each token occurs; keys appear in first-seen order.
md_word_counts = {}
for w in md_words:
    md_word_counts[w] = md_word_counts.get(w, 0) + 1
md_word_counts['the']
md_word_counts['moby']
len(md_word_counts)
list(md_word_counts.items())[:10]

# (word, count) pairs ordered from most to least frequent. The sort is
# stable, so words with equal counts keep their first-seen order.
md_word_counts_sorted = list(md_word_counts.items())
md_word_counts_sorted.sort(key=lambda item: item[1], reverse=True)
md_word_counts_sorted[:10]
%matplotlib inline
import matplotlib.pyplot as plt
# Bar chart of the n most frequent words and their counts.
n = 10
words = [word for word, _ in md_word_counts_sorted[:n]]
counts = [count for _, count in md_word_counts_sorted[:n]]
# One bar per word, labelled with the word itself on the x axis.
y_pos = range(n)
plt.bar(y_pos, counts)
plt.xticks(y_pos, words)
plt.show()
# Every pair of adjacent words, each kept as a two-element list.
phrases = [md_words[i:i+2] for i in range(len(md_words) - 1)]
phrases[:10]

# Map each word to the list of words that follow it in the text.
# Repeats are kept, so frequent followers appear more often.
phrase_dict = {}
for w1, w2 in phrases:
    phrase_dict.setdefault(w1, []).append(w2)
phrase_dict['starboard']
import random

# Generate up to 10 more words by repeatedly picking a random follower
# of the most recently generated word.
gen_words = ['the']
for _ in range(10):
    words = phrase_dict.get(gen_words[-1])
    if not words:
        # Dead end: this word has no recorded follower (e.g. it occurs
        # only as the very last token of the text), so stop here instead
        # of raising KeyError.
        break
    gen_words.append(random.choice(words))
' '.join(gen_words)
# Sample Sudoku grid as text: digits 1-9 are given clues and '.' marks an
# empty square. Spaces and newlines are layout only; parse_puzzle() drops
# them before reading the squares.
puzzle = '''..3 .2. 6..
            9.. 3.5 ..1
            ..1 8.6 4..
            
            ..8 1.2 9..
            7.. ... ..8
            ..6 7.8 2..
            
            ..2 6.9 5..
            8.. 2.3 ..9
            ..5 .1. 3..
            '''
# Rows are labelled by letter and columns by digit.
rows = 'ABCDEFGHI'
cols = '123456789'
# All 81 square names ('A1', 'A2', ..., 'I9'), listed row by row.
squares = [r+c for r in rows for c in cols]
def parse_puzzle(puz_str):
    '''Convert a textual Sudoku grid into a dict keyed by square name.

    Whitespace in puz_str is ignored. Every remaining character stands
    for one square, read in the order of the module-level `squares`
    list: a digit '1'-'9' is a given value, any other character (such
    as '.') marks an empty square.

    Returns a dict mapping each square name to its digit string, or to
    None when the square is empty.

    Raises ValueError if puz_str does not describe exactly
    len(squares) squares.
    '''
    cells = [c if c in '123456789' else None
             for c in puz_str if not c.isspace()]
    # Reject malformed grids up front rather than failing with an
    # IndexError or silently dropping surplus characters.
    if len(cells) != len(squares):
        raise ValueError('expected {} squares in puzzle, found {}'
                         .format(len(squares), len(cells)))
    return dict(zip(squares, cells))
parse_puzzle(puzzle)

# A unit is a group of nine squares: one column, one row, or one 3x3 box.
horiz_units = [[r+c for c in cols] for r in rows]
vert_units  = [[r+c for r in rows] for c in cols]
box_units   = [[r+c for r in rs for c in cs]
               for rs in (rows[:3], rows[3:6], rows[6:])
               for cs in (cols[:3], cols[3:6], cols[6:])]
all_units = vert_units + horiz_units + box_units

# For each square, the units that contain it (column, row, then box).
units = {s: [u for u in all_units if s in u] for s in squares}
units['A1']

# For each square, every other square that shares a unit with it.
peers = {s: {sq for u in units[s] for sq in u} - {s}
         for s in squares}
peers['A1']

# Blank candidate grid: every square may still hold any digit.
sol = dict.fromkeys(squares, '123456789')
def assign(sol, sq, val):
    '''Fix square sq to val by eliminating every other candidate.

    sol maps square names to strings of candidate digits and is updated
    in place through eliminate(); nothing is returned.
    '''
    # Snapshot the digits to drop before eliminate() starts mutating sol.
    unwanted = sol[sq].replace(val, '')
    for digit in unwanted:
        eliminate(sol, sq, digit)
def eliminate(sol, sq, val):
    '''Remove candidate val from square sq and propagate the effects.

    sol maps square names to strings of candidate digits and is
    modified in place; nothing is returned. Two rules are applied
    after the removal:

    1. If sq is left with a single candidate, that digit is eliminated
       from every peer of sq.
    2. For each unit containing sq, if only one square can still hold
       val, val is assigned to that square.

    No contradiction check is made: a square left with no candidates,
    or a unit with no place left for val, is not reported.
    '''
    # Already eliminated: stop here, which also ends the recursion.
    if val not in sol[sq]:
        return
    sol[sq] = sol[sq].replace(val, '')
    # Rule 1: sq is now decided, so its peers cannot hold that digit.
    if len(sol[sq]) == 1:
        last = sol[sq][0]
        for p in peers[sq]:
            eliminate(sol, p, last)
    # Rule 2: val must go somewhere in each unit; if exactly one square
    # can still take it, place it there.
    for u in units[sq]:
        candidates = [s for s in u if val in sol[s]]
        if len(candidates) == 1:
            assign(sol, candidates[0], val)
def solve_puzzle(puzz_str):
    '''Apply constraint propagation to the puzzle given as text.

    Parses puzz_str, starts from a grid where every square may hold
    any digit, and assigns each given value in turn. Returns the dict
    mapping square names to their remaining candidate digits.
    '''
    givens = parse_puzzle(puzz_str)
    grid = dict.fromkeys(squares, '123456789')
    for square, digit in givens.items():
        # Empty squares are stored as None and impose no constraint.
        if not digit:
            continue
        assign(grid, square, digit)
    return grid
# Show the candidate dict produced for the sample puzzle defined above.
solve_puzzle(puzzle)
def print_sol(sol):
    '''Print the grid one row per line.

    Each square's candidate string is centred in a 6-character field
    followed by a single space.
    '''
    for r in rows:
        line = ''.join('{:^6} '.format(sol[r+c]) for c in cols)
        print(line)
# Print the propagated grid for the sample puzzle.
print_sol(solve_puzzle(puzzle))
# Run a second puzzle through the same propagation. Any square that still
# has several candidates is printed with all of its remaining digits.
print_sol(solve_puzzle('''
.51 8.. 3.6
.3. ... ...
..9 .42 .15
..4 .75 ...
3.. ... ...
.8. 9.. ...
... ... 8..
.1. ..6 .9.
..7 ... ..4
'''))