account for PyObject_IsInstance's new ability to fail
[python.git] / Demo / scripts / markov.py
blobdf4dec0c5e53782800ec1b9291f8010164b8726f
1 #! /usr/bin/env python
class Markov:
    """Markov chain model over hashable, sliceable sequences (str, tuple).

    ``trans`` maps a history -- a slice holding at most ``histsize`` trailing
    items -- to the list of one-item slices that were observed right after it.
    ``None`` is used as a marker in two places: the key ``None`` lists the
    (empty) sequences a generated text may start from, and a ``None`` entry
    in a successor list means "the sequence ended here".

    Successors are stored once per observation, so a uniform ``choice``
    picks them in proportion to how often they were seen.
    """

    def __init__(self, histsize, choice):
        """Create an empty model.

        histsize -- maximum number of trailing items used as the state
        choice   -- callable returning one element of a non-empty list
        """
        self.histsize = histsize
        self.choice = choice
        self.trans = {}

    def add(self, state, next):
        """Record that `next` was observed immediately after `state`."""
        self.trans.setdefault(state, []).append(next)

    def put(self, seq):
        """Feed one training sequence into the model."""
        n = self.histsize
        add = self.add
        # The empty slice keeps the sequence type, so get() can start from it
        # and extend it with "+=" for strings and tuples alike.
        add(None, seq[:0])
        for i in range(len(seq)):
            add(seq[max(0, i - n):i], seq[i:i + 1])
        # Clamp the start index exactly as get() does.  Without the clamp a
        # sequence shorter than histsize yields a negative index, which
        # selects only a tail of the sequence; get() would then look up the
        # whole sequence and fail with KeyError.
        add(seq[max(0, len(seq) - n):], None)

    def get(self):
        """Generate and return one sequence by walking the recorded states.

        Raises KeyError if nothing has been fed to the model with put().
        """
        choice = self.choice
        trans = self.trans
        n = self.histsize
        seq = choice(trans[None])
        while True:
            history = seq[max(0, len(seq) - n):]
            successor = choice(trans[history])
            if not successor:
                # None marks the end of a training sequence.
                break
            seq += successor
        return seq
def _usage(prog):
    """Print the command-line help text to stdout."""
    print('Usage: %s [-#] [-cdqw] [file] ...' % prog)
    print('Options:')
    print('-#: 1-digit history size (default 2)')
    print('-c: characters (default)')
    print('-w: words')
    print('-d: more debugging output')
    print('-q: no debugging output')
    print('Input files (default stdin) are split in paragraphs')
    print('separated by blank lines and each paragraph is split')
    print('in words by whitespace, then reconcatenated with')
    print('exactly one space separating words.')
    print('Output consists of paragraphs separated by blank')
    print('lines, where lines are no longer than 72 characters.')


def _wrap(words, limit):
    """Join `words` with single spaces, folding into lines of <= `limit` chars.

    A single word longer than `limit` is placed on a line of its own.
    Returns the lines joined by newlines, without a trailing newline.
    """
    lines = []
    current = []
    used = 0    # length of the current line, counting one space per word
    for w in words:
        # Only break when the line already holds a word; breaking on an
        # empty line would emit a blank line, i.e. a bogus paragraph break.
        if current and used + len(w) > limit:
            lines.append(' '.join(current))
            current = []
            used = 0
        current.append(w)
        used += len(w) + 1
    if current:
        lines.append(' '.join(current))
    return '\n'.join(lines)


def test():
    """Command-line driver: train a Markov model and print random text.

    Options are read from sys.argv (see _usage).  Every input file is split
    into paragraphs on blank lines; each paragraph is fed to the model either
    as a string with single spaces between words (default, -c) or as a tuple
    of words (-w).  Generated paragraphs are then printed forever, so the
    function only returns when no input could be read.  Exits with status 2
    on a bad option.
    """
    import sys
    import random
    import getopt

    args = sys.argv[1:]
    try:
        opts, args = getopt.getopt(args, '0123456789cdwq')
    except getopt.error:
        _usage(sys.argv[0])
        sys.exit(2)

    histsize = 2
    do_words = False
    debug = 1
    for o, _ in opts:
        if '-0' <= o <= '-9':
            histsize = int(o[1:])
        elif o == '-c':
            do_words = False
        elif o == '-d':
            debug += 1
        elif o == '-q':
            debug = 0
        elif o == '-w':
            do_words = True
    if not args:
        args = ['-']

    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                f = sys.stdin
                if f.isatty():
                    print('Sorry, need stdin from file')
                    continue
            else:
                f = open(filename, 'r')
            if debug:
                print('processing %s ...' % filename)
            try:
                text = f.read()
            finally:
                # Close files we opened even if read() fails, but leave
                # stdin open: it is not ours, and '-' may be given again.
                if f is not sys.stdin:
                    f.close()
            for para in text.split('\n\n'):
                if debug > 1:
                    print('feeding ...')
                words = para.split()
                if words:
                    if do_words:
                        data = tuple(words)
                    else:
                        data = ' '.join(words)
                    m.put(data)
    except KeyboardInterrupt:
        print('Interrupted -- continue with data read so far')
    if not m.trans:
        print('No valid input files')
        return
    if debug:
        print('done.')

    if debug > 1:
        # Dump the states a generated text can start from.
        for key in m.trans:
            if key is None or len(key) < histsize:
                print('%s %s' % (repr(key), m.trans[key]))
        if histsize == 0:
            # With no history the only state is the empty sequence, whose
            # type depends on the mode: '' for characters, () for words.
            if do_words:
                empty = ()
            else:
                empty = ''
            if empty in m.trans:
                print('%s %s' % (repr(empty), m.trans[empty]))
        print('')

    limit = 72
    while True:
        data = m.get()
        if do_words:
            words = data
        else:
            words = data.split()
        print(_wrap(words, limit))
        print('')   # blank line between paragraphs
# Run the command-line demo only when executed as a script, not on import.
if __name__ == '__main__':
    test()