Minor documentation change - hyperlink tidied up.
[python.git] / Demo / scripts / markov.py
blob bddec569364c1077dcda8abc60fd4e4183aa1660
1 #! /usr/bin/env python
class Markov:
    """Markov-chain model over sliceable sequences (strings or tuples).

    Each state is the slice holding the last `histsize` items seen so far.
    States are used as dictionary keys, so slices of the input must be
    hashable; generated output is built with `+`, so slices must also
    support concatenation.

    Attributes:
        histsize: maximum number of preceding items that form a state.
        choice:   callable that receives a list of options and returns one
                  of them (for example random.choice).
        trans:    dict mapping a state to the list of recorded successors.
                  The key None holds the (empty) start values; a successor
                  of None marks the end of a sequence.
    """

    def __init__(self, histsize, choice):
        """Create an empty model with the given history size and chooser."""
        self.histsize = histsize
        self.choice = choice
        self.trans = {}

    def add(self, state, next):
        """Record that `next` has been observed following `state`.

        Duplicates are kept on purpose: how often a successor appears in
        the list determines how likely `choice` is to pick it.
        """
        self.trans.setdefault(state, []).append(next)

    def put(self, seq):
        """Feed one complete training sequence into the model."""
        n = self.histsize
        add = self.add
        # The start marker maps to the empty slice of the same type as seq.
        add(None, seq[:0])
        for i in range(len(seq)):
            add(seq[max(0, i - n):i], seq[i:i + 1])
        # Clamp the start index at 0.  Without the clamp, a sequence shorter
        # than histsize yields a negative index that selects only a suffix,
        # so the end marker lands under a state that get() never looks up
        # and get() fails with KeyError on the real final state.
        add(seq[max(0, len(seq) - n):], None)

    def get(self):
        """Generate and return one sequence from the recorded transitions.

        Raises KeyError if nothing has been put() yet, because the start
        entry trans[None] does not exist.
        """
        choice = self.choice
        trans = self.trans
        n = self.histsize
        seq = choice(trans[None])
        while True:
            subseq = seq[max(0, len(seq) - n):]
            step = choice(trans[subseq])
            # A falsy successor (the None end marker) terminates the output.
            if not step:
                break
            seq = seq + step
        return seq
def _wrap_words(words, limit):
    """Pack words greedily into lines of at most `limit` characters.

    Returns a list of strings with the words of each line joined by a single
    space.  A word longer than `limit` gets a line of its own.  An empty
    `words` yields one empty line.
    """
    lines = []
    current = []
    n = 0
    for w in words:
        # Only break when the line already holds something; otherwise an
        # over-long first word would emit a blank line, which readers of
        # the output would take for a paragraph separator.
        if current and n + len(w) > limit:
            lines.append(' '.join(current))
            current = []
            n = 0
        current.append(w)
        n = n + len(w) + 1
    lines.append(' '.join(current))
    return lines


def test():
    """Command-line driver: train a Markov model on text, then emit text.

    Options are taken from sys.argv: a single digit sets the history size
    (default 2), -c selects characters (default), -w selects words, -d
    raises and -q silences debugging output.  Remaining arguments are input
    files; '-' or no argument means stdin.  Each input is split into
    paragraphs on blank lines and each paragraph is whitespace-normalised
    before being fed to the model.

    After training, generated paragraphs are printed forever, wrapped at 72
    characters and separated by blank lines; interrupt the program to stop.

    Exits with status 2 after printing usage if the options are invalid.
    Returns None early if no input produced any training data.
    """
    # Every print below takes exactly one parenthesised string so that the
    # output is identical whether print is a statement or a function.
    import sys
    import random
    import getopt
    args = sys.argv[1:]
    try:
        # 'q' must be listed here: -q is documented and handled below.
        opts, args = getopt.getopt(args, '0123456789cdqw')
    except getopt.error:
        print('Usage: markov [-#] [-cddqw] [file] ...')
        print('Options:')
        print('-#: 1-digit history size (default 2)')
        print('-c: characters (default)')
        print('-w: words')
        print('-d: more debugging output')
        print('-q: no debugging output')
        print('Input files (default stdin) are split in paragraphs')
        print('separated blank lines and each paragraph is split')
        print('in words by whitespace, then reconcatenated with')
        print('exactly one space separating words.')
        print('Output consists of paragraphs separated by blank')
        print('lines, where lines are no longer than 72 characters.')
        # Stop here: `opts` is unbound, so continuing would raise NameError.
        sys.exit(2)
    histsize = 2
    do_words = 0
    debug = 1
    for o, _ in opts:
        if '-0' <= o <= '-9':
            histsize = int(o[1:])
        elif o == '-c':
            do_words = 0
        elif o == '-d':
            debug = debug + 1
        elif o == '-q':
            debug = 0
        elif o == '-w':
            do_words = 1
    if not args:
        args = ['-']
    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                f = sys.stdin
                if f.isatty():
                    print('Sorry, need stdin from file')
                    continue
            else:
                f = open(filename, 'r')
            if debug:
                print('processing %s ...' % filename)
            try:
                text = f.read()
            finally:
                # Close files we opened even if read() fails; leave stdin
                # open so a repeated '-' argument does not hit a closed file.
                if f is not sys.stdin:
                    f.close()
            for para in text.split('\n\n'):
                if debug > 1:
                    print('feeding ...')
                words = para.split()
                if words:
                    if do_words:
                        data = tuple(words)
                    else:
                        data = ' '.join(words)
                    m.put(data)
    except KeyboardInterrupt:
        print('Interrupted -- continue with data read so far')
    if not m.trans:
        print('No valid input files')
        return
    if debug:
        print('done.')
    if debug > 1:
        # Dump only the "short" states (start of sequence), which have fewer
        # than histsize items of history.
        for key in m.trans:
            if key is None or len(key) < histsize:
                print('%r %s' % (key, m.trans[key]))
        if histsize == 0:
            # With no history the single state is the empty slice: '' in
            # character mode, () in word mode.  Print whichever exists.
            for empty in ('', ()):
                if empty in m.trans:
                    print('%r %s' % (empty, m.trans[empty]))
        print('')
    limit = 72
    while True:
        data = m.get()
        if do_words:
            words = data
        else:
            words = data.split()
        for line in _wrap_words(words, limit):
            print(line)
        print('')
def tuple(list):
    """Return a tuple holding the items of `list`, in order.

    Works on any sequence that supports len(), indexing and slicing.  The
    sequence is split in half recursively and the halves are concatenated.

    Note: this definition shadows the built-in of the same name for the
    rest of the module; the name is kept because test() calls it.
    """
    if len(list) == 0:
        return ()
    if len(list) == 1:
        return (list[0],)
    # Floor division keeps the midpoint an integer; with true division the
    # result is a float, which cannot be used as a slice index.
    i = len(list) // 2
    return tuple(list[:i]) + tuple(list[i:])
# Run the command-line demo only when executed as a script, not on import.
if __name__ == '__main__':
    test()