4 def __init__(self
, histsize
, choice
):
5 self
.histsize
= histsize
def add(self, state, next):
    """Record that `next` was observed immediately after `state`.

    `self.trans` maps each state to the list of items seen following it.
    The list for `state` is created on first use, and `next` is appended
    to it (duplicates are kept, so repeated observations accumulate).
    """
    try:
        followers = self.trans[state]
    except KeyError:
        # First time this state is seen: start a fresh follower list.
        followers = self.trans[state] = []
    followers.append(next)
16 for i
in range(len(seq
)):
17 add(seq
[max(0, i
-n
):i
], seq
[i
:i
+1])
18 add(seq
[len(seq
)-n
:], None)
24 seq
= choice(trans
[None])
26 subseq
= seq
[max(0, len(seq
)-n
):]
27 options
= trans
[subseq
]
28 next
= choice(options
)
36 import sys
, random
, getopt
39 opts
, args
= getopt
.getopt(args
, '0123456789cdwq')
41 print 'Usage: %s [-#] [-cddqw] [file] ...' % sys
.argv
[0]
43 print '-#: 1-digit history size (default 2)'
44 print '-c: characters (default)'
46 print '-d: more debugging output'
47 print '-q: no debugging output'
48 print 'Input files (default stdin) are split in paragraphs'
49 print 'separated blank lines and each paragraph is split'
50 print 'in words by whitespace, then reconcatenated with'
51 print 'exactly one space separating words.'
52 print 'Output consists of paragraphs separated by blank'
53 print 'lines, where lines are no longer than 72 characters.'
59 if '-0' <= o
<= '-9': histsize
= int(o
[1:])
60 if o
== '-c': do_words
= False
61 if o
== '-d': debug
+= 1
62 if o
== '-q': debug
= 0
63 if o
== '-w': do_words
= True
67 m
= Markov(histsize
, random
.choice
)
73 print 'Sorry, need stdin from file'
76 f
= open(filename
, 'r')
77 if debug
: print 'processing', filename
, '...'
80 paralist
= text
.split('\n\n')
82 if debug
> 1: print 'feeding ...'
88 data
= ' '.join(words
)
90 except KeyboardInterrupt:
91 print 'Interrupted -- continue with data read so far'
93 print 'No valid input files'
95 if debug
: print 'done.'
98 for key
in m
.trans
.keys():
99 if key
is None or len(key
) < histsize
:
100 print repr(key
), m
.trans
[key
]
101 if histsize
== 0: print repr(''), m
.trans
['']
112 if n
+ len(w
) > limit
:
120 if __name__
== "__main__":