a bit better, but still not sure P_STOP does everything right
[dmvccm.git] / src / dmv.py
blob ad3ec5142533bc534a93adabf2e2d0785205d2b1
#### changes by KBU:
# 2008-05-24:
# - prettier printout for DMV_Rule
# - DMV_Rule changed a bit. head, L and R are now all pairs of the
#   form (bars, head).
# - Started on P_STOP, a bit less pseudo now..
#
# 2008-05-27:
# - started on initialization. So far, I have frequencies for
#   everything, very harmonic. Still need to make these into 1-summing
#   probabilities.
#
# 2008-05-28:
# - more work on initialization (init_freq and init_normalize),
#   getting closer to probabilities now.
#
# 2008-05-29:
# - init_normalize is done, it creates p_STOP, p_ROOT and p_CHOOSE,
#   and also adds the relevant probabilities to p_rules in a grammar.
#   Still, each individual rule has to store both adjacent and non_adj
#   probabilities, and inner() should be able to send some parameter
#   which lets the rule choose... hopefully... Is this even possible to
#   do top-down, when the sentence could be all the same words?
#   todo: extensive testing of identical words in sentences!
# - frequencies (only used in initialization) are stored as strings,
#   but in the rules and p_STOP etc. there are only numbers.
#
# 2008-05-30:
# - copied inner() into this file, to make the very dmv-specific
#   adjacency stuff work (have to factor that out later on, when it
#   works).
#
# 2008-06-01:
# - finished typing in inner_dmv(), still have to test and debug
#   it. The chart is now four times as big, since for any rule we may
#   have attachments to either the left or the right below, which
#   upper rules depend on when selecting probN or probA.
#
# 2008-06-03:
# - fixed a number of little bugs in initialization, where certain
#   rules were simply not created, or created "backwards"
# - inner_dmv() should Work now...
#
# 2008-06-04:
# - moved initialization to harmonic.py

# import numpy # numpy provides Fast Arrays, for future optimization
import pprint
import io
import harmonic

# non-tweakable/constant "lookup" globals
BARS = [0, 1, 2]
RBAR = 1
LRBAR = 2
NOBAR = 0
ROOT = (LRBAR, -1)
STOP = (NOBAR, -2)
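# Reader's note on the node encoding (inferred from the rule table in
# DMV_Rule below, not the author's wording): a node is a pair
# (bars, head) where head is a POS-tag number. NOBAR is the plain head
# "h" (still taking right dependents), RBAR is the half-sealed "h_"
# (has stopped to the right, takes left dependents), and LRBAR is the
# fully sealed "_h_". ROOT and STOP use the dummy head numbers -1 and -2.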

if __name__ == "__main__":
    print "DMV module tests:"

def node(bars, head):
    '''Useless function, but just here as documentation. Nodes make up
    LHS, R and L in each DMV_Rule'''
    return (bars, head)

def bars(node):
    return node[0]

def head(node):
    return node[1]

class DMV_Grammar(io.Grammar):
    '''The DMV-PCFG.

    Public members:
    p_STOP, p_ROOT, p_CHOOSE, p_terminals
    These are changed in the maximization step, then used to set the
    new probabilities of each DMV_Rule.

    Todo: make p_terminals private? (But it has to be changeable in the
    maximization step due to the short-cutting rules... could of course
    make a DMV_Grammar function to update the short-cut rules...)

    __p_rules is private, but we can still say stuff like:
        for r in g.all_rules():
            r.probN = newProbN

    What other representations do we need? (The P_STOP formula uses
    deps_D(h, l/r) at least.)'''
    def __str__(self):
        str = ""
        for r in self.all_rules():
            str += "%s\n" % r.__str__(self.numtag)
        return str

    def h_rules(self, h):
        return [r for r in self.all_rules() if r.head() == h]

    def rules(self, LHS):
        return [r for r in self.all_rules() if r.LHS() == LHS]

    def sent_rules(self, LHS, sent_nums):
        "Used in inner_dmv."
        # We don't want to rule out STOPs! (Don't mutate the caller's list.)
        nums = sent_nums + [ head(STOP) ]
        return [r for r in self.all_rules() if r.LHS() == LHS
                and head(r.L()) in nums and head(r.R()) in nums]

    def heads(self):
        '''Not sure yet what is needed here, or where this is needed'''
        return self.numtag

    def deps_L(self, head):
        # todo: test this
        return [r.L() for r in self.all_rules() if r.head() == head]

    def deps_R(self, head):
        # todo: test this
        return [r.R() for r in self.all_rules() if r.head() == head]

    def __init__(self, p_rules, p_terminals, p_STOP, p_CHOOSE, p_ROOT, numtag, tagnum):
        io.Grammar.__init__(self, p_rules, p_terminals, numtag, tagnum)
        self.p_STOP = p_STOP
        self.p_CHOOSE = p_CHOOSE
        self.p_ROOT = p_ROOT

class DMV_Rule(io.CNF_Rule):
    '''A single CNF rule in the PCFG, of the form
        LHS -> L R
    where LHS, L and R are 'nodes', eg. of the form (bars, head).

    Public members:
    probN, probA

    Private members:
    __L, __R, __LHS

    Different rule-types have different probabilities associated with
    them:

    _h_ ->  STOP  h_    P( STOP|h,L,    adj)
    _h_ ->  STOP  h_    P( STOP|h,L,non_adj)
     h_ ->  h    STOP   P( STOP|h,R,    adj)
     h_ ->  h    STOP   P( STOP|h,R,non_adj)
     h_ -> _a_    h_    P(-STOP|h,L,    adj) * P(a|h,L)
     h_ -> _a_    h_    P(-STOP|h,L,non_adj) * P(a|h,L)
     h  ->  h    _a_    P(-STOP|h,R,    adj) * P(a|h,R)
     h  ->  h    _a_    P(-STOP|h,R,non_adj) * P(a|h,R)
    '''
    def p(self, adj, *arg):
        if adj:
            return self.probA
        else:
            return self.probN

    def p_STOP(self, s, t, loc_h):
        '''Returns the correct probability, adjacent if we're rewriting from
        the (either left or right) end of the fragment.'''
        if self.L() == STOP:
            return self.p(s == loc_h)
        elif self.R() == STOP:
            if not loc_h == s:
                io.debug( "(%s given loc_h:%d but s:%d. Todo: optimize away!)"
                          % (self, loc_h, s) )
                return 0.0
            else:
                return self.p(t == loc_h)

    def p_ATTACH(self, r, loc_h, s=None):
        '''Returns the correct probability, adjacent if we haven't attached
        anything before.'''
        if self.LHS() == self.L():
            if not loc_h == s:
                io.debug( "(%s given loc_h (loc_L):%d but s:%d. Todo: optimize away!)"
                          % (self, loc_h, s) )
                return 0.0
            else:
                return self.p(r == loc_h)
        elif self.LHS() == self.R():
            return self.p(r+1 == loc_h)

    def bars(self):
        return bars(self.LHS())

    def head(self):
        return head(self.LHS())

    def __init__(self, LHS, L, R, probN, probA):
        for b_h in [LHS, L, R]:
            if bars(b_h) not in BARS:
                raise ValueError("bars must be in %s; was given: %s"
                                 % (BARS, bars(b_h)))
        io.CNF_Rule.__init__(self, LHS, L, R, probN)
        self.probA = probA # adjacent
        self.probN = probN # non_adj

    @classmethod # so we can call DMV_Rule.bar_str(b_h)
    def bar_str(cls, b_h, tag=lambda x:x):
        if b_h == ROOT:
            return 'ROOT'
        elif b_h == STOP:
            return 'STOP'
        elif bars(b_h) == RBAR:
            return " %s_ " % tag(head(b_h))
        elif bars(b_h) == LRBAR:
            return "_%s_ " % tag(head(b_h))
        else:
            return " %s " % tag(head(b_h))

    def __str__(self, tag=lambda x:x):
        return "%s-->%s %s\t[N %.2f] [A %.2f]" % (self.bar_str(self.LHS(), tag),
                                                  self.bar_str(self.L(), tag),
                                                  self.bar_str(self.R(), tag),
                                                  self.probN,
                                                  self.probA)
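
# Reader's note (not part of the original file): each DMV_Rule bundles the
# two variants of one rule, e.g. the RSTOP rule  h_ -> h STOP  stores
# P(STOP|h,R,non_adj) in probN and P(STOP|h,R,adj) in probA, and p(adj)
# picks between them. A minimal sketch, using the same toy grammar as the
# tests further down:
#
#   h_S = DMV_Rule((RBAR,0), (NOBAR,0), STOP, 0.4, 0.3)  # RSTOP
#   h_S.p(True)   # => 0.3, P(STOP|h,R,adj): h has attached nothing to the right yet
#   h_S.p(False)  # => 0.4, P(STOP|h,R,non_adj)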

###################################
# dmv-specific version of inner() #
###################################
def locs(h, sent, s=0, t=None, remove=None):
    '''Return the locations of h in sent, or some fragment of sent (in the
    latter case we make sure to offset the locations correctly so that
    for any x in the returned list, sent[x]==h).'''
    if t is None:
        t = len(sent)
    return [i+s for i,w in enumerate(sent[s:t])
            if w == h and not (i+s) == remove]
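# A small usage example (mine, not from the original tests), matching the
# list comprehension above:
#
#   locs('h', ['h', 'x', 'h'])            # => [0, 2]
#   locs('h', ['h', 'x', 'h'], remove=0)  # => [2]
#   locs('h', ['h', 'x', 'h'], s=1, t=3)  # => [2], offsets stay sentence-relative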

def inner_dmv(s, t, LHS, loc_h, g, sent, chart):
    '''A rewrite of inner() in io.py, to take adjacency into account.

    The chart is now of this form:
        chart[(s,t,LHS, loc_h)]

    loc_h gives adjacency (along with r and the location of the other
    child for attachment rules), and is needed in P_STOP re-estimation.

    Todo: if possible, refactor (move dmv-specific stuff back into
    dmv, so this is "general" enough to be in io.py).'''
    def O(s):
        return sent[s]

    sent_nums = [g.tagnum(tag) for tag in sent]

    def e(s, t, LHS, loc_h, n_t):
        def tab():
            "Tabs for debug output"
            return "\t"*n_t

        if (s, t, LHS, loc_h) in chart:
            io.debug("%s*= %.4f in chart: s:%d t:%d LHS:%s loc:%d"
                     % (tab(), chart[(s, t, LHS, loc_h)], s, t,
                        DMV_Rule.bar_str(LHS), loc_h))
            return chart[(s, t, LHS, loc_h)]
        else:
            if s == t:
                if not loc_h == s:
                    # terminals are always F,F for attachment
                    io.debug("%s*= 0.0 (wrong loc_h)" % tab())
                    return 0.0
                elif (LHS, O(s)) in g.p_terminals:
                    prob = g.p_terminals[LHS, O(s)] # "b[LHS, O(s)]" in Lari&Young
                else:
                    # todo: assuming this is how to deal w/ lacking
                    # rules, since we add prob.s, and 0 is the identity
                    prob = 0.0
                    io.debug( "%sLACKING TERMINAL:" % tab())
                # todo: add to chart perhaps? Although, it _is_ a simple lookup..
                io.debug( "%s*= %.4f (terminal: %s -> %s_%d)"
                          % (tab(), prob, DMV_Rule.bar_str(LHS), O(s), loc_h) )
                return prob
            else:
                p = 0.0 # "sum over j,k in a[LHS,j,k]"
                for rule in g.sent_rules(LHS, sent_nums):
                    io.debug( "%ssumming rule %s s:%d t:%d loc:%d" % (tab(), rule, s, t, loc_h) )
                    L = rule.L()
                    R = rule.R()
                    # if it's a STOP rule, rewrite for the same range:
                    if (L == STOP) or (R == STOP):
                        if L == STOP:
                            pLR = e(s, t, R, loc_h, n_t+1)
                        elif R == STOP:
                            pLR = e(s, t, L, loc_h, n_t+1)
                        p += rule.p_STOP(s, t, loc_h) * pLR
                        io.debug( "%sp= %.4f (STOP)" % (tab(), p) )
                    else: # not a STOP, an attachment rewrite:
                        for r in range(s, t):
                            # if loc_h == t, no need to try right-attachments,
                            # if loc_h == s, no need to try left-attachments... todo
                            p_h = rule.p_ATTACH(r, loc_h, s=s)
                            if rule.LHS() == L:
                                locs_L = [loc_h]
                                locs_R = locs(head(R), sent_nums, r+1, t+1, loc_h)
                            elif rule.LHS() == R:
                                locs_L = locs(head(L), sent_nums, s, r+1, loc_h)
                                locs_R = [loc_h]
                            # see http://tinyurl.com/4ffhhw
                            p += sum([e(s, r, L, loc_L, n_t+1) *
                                      p_h *
                                      e(r+1, t, R, loc_R, n_t+1)
                                      for loc_L in locs_L
                                      for loc_R in locs_R])
                            io.debug( "%sp= %.4f (ATTACH)" % (tab(), p) )
                chart[(s, t, LHS, loc_h)] = p
                return p
    # end of e-function

    inner_prob = e(s, t, LHS, loc_h, 0)
    if 1 in io.DEBUG:
        print "---CHART:---"
        for (s,t,LHS,loc_h),v in chart.iteritems():
            print "%s -> %s_%d ... %s_%d (loc_h:%s):\t%.4f" % (DMV_Rule.bar_str(LHS, g.numtag),
                                                               O(s), s, O(t), t, loc_h, v)
        print "---CHART:end---"
    return inner_prob
# end of inner_dmv(s, t, LHS, loc_h, g, sent, chart)
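
# For instance (my example, not in the original): with the toy grammar
# g_dup defined in the test block below,
#
#   chart = {}
#   inner_dmv(0, 1, (LRBAR,0), 0, g_dup, 'h h'.split(), chart)
#
# leaves chart[(0, 1, (LRBAR,0), 0)] == 0.12, the inside probability of
# the sealed _h_ headed at position 0 over the whole two-word span.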

def inner_sent_dmv(sent, g, chart):
    '''Possibly there's a more efficient way? Although, non-sentence heads
    _will_ be ruled out by inner_dmv.'''
    p = 0.0
    for loc_h, h_tag in enumerate(sent):
        p += inner_dmv(0, len(sent)-1, ROOT, loc_h, g, sent, chart)
    return p

if __name__ == "__main__":
    # the last two arguments are probN (non-adjacent) and probA (adjacent):
    _h_ = DMV_Rule((LRBAR,0), STOP,      ( RBAR,0), 1.0, 1.0) # LSTOP
    h_S = DMV_Rule(( RBAR,0), (NOBAR,0), STOP,      0.4, 0.3) # RSTOP
    h_A = DMV_Rule(( RBAR,0), (LRBAR,0), ( RBAR,0), 0.6, 0.7) # Lattach
    h   = DMV_Rule((NOBAR,0), (NOBAR,0), (LRBAR,0), 1.0, 1.0) # Rattach
    b2 = {}
    b2[(NOBAR, 0), 'h'] = 1.0
    b2[(RBAR,  0), 'h'] = h_S.probA
    b2[(LRBAR, 0), 'h'] = h_S.probA * _h_.probA

    g_dup = DMV_Grammar([ _h_, h_S, h_A, h ], b2, 0, 0, 0, {0:'h'}, {'h':0})

    io.DEBUG = []
    test0 = inner_dmv(0, 1, (LRBAR,0), 0, g_dup, 'h h'.split(), {})
    if not "0.120" == "%.3f" % test0:
        print "Should be 0.120: %.3f" % test0
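
    # Hand-check of test0 (my derivation from the toy grammar above, not
    # part of the original file): with the head at loc_h=0,
    #   inner(0,1,_h_,0) = P(LSTOP,adj) * inner(0,1,h_,0)      = 1.0 * 0.12
    #   inner(0,1,h_,0)  = P(RSTOP,non_adj) * inner(0,1,h,0)   = 0.4 * 0.3
    #                      (the Lattach rule adds 0 here: no head left of position 0)
    #   inner(0,1,h,0)   = inner(0,0,h,0) * P(Rattach,adj) * inner(1,1,_h_,1)
    #                    = 1.0 * 1.0 * b2[(LRBAR,0),'h'] = 0.3
    # which gives the asserted 0.120.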

    test1 = inner_dmv(0, 1, (LRBAR,0), 1, g_dup, 'h h'.split(), {})
    if not "0.063" == "%.3f" % test1:
        print "Should be 0.063: %.3f" % test1

    io.DEBUG = [1]
    test3 = inner_dmv(0, 2, (LRBAR,0), 2, g_dup, 'h h h'.split(), {})
    if not "0.0498" == "%.4f" % test3:
        print "Should be 0.0498: %.4f" % test3

##############################
#  DMV-probabilities, todo:  #
##############################
def P_CHOOSE():
    return "todo"

def DMV(sent, g):
    '''Here it seems like they store rule information on a per-head (per
    direction) basis, in deps_D(h, dir) which gives us a list.

    NB: pseudocode sketch only; deps(), D(), root() and adj are not
    defined anywhere yet, so this is not callable.'''
    def P_h(h):
        P_h = 1 # ?
        for dir in ['l', 'r']:
            for a in deps(h, dir):
                # D(a)??
                P_h *= \
                    P_STOP (0, h, dir, adj) * \
                    P_CHOOSE (a, h, dir) * \
                    P_h(D(a)) * \
                    P_STOP (STOP | h, dir, adj)
        return P_h
    return P_h(root(sent))

def P_STOP(STOP, h, dir, adj, g, corpus):
    '''corpus is a list of sentences s.

    This is based on the formula where STOP is True... not sure how we
    calculate it if STOP is False.

    I thought about instead having this:

        for rule in g.p_rules:
            rule.num = 0
            rule.den = 0
        for sent in corpus:
            for rule in g.p_rules:
                for s:
                    for t:
                        set num and den using inner
        for rule in g.p_rules:
            rule.prob = rule.num / rule.den

    ..the way I'm assuming we do it in the commented-out io-function in
    io.py. With sentences as the outer loop we can at least easily go
    through only the heads that are actually in the sentence... BUT, this
    means having to go through p_rules 3 times, not sure which is slower.

    Also, inner_dmv now makes sure it only goes through heads that are
    actually in the sentence, so that argument falls.

    Oh, and:
        P_STOP(-STOP|...) = 1 - P_STOP(STOP|...)
    '''
    P_STOP_num = 0
    P_STOP_den = 0
    h_tag = g.numtag(h)
    for sent in corpus:
        # have to go through _all_ places where h appears in the
        # sentence...how? how to make sure it _works_?
        chart = {}
        locs_h = locs(h_tag, sent)
        io.debug( "locs_h:%s, sent:%s" % (locs_h, sent) , 2)
        for loc_h in locs_h:
            inner_dmv(0, len(sent)-1, ROOT, loc_h, g, sent, chart)
            for s in range(loc_h): # s < loc(h), range gives strictly less
                for t in range(loc_h, len(sent)):
                    io.debug( "s:%s t:%s loc:%d" % (s, t, loc_h) , 2)
                    if (s, t, (LRBAR,h), loc_h) in chart:
                        io.debug( "num+=%s" % chart[(s, t, (LRBAR,h), loc_h)] , 2)
                        P_STOP_num += chart[(s, t, (LRBAR,h), loc_h)]
                    if (s, t, (RBAR,h), loc_h) in chart:
                        io.debug( "den+=%s" % chart[(s, t, (RBAR,h), loc_h)] , 2)
                        P_STOP_den += chart[(s, t, (RBAR,h), loc_h)]
                    # todo: use sum([chart[(s, t, ...)]]) etc.? but can we
                    # then keep den and num separate?
    io.debug( "num/den: %s / %s" % (P_STOP_num, P_STOP_den) , 2)
    if P_STOP_den > 0.0:
        io.debug( "num/den: %s / %s = %s" % (P_STOP_num, P_STOP_den, P_STOP_num / P_STOP_den) , 2)
        return P_STOP_num / P_STOP_den # upside down in the article
    else:
        return 0.0
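
# What the loop above computes (my summary of the code, not the author's):
#
#   sum_{sent} sum_{loc_h} sum_{s<loc_h} sum_{t>=loc_h} inner(s,t,_h_,loc_h)
#   -----------------------------------------------------------------------
#   sum_{sent} sum_{loc_h} sum_{s<loc_h} sum_{t>=loc_h} inner(s,t, h_,loc_h)
#
# i.e. the inside-probability mass of the sealed node _h_ over that of the
# half-sealed node h_ on the same spans ("upside down in the article").
# Note that the dir and adj arguments are not used in the body yet, and the
# s < loc_h restriction counts only spans extending to the left of the head.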

def testreestimation():
    testcorpus = [s.split() for s in ['det vbd nn c vbd', 'det nn vbd c nn vbd pp',
                                      'det vbd nn', 'det vbd nn c vbd pp',
                                      'det vbd nn', 'det vbd c nn vbd pp',
                                      'det vbd nn', 'det nn vbd nn c vbd pp',
                                      'det vbd nn', 'det nn vbd c det vbd pp',
                                      'det vbd nn', 'det nn vbd c vbd det det det pp',
                                      'det nn vbd', 'det nn vbd c vbd pp',
                                      'det nn vbd', 'det nn vbd c vbd det pp',
                                      'det nn vbd', 'det nn vbd c vbd pp',
                                      'det nn vbd pp', 'det nn vbd det', ]]
    g = harmonic.initialize(testcorpus)

    h_tag = 'nn'
    h = g.tagnum(h_tag)
    print '''This will take some time. todo: figure out why it doesn't do
anything if nn is always the second word.'''
    for r in g.h_rules(h):
        if r.L() == STOP:
            print r
            # print "off-set the rule, see what happens:"
            # r.probN = 0.7
            # print r
    for i in range(3):
        pstophln = P_STOP(True, h, 'L', 'N', g, testcorpus)
        print "p(STOP|%s,L,N):%s" % (h_tag, pstophln)

    for r in g.h_rules(h):
        if r.L() == STOP:
            print r
            r.probN = pstophln
            print r
    return "todo"

def testreestimation_h():
    _h_ = DMV_Rule((LRBAR,0), STOP,      ( RBAR,0), 1.0, 1.0) # LSTOP
    h_S = DMV_Rule(( RBAR,0), (NOBAR,0), STOP,      0.4, 0.3) # RSTOP
    h_A = DMV_Rule(( RBAR,0), (LRBAR,0), ( RBAR,0), 0.6, 0.7) # Lattach
    h   = DMV_Rule((NOBAR,0), (NOBAR,0), (LRBAR,0), 1.0, 1.0) # Rattach
    rh  = DMV_Rule( ROOT,     STOP,      (LRBAR,0), 1.0, 1.0) # ROOT
    b2 = {}
    b2[(NOBAR, 0), 'h'] = 1.0
    b2[(RBAR,  0), 'h'] = h_S.probA
    b2[(LRBAR, 0), 'h'] = h_S.probA * _h_.probA

    g_dup = DMV_Grammar([ rh, _h_, h_S, h_A, h ], b2, 0, 0, 0, {0:'h'}, {'h':0})

    # test3 = inner_dmv(0, 2, (LRBAR,0), 2, g_dup, 'h h h'.split(), {})
    h_tag = 'h'
    h = 0
    print "todo: figure out why it doesn't work"
    for r in g_dup.h_rules(h):
        if r.L() == STOP:
            print r
            # print "off-set the rule, see what happens:"
            # r.probN = 0.7
            # print r
    for i in range(3):
        pstophln = P_STOP(True, h, 'L', 'N', g_dup, ['h h h'.split()])
        print "p(STOP|%s,L,N):%s" % (h_tag, pstophln)

    for r in g_dup.h_rules(h):
        if r.L() == STOP:
            print r
            r.probN = pstophln
            print r
    return "todo"

if __name__ == "__main__":
    io.DEBUG = []
    import timeit
    timeit.Timer("dmv.testreestimation()", '''import dmv
reload(dmv)''').timeit(1)
    pass

# todo: some more testing on the Brown corpus:

# # first ten sentences of the Brown corpus:
# g_brown = harmonic.initialize([['AT', 'NP-TL', 'NN-TL', 'JJ-TL', 'NN-TL', 'VBD', 'NR', 'AT', 'NN', 'IN', 'NP$', 'JJ', 'NN', 'NN', 'VBD', '``', 'AT', 'NN', "''", 'CS', 'DTI', 'NNS', 'VBD', 'NN', '.'], ['AT', 'NN', 'RBR', 'VBD', 'IN', 'NN', 'NNS', 'CS', 'AT', 'NN-TL', 'JJ-TL', 'NN-TL', ',', 'WDT', 'HVD', 'JJ', 'NN', 'IN', 'AT', 'NN', ',', '``', 'VBZ', 'AT', 'NN', 'CC', 'NNS', 'IN', 'AT', 'NN-TL', 'IN-TL', 'NP-TL', "''", 'IN', 'AT', 'NN', 'IN', 'WDT', 'AT', 'NN', 'BEDZ', 'VBN', '.'], ['AT', 'NP', 'NN', 'NN', 'HVD', 'BEN', 'VBN', 'IN', 'NP-TL', 'JJ-TL', 'NN-TL', 'NN-TL', 'NP', 'NP', 'TO', 'VB', 'NNS', 'IN', 'JJ', '``', 'NNS', "''", 'IN', 'AT', 'JJ', 'NN', 'WDT', 'BEDZ', 'VBN', 'IN', 'NN-TL', 'NP', 'NP', 'NP', '.'], ['``', 'RB', 'AT', 'JJ', 'NN', 'IN', 'JJ', 'NNS', 'BEDZ', 'VBN', "''", ',', 'AT', 'NN', 'VBD', ',', '``', 'IN', 'AT', 'JJ', 'NN', 'IN', 'AT', 'NN', ',', 'AT', 'NN', 'IN', 'NNS', 'CC', 'AT', 'NN', 'IN', 'DT', 'NN', "''", '.'], ['AT', 'NN', 'VBD', 'PPS', 'DOD', 'VB', 'CS', 'AP', 'IN', 'NP$', 'NN', 'CC', 'NN', 'NNS', '``', 'BER', 'JJ', 'CC', 'JJ', 'CC', 'RB', 'JJ', "''", '.'], ['PPS', 'VBD', 'CS', 'NP', 'NNS', 'VB', '``', 'TO', 'HV', 'DTS', 'NNS', 'VBN', 'CC', 'VBN', 'IN', 'AT', 'NN', 'IN', 'VBG', 'CC', 'VBG', 'PPO', "''", '.'], ['AT', 'JJ', 'NN', 'VBD', 'IN', 'AT', 'NN', 'IN', 'AP', 'NNS', ',', 'IN', 'PPO', 'AT', 'NP', 'CC', 'NP-TL', 'NN-TL', 'VBG', 'NNS', 'WDT', 'PPS', 'VBD', '``', 'BER', 'QL', 'VBN', 'CC', 'VB', 'RB', 'VBN', 'NNS', 'WDT', 'VB', 'IN', 'AT', 'JJT', 'NN', 'IN', 'ABX', 'NNS', "''", '.'], ['NN-HL', 'VBN-HL'], ['WRB', ',', 'AT', 'NN', 'VBD', 'PPS', 'VBZ', '``', 'DTS', 'CD', 'NNS', 'MD', 'BE', 'VBN', 'TO', 'VB', 'JJR', 'NN', 'CC', 'VB', 'AT', 'NN', 'IN', 'NN', "''", '.'], ['AT', 'NN-TL', 'VBG-TL', 'NN-TL', ',', 'AT', 'NN', 'VBD', ',', '``', 'BEZ', 'VBG', 'IN', 'VBN', 'JJ', 'NNS', 'CS', 'AT', 'NN', 'IN', 'NN', 'NNS', 'NNS', "''", '.']])
# # 36:'AT' in g_brown.numtag, 40:'NP-TL'

# io.DEBUG = []
# test_brown = inner_dmv(0,2, (LRBAR,36), g_brown, ['AT', 'NP-TL' ,'NN-TL','JJ-TL'], {})
# if 1 in io.DEBUG:
#     for r in g_brown.rules((2,36)) + g_brown.rules((1,36)) + g_brown.rules((0,36)):
#         L = r.L()
#         R = r.R()
#         if head(L) in [36,40,-2] and head(R) in [36,40,-2]:
#             print r
# print "Brown-test gives: %.8f" % test_brown

# this will give the tag sequences of all the 6218 Brown corpus
# sentences of length < 7:
# [[tag for (w, tag) in sent]
#  for sent in nltk.corpus.brown.tagged_sents() if len(sent) < 7]