inner_dmv() rewritten with loc_h, works with simple sentence but needs more debugging...
[dmvccm.git] / src / dmv.py
blobe79d0d85da075771ca82974bb1853fa427e5de10
1 #### changes by KBU:
2 # 2008-05-24:
3 # - prettier printout for DMV_Rule
4 # - DMV_Rule changed a bit. head, L and R are now all pairs of the
5 # form (bars, head).
6 # - Started on P_STOP, a bit less pseudo now..
8 # 2008-05-27:
9 # - started on initialization. So far, I have frequencies for
10 # everything, very harmonic. Still need to make these into 1-summing
11 # probabilities
13 # 2008-05-28:
14 # - more work on initialization (init_freq and init_normalize),
15 # getting closer to probabilities now.
17 # 2008-05-29:
18 # - init_normalize is done, it creates p_STOP, p_ROOT and p_CHOOSE,
19 # and also adds the relevant probabilities to p_rules in a grammar.
20 # Still, each individual rule has to store both adjacent and non_adj
21 # probabilities, and inner() should be able to send some parameter
22 # which lets the rule choose... hopefully... Is this possible to do
23 # top-down even? when the sentence could be all the same words?
24 # todo: extensive testing of identical words in sentences!
25 # - frequencies (only used in initialization) are stored as strings,
26 # but in the rules and p_STOP etc, there are only numbers.
28 # 2008-05-30
29 # - copied inner() into this file, to make the very dmv-specific
30 # adjacency stuff work (have to factor that out later on, when it
31 # works).
33 # 2008-06-01
34 # - finished typing in inner_dmv(), still have to test and debug
35 # it. The chart is now four times as big since for any rule we may
36 # have attachments to either the left or the right below, which
37 # upper rules depend on, for selecting probN or probA
39 # 2008-06-03
40 # - fixed a number of little bugs in initialization, where certain
41 # rules were simply not created, or created "backwards"
42 # - inner_dmv() should Work now...
44 # 2008-06-04
45 # - moved initialization to harmonic.py
48 # import numpy # numpy provides Fast Arrays, for future optimization
49 import pprint
50 import io
51 import harmonic
# Constant "lookup" globals; these are not meant to be tweaked.
# Bar levels of a node (printed by DMV_Rule.bar_str as "h", "h_" and "_h_").
NOBAR = 0
RBAR = 1
LRBAR = 2
BARS = [NOBAR, RBAR, LRBAR]
# Special nodes, written as (bars, head) pairs with negative head numbers.
ROOT = (LRBAR, -1)
STOP = (NOBAR, -2)

if __name__ == "__main__":
    print("DMV module tests:")
def node(bars, head):
    '''Build a node, i.e. the pair (bars, head).

    Not strictly needed, but documents the layout of the nodes that
    make up LHS, L and R in each DMV_Rule.'''
    pair = (bars, head)
    return pair
def bars(node):
    '''Return the bar level, the first component of a (bars, head) node.'''
    bar_level = node[0]
    return bar_level
def head(node):
    '''Return the head, the second component of a (bars, head) node.'''
    head_part = node[1]
    return head_part
class DMV_Grammar(io.Grammar):
    '''The DMV-PCFG.

    Public members:
      p_STOP, p_ROOT, p_CHOOSE, p_terminals
    These are changed in the Maximization step, then used to set the
    new probabilities of each DMV_Rule.

    Todo: make p_terminals private? (But it has to be changeable in the
    maximization step due to the short-cutting rules... could of course
    make a DMV_Grammar function to update the short-cut rules...)

    __p_rules is private, but we can still say stuff like:
      for r in g.all_rules():
          r.probN = newProbN

    What other representations do we need? (P_STOP formula uses
    deps_D(h,l/r) at least)'''

    def __init__(self, p_rules, p_terminals, p_STOP, p_CHOOSE, p_ROOT, numtag, tagnum):
        '''Hand rules, terminals and the tag mappings to io.Grammar and
        store the three DMV probability tables on the instance.'''
        io.Grammar.__init__(self, p_rules, p_terminals, numtag, tagnum)
        self.p_STOP = p_STOP
        self.p_CHOOSE = p_CHOOSE
        self.p_ROOT = p_ROOT

    def __str__(self):
        '''One line per rule, with heads rendered through self.numtag.'''
        return "".join("%s\n" % r.__str__(self.numtag)
                       for r in self.all_rules())

    def h_rules(self, h):
        '''All rules whose LHS has head h (any bar level).'''
        return [r for r in self.all_rules() if r.head() == h]

    def rules(self, LHS):
        '''All rules rewriting exactly the node LHS.'''
        return [r for r in self.all_rules() if r.LHS() == LHS]

    def sent_rules(self, LHS, sent_nums):
        '''Used in inner_dmv. Rules rewriting LHS whose two children both
        have heads occurring in sent_nums (or are STOP).

        sent_nums is only read: the allowed heads are collected in a
        local set, so repeated calls no longer make the caller's list
        grow by one STOP entry each time.'''
        # assumes head numbers are hashable (ints, as in the tag tables)
        allowed = set(sent_nums)
        # We don't want to rule out STOPs!
        allowed.add(head(STOP))
        return [r for r in self.all_rules()
                if r.LHS() == LHS
                and head(r.L()) in allowed
                and head(r.R()) in allowed]

    def heads(self):
        '''Not sure yet what is needed here, or where this is needed.

        NOTE(review): returns self.numtag, the same attribute __str__
        uses; a bare `numtag` is not a name in scope here. Confirm this
        is the intended result once a caller exists.'''
        return self.numtag

    def deps_L(self, head):
        '''Left children (r.L()) of every rule whose LHS has this head.

        NOTE(review): STOP and the head's own nodes are not filtered
        out; todo test.'''
        return [r.L() for r in self.all_rules() if r.head() == head]

    def deps_R(self, head):
        '''Right children (r.R()) of every rule whose LHS has this head.

        NOTE(review): STOP and the head's own nodes are not filtered
        out; todo test.'''
        return [r.R() for r in self.all_rules() if r.head() == head]
class DMV_Rule(io.CNF_Rule):
    '''A single CNF rule in the PCFG, of the form
        LHS -> L R
    where LHS, L and R are 'nodes', eg. of the form (bars, head).

    Public members:
        probN, probA

    Private members:
        __L, __R, __LHS

    Different rule-types have different probabilities associated with
    them:

    _h_ -> STOP  h_   P( STOP|h,L,    adj)
    _h_ -> STOP  h_   P( STOP|h,L,non_adj)
     h_ ->  h  STOP   P( STOP|h,R,    adj)
     h_ ->  h  STOP   P( STOP|h,R,non_adj)
     h_ -> _a_   h_   P(-STOP|h,L,    adj) * P(a|h,L)
     h_ -> _a_   h_   P(-STOP|h,L,non_adj) * P(a|h,L)
     h  ->  h   _a_   P(-STOP|h,R,    adj) * P(a|h,R)
     h  ->  h   _a_   P(-STOP|h,R,non_adj) * P(a|h,R)
    '''

    def __init__(self, LHS, L, R, probN, probA):
        '''Create the rule LHS -> L R with its non-adjacent (probN) and
        adjacent (probA) probabilities.

        Raises ValueError if any of the three nodes has a bar level
        that is not in BARS.'''
        for b_h in (LHS, L, R):
            if bars(b_h) not in BARS:
                raise ValueError("bars must be in %s; was given: %s"
                                 % (BARS, bars(b_h)))
        io.CNF_Rule.__init__(self, LHS, L, R, probN)
        self.probA = probA # adjacent
        self.probN = probN # non_adj

    def p_old(self, LRattach, RLattach, *arg):
        '''Returns the correct probability, adjacent or non-adjacent,
        depending on whether or not there is some lower attachment
        either on the right side of the left child, or the left side
        of the right child.'''
        if LRattach or RLattach:
            return self.probN
        return self.probA

    def p(self, s, r, t, loc_h, *arg):
        '''Returns the correct probability, adjacent or non-adjacent,
        for this rule spanning s..t, split after position r, with the
        head located at loc_h. Adjacency is inferred from s, r or t.

        Returns 0.0 for right-STOP and right-attachment rules when the
        head is not at the left edge (loc_h != s).

        Raises ValueError if the rule is neither a STOP rule nor an
        attachment to its own head; previously this ended in an
        UnboundLocalError on `adj`.'''
        if self.L() == STOP:
            adj = s == loc_h
        elif self.R() == STOP:
            adj = t == loc_h
            if loc_h != s:
                io.debug( "(%s given loc_h:%d, s:%d, todo: optimize away!)"
                          % (self,loc_h,s) )
                return 0.0
        elif self.LHS() == self.L(): # right attachment
            adj = r == loc_h
            if loc_h != s:
                io.debug( "(%s given loc_h:%d, s:%d, todo: optimize away!)"
                          % (self,loc_h,s) )
                return 0.0
        elif self.LHS() == self.R(): # left attachment
            adj = r+1 == loc_h
        else:
            raise ValueError("cannot infer adjacency for rule %s" % self)

        if adj:
            return self.probA
        return self.probN

    def bars(self):
        '''Bar level of the LHS node.'''
        return bars(self.LHS())

    def head(self):
        '''Head of the LHS node.'''
        return head(self.LHS())

    @classmethod # so we can call DMV_Rule.bar_str(b_h)
    def bar_str(cls, b_h, tag=lambda x:x):
        '''Printable form of the node b_h; tag maps a head to what
        should be shown for it (identity by default).'''
        if b_h == ROOT:
            return 'ROOT'
        if b_h == STOP:
            return 'STOP'
        level = bars(b_h)
        if level == RBAR:
            return " %s_ " % tag(head(b_h))
        if level == LRBAR:
            return "_%s_ " % tag(head(b_h))
        return " %s " % tag(head(b_h))

    def __str__(self, tag=lambda x:x):
        '''The rule as "LHS-->L R" followed by both probabilities.'''
        return "%s-->%s %s\t[N %.2f] [A %.2f]" % (self.bar_str(self.LHS(), tag),
                                                  self.bar_str(self.L(), tag),
                                                  self.bar_str(self.R(), tag),
                                                  self.probN,
                                                  self.probA)
234 ###################################
235 # dmv-specific version of inner() #
236 ###################################
def rewrite_adj(bars, Lattach, Rattach):
    '''List the attachment flags the two children of an attachment rule
    may have, as a tuple of (LL, LR, RL, RR) tuples.

    A NOBAR rule cannot have a left attachment below and must have a
    right one: LL is fixed to Lattach and the other three flags run
    through all combinations. An RBAR rule must have a left attachment
    but does not care about the right: RR is fixed to Rattach and the
    other three flags vary. Any other situation is "false" and gives
    (), so no probability gets added for it.'''
    flags = (False, True)
    if bars == NOBAR and not Lattach and Rattach:
        return tuple((Lattach, lr, rl, rr)
                     for lr in flags
                     for rl in flags
                     for rr in flags)
    if bars == RBAR and Lattach:
        # Rattach may be either true or false here!
        return tuple((ll, lr, rl, Rattach)
                     for ll in flags
                     for lr in flags
                     for rl in flags)
    return ()
def inner_dmv_old(s, t, LHS, g, sent, chart):
    ''' A rewrite of inner in io.py, to take adjacency into accord.

    The chart is now 4 times bigger, since there are different values
    for with or without L/R attachments:
        chart[(s,t,LHS, Lattach, Rattach)]

    If Rattach==True then the rule has a right-attachment or there is
    one lower in the tree (meaning we're no longer
    adjacent). Adjacency depends on whether there is an attachment
    lower in the tree, cf. DMV_Rule.p_old(LRattach, RLattach).

    s and t are the first and last word positions of the span, LHS the
    node to rewrite, g the grammar, sent the list of tags. chart is
    filled in as a side effect. Returns [inner_prob, chart], where
    inner_prob sums over all four attachment combinations.

    Todo: if possible, refactor (move dmv-specific stuff back into
    dmv, so this is "general" enough to be in io.py)
    '''
    def debug_inner_dmv(tabs,s,t,LHS,Lattach,Rattach):
        if io.DEBUG:
            attach = {
                (True, True): "left and right attachments below",
                (True, False): "left attachment(s) below",
                (False, True): "right attachment(s) below",
                (False, False): "no attachments below" }
            info = (tabs,O(s),s,O(t),t, DMV_Rule.bar_str(LHS), attach[Lattach,Rattach])
            print("%sTrying from %s_%d to %s_%d with %s, %s:" % info)

    def O(s):
        return sent[s]

    sent_nums = [g.tagnum(tag) for tag in sent]

    def e(s,t,LHS, Lattach, Rattach, n_t):
        def tab():
            "Tabs for debug output"
            return "\t"*n_t

        key = (s, t, LHS, Lattach, Rattach)
        if key in chart:
            return chart[key]

        debug_inner_dmv(tab(),s,t,LHS, Lattach, Rattach)
        if s == t:
            if Lattach or Rattach:
                # terminals are always F,F for attachment
                io.debug("%s= 0.0 (1 word, no lower attach)" % tab())
                return 0.0
            elif (LHS, O(s)) in g.p_terminals:
                prob = g.p_terminals[LHS, O(s)] # b[LHS, O(s)] in Lari&Young
            else:
                # todo: assuming this is how to deal with lacking
                # rules, since we add prob.s, and 0 is identity
                prob = 0.0
                io.debug( "%sLACKING TERMINAL:" % tab())
            # todo: add to chart perhaps? Although, it _is_ simple lookup..
            io.debug( "%s= %.1f (terminal: %s -> %s)" % (tab(),prob,
                                                         DMV_Rule.bar_str(LHS),
                                                         O(s)) )
            return prob

        # The entry is created before recursing and accumulated in
        # place, exactly as before.
        chart[key] = 0.0
        for rule in g.sent_rules(LHS, sent_nums): # summing over j,k in a[LHS,j,k]
            io.debug( "%ssumming rule %s" % (tab(),rule) )
            L = rule.L()
            R = rule.R()
            # if it's a STOP rule, rewrite for the same range:
            if (L == STOP) or (R == STOP):
                if L == STOP:
                    p = rule.p_old(Lattach, False) # todo check
                    pLR = e(s, t, R, Lattach, Rattach, n_t+1)
                elif R == STOP:
                    p = rule.p_old(False, Rattach) # todo check
                    pLR = e(s, t, L, Lattach, Rattach, n_t+1)
                chart[key] += p * pLR

            # not a STOP, an attachment rewrite:
            else:
                for r in range(s, t):
                    if head(L) in sent_nums[s:r+1] and head(R) in sent_nums[r+1:t+1]:
                        # LL etc are boolean attachment values
                        for (LL, LR, RL, RR) in rewrite_adj(rule.bars(), Lattach, Rattach):
                            p = rule.p_old(LR, RL) # probN or probA
                            pL = e(s, r, L, LL, LR, n_t+1)
                            pR = e(r+1, t, R, RL, RR, n_t+1)
                            chart[key] += p * pL * pR

        return chart[key]
    # end of e-function

    inner_prob = e(s,t,LHS,True,True, 0) + e(s,t,LHS,True,False, 0) + e(s,t,LHS,False,True, 0) + e(s,t,LHS,False,False, 0)
    if io.DEBUG:
        print("---CHART:---")
        # Separate loop names, so the arguments are not rebound, and the
        # end of the span is labelled with the word at c_t (it used to
        # show the word at the start position twice).
        for (c_s,c_t,c_LHS,c_L,c_R),v in chart.items():
            print("\t%s -> %s_%d ... %s_%d (L:%s, R:%s):\t%.3f" % (DMV_Rule.bar_str(c_LHS,g.numtag),
                                                                    O(c_s), c_s,
                                                                    O(c_t), c_t,
                                                                    c_L, c_R, v))
        print("---CHART:end---")
    return [inner_prob, chart]
# end of inner_dmv_old (Lattach,Rattach)
def locs(h, sent, s=0, t=None):
    '''Return the locations of h in sent, or in the fragment sent[s:t]
    (in the latter case the locations are offset by s so that, for
    non-negative s, any x in the returned list has sent[x]==h).

    t defaults to len(sent); an empty list is returned when h does not
    occur in the fragment.'''
    # identity test: `== None` would invoke a custom __eq__ on t
    if t is None:
        t = len(sent)
    return [i+s for i,w in enumerate(sent[s:t]) if w == h]
def inner_dmv(s, t, LHS, loc_h, g, sent, chart):
    ''' A rewrite of inner in io.py, to take adjacency into accord.

    The chart now has loc_h (instead of Lattach, Rattach):
        chart[(s,t,LHS, loc_h)]

    loc_h gives adjacency (along with r for attachment rules), and is
    needed in P_STOP reestimation.

    s and t are the first and last word positions of the span, LHS the
    node to rewrite, loc_h the position of its head, g the grammar and
    sent the list of tags. chart is filled in as a side effect (only
    for spans longer than one word; terminals are looked up directly).
    Returns [inner_prob, chart].

    Todo: if possible, refactor (move dmv-specific stuff back into
    dmv, so this is "general" enough to be in io.py)
    '''
    def O(s):
        return sent[s]

    sent_nums = [g.tagnum(tag) for tag in sent]

    def e(s,t,LHS, loc_h, n_t):
        def tab():
            "Tabs for debug output"
            return "\t"*n_t

        key = (s, t, LHS, loc_h)
        if key in chart:
            return chart[key]

        if s == t:
            if not loc_h == s:
                # a one-word span can only be headed by that very word
                io.debug("%s*= 0.0 (wrong loc_h)" % tab())
                return 0.0
            elif (LHS, O(s)) in g.p_terminals:
                prob = g.p_terminals[LHS, O(s)] # "b[LHS, O(s)]" in Lari&Young
            else:
                # todo: assuming this is how to deal w/lacking
                # rules, since we add prob.s, and 0 is identity
                prob = 0.0
                io.debug( "%sLACKING TERMINAL:" % tab())
            # todo: add to chart perhaps? Although, it _is_ simple lookup..
            io.debug( "%s*= %.1f (terminal: %s -> %s_%d)" % (tab(),prob,
                                                             DMV_Rule.bar_str(LHS),
                                                             O(s), loc_h) )
            return prob

        p = 0.0 # "sum over j,k in a[LHS,j,k]"
        for rule in g.sent_rules(LHS, sent_nums):
            io.debug( "%ssumming rule %s" % (tab(),rule) )
            L = rule.L()
            R = rule.R()
            # if it's a STOP rule, rewrite for the same range:
            if (L == STOP) or (R == STOP):
                if L == STOP:
                    p_h = rule.p(s,s,t,loc_h)
                    pLR = e(s, t, R, loc_h, n_t+1)
                elif R == STOP:
                    p_h = rule.p(s,s,t,loc_h)
                    pLR = e(s, t, L, loc_h, n_t+1)
                p += p_h * pLR
                io.debug( "%s= %.3f" % (tab(), p) )

            else: # not a STOP, an attachment rewrite:
                for r in range(s, t):
                    p_h = rule.p(s,r,t,loc_h) # probN or probA
                    # The head keeps its location; the argument may sit
                    # at any matching position on its side of the split.
                    if rule.LHS() == L:
                        locs_L = [loc_h]
                        locs_R = locs(head(R), sent_nums, r+1, t+1)
                    elif rule.LHS() == R:
                        locs_L = locs(head(L), sent_nums, s, r+1)
                        locs_R = [loc_h]
                    # see http://tinyurl.com/4ffhhw
                    p += sum([e(s, r, L, loc_L, n_t+1) * p_h *
                              e(r+1, t, R, loc_R, n_t+1)
                              for loc_L in locs_L
                              for loc_R in locs_R])
                    io.debug( "%s+= %.3f" % (tab(), p) )
        chart[key] = p
        return p
    # end of e-function

    inner_prob = e(s,t,LHS,loc_h, 0)
    if io.DEBUG:
        print("---CHART:---")
        # Separate loop names, so the arguments are not rebound, and the
        # end of the span is labelled with the word at c_t (it used to
        # show the word at the start position twice).
        for (c_s,c_t,c_LHS,c_loc_h),v in chart.items():
            print("\t%s -> %s_%d ... %s_%d (loc_h:%s):\t%.3f" % (DMV_Rule.bar_str(c_LHS,g.numtag),
                                                                  O(c_s), c_s, O(c_t), c_t, c_loc_h, v))
        print("---CHART:end---")
    return [inner_prob, chart]
# end of inner_dmv (loc_h)
if __name__ == "__main__":
    # Tiny one-tag grammar; rule probabilities are given as Non, Adj.
    _h_ = DMV_Rule((LRBAR,0), STOP,      (RBAR,0),  1.0, 1.0) # LSTOP
    h_S = DMV_Rule((RBAR,0),  (NOBAR,0), STOP,      0.4, 0.3) # RSTOP
    h_A = DMV_Rule((RBAR,0),  (LRBAR,0), (RBAR,0),  0.6, 0.7) # Lattach
    h   = DMV_Rule((NOBAR,0), (NOBAR,0), (LRBAR,0), 1.0, 1.0) # Rattach
    b2 = {
        ((NOBAR, 0), 'h'): 1.0,
        ((RBAR, 0), 'h'): h_S.probA,
        ((LRBAR, 0), 'h'): h_S.probA * _h_.probA,
    }

    g_dup = DMV_Grammar([ _h_, h_S, h_A, h ],b2,0,0,0, {0:'h'}, {'h':0})

    io.DEBUG = 0
    # head of the two-word sentence at position 0
    test0 = inner_dmv(0, 1, (LRBAR,0), 0, g_dup, 'h h'.split(), {})
    if "%.3f" % test0[0] != "0.120":
        print("Should be 0.120: %.3f" % test0[0])

    # head of the two-word sentence at position 1
    test1 = inner_dmv(0, 1, (LRBAR,0), 1, g_dup, 'h h'.split(), {})
    if "%.3f" % test1[0] != "0.063":
        print("Should be 0.063: %.3f" % test1[0])
493 ##############################
494 # DMV-probabilities, todo: #
495 ##############################
def P_CHOOSE():
    '''Placeholder for the P_CHOOSE re-estimation; not implemented yet,
    so it just returns the marker string.'''
    placeholder = "todo"
    return placeholder
def DMV(sent, g):
    '''Here it seems like they store rule information on a per-head (per
    direction) basis, in deps_D(h, dir) which gives us a list.

    NOTE(review): this is an unfinished sketch of the DMV probability
    of a dependency structure and cannot run as written: deps, D, adj
    and root are not defined anywhere in the visible source, the g
    argument is never used, and P_STOP is called with fewer arguments
    than its definition takes.'''
    def P_h(h):
        # NOTE(review): this assignment makes P_h a local integer inside
        # the function of the same name, so the recursive P_h(D(a))
        # below would call an int, not this function.
        P_h = 1 # ?
        for dir in ['l', 'r']:
            for a in deps(h, dir):
                # D(a)??
                # presumably: don't stop, choose a, recurse into a's
                # subtree, then stop -- TODO confirm against the model
                P_h *= \
                    P_STOP (0, h, dir, adj) * \
                    P_CHOOSE (a, h, dir) * \
                    P_h(D(a)) * \
                    P_STOP (STOP | h, dir, adj)
        return P_h
    return P_h(root(sent))
def P_STOP(STOP, h, dir, adj, g, corpus):
    '''Re-estimate a STOP probability for head h over corpus, which is a
    list of sentences (each a list of tags). g is the grammar.

    The result is a ratio of summed inner probabilities: (LRBAR,h) in
    the numerator and (RBAR,h) in the denominator, taken for every
    location of h in every sentence and every span (s,t) with s
    strictly left of that location and t at or right of it. 0.0 is
    returned when the denominator is not positive.

    The STOP, dir and adj arguments are accepted but not used yet. This
    is based on the formula where STOP is True... not sure how we
    calculate if STOP is False; although:
        P_STOP(-STOP|...) = 1 - P_STOP(STOP|...)

    An alternative would be to loop over the rules instead:

        for rule in g.p_rules:
            rule.num = 0
            rule.den = 0
        for sent in corpus:
            for rule in g.p_rules:
                for s:
                    for t:
                        set num and den using inner
        for rule in g.p_rules
            rule.prob = rule.num / rule.den

    ..the way the commented out io-function in io.py seems to do it.
    With sentences as the outer loop we can at least easily go through
    just the heads that are in the sentence, BUT it means going through
    p_rules 3 times, not sure what is slower. Also, inner_dmv now makes
    sure it only goes through heads that are actually in the sentence,
    so that argument falls.
    '''
    numerator = 0
    denominator = 0
    h_tag = g.numtag(h)
    for sent in corpus:
        # one chart per sentence, shared by all the inner_dmv calls
        # below; cuts time from 17s to 7s !
        chart = {}
        if h_tag not in sent:
            continue
        # have to go through _all_ places where h appears in the
        # sentence...how? how to make sure it _works_?
        locs_h = locs(h_tag, sent)
        io.debug( "locs_h:%s, sent:%s"%(locs_h,sent))
        n_words = len(sent)
        for loc_h in locs_h:
            for s in range(loc_h): # s<loc(h), range gives strictly less
                for t in range(loc_h, n_words): # should not be range(s,..), right? todo
                    numerator += inner_dmv(s, t, (LRBAR,h), loc_h, g, sent, chart)[0]
                    denominator += inner_dmv(s, t, (RBAR,h), loc_h, g, sent, chart)[0]

    io.debug( "num/den: %s / %s"%(numerator, denominator))
    if denominator > 0.0:
        return numerator / denominator # upside down in article
    return 0.0
def testreestimation():
    '''Try P_STOP re-estimation for the head 'nn' on a small corpus.

    The non-adjacent probability of the head's left-STOP rule(s) is
    first set to 0.7; then, ten times over, P_STOP is computed, printed
    and written back into those rules. Returns "todo".'''
    sentences = [
        'det nn vbd c vbd',
        'det nn vbd c nn vbd pp',
        'det nn vbd',
        'det vbd nn c vbd pp',
        'det nn vbd',
        'det vbd c nn vbd pp',
        'det nn vbd',
        'det nn vbd c vbd pp',
        'det nn vbd',
        'det nn vbd c det vbd pp',
        'det nn vbd',
        'det nn vbd c vbd det det det pp',
        'det nn vbd',
        'det nn vbd c vbd pp',
        'det nn vbd',
        'det nn vbd c vbd det pp',
        'det nn vbd',
        'det nn vbd c vbd pp',
        'det nn vbd pp',
        'det nn vbd det',
    ]
    testcorpus = [sentence.split() for sentence in sentences]
    g = harmonic.initialize(testcorpus)

    h_tag = 'nn'
    h = g.tagnum(h_tag)
    print("This will take some time. todo: figure out why it doesn't work")
    for r in g.h_rules(h):
        if r.L() != STOP:
            continue
        print(r)
        print("off-set the rule, see what happens:")
        r.probN = 0.7
        print(r)
    for _ in range(10):
        pstophln = P_STOP(True, h, 'L', 'N', g, testcorpus)
        print("p(STOP|%s,L,N):%s"%(h_tag,pstophln))

        # feed the new estimate back into the left-STOP rule(s)
        for r in g.h_rules(h):
            if r.L() != STOP:
                continue
            print(r)
            r.probN = pstophln
            print(r)
    return "todo"
if __name__ == "__main__":
    import timeit
    io.DEBUG = 0
    # run the re-estimation test once, under the timer
    reestimation_timer = timeit.Timer("dmv.testreestimation()","import dmv")
    reestimation_timer.timeit(1)
616 # todo: some more testing on the Brown corpus:
617 # # first five sentences of the Brown corpus:
618 # g_brown = harmonic.initialize([['AT', 'NP-TL', 'NN-TL', 'JJ-TL', 'NN-TL', 'VBD', 'NR', 'AT', 'NN', 'IN', 'NP$', 'JJ', 'NN', 'NN', 'VBD', '``', 'AT', 'NN', "''", 'CS', 'DTI', 'NNS', 'VBD', 'NN', '.'], ['AT', 'NN', 'RBR', 'VBD', 'IN', 'NN', 'NNS', 'CS', 'AT', 'NN-TL', 'JJ-TL', 'NN-TL', ',', 'WDT', 'HVD', 'JJ', 'NN', 'IN', 'AT', 'NN', ',', '``', 'VBZ', 'AT', 'NN', 'CC', 'NNS', 'IN', 'AT', 'NN-TL', 'IN-TL', 'NP-TL', "''", 'IN', 'AT', 'NN', 'IN', 'WDT', 'AT', 'NN', 'BEDZ', 'VBN', '.'], ['AT', 'NP', 'NN', 'NN', 'HVD', 'BEN', 'VBN', 'IN', 'NP-TL', 'JJ-TL', 'NN-TL', 'NN-TL', 'NP', 'NP', 'TO', 'VB', 'NNS', 'IN', 'JJ', '``', 'NNS', "''", 'IN', 'AT', 'JJ', 'NN', 'WDT', 'BEDZ', 'VBN', 'IN', 'NN-TL', 'NP', 'NP', 'NP', '.'], ['``', 'RB', 'AT', 'JJ', 'NN', 'IN', 'JJ', 'NNS', 'BEDZ', 'VBN', "''", ',', 'AT', 'NN', 'VBD', ',', '``', 'IN', 'AT', 'JJ', 'NN', 'IN', 'AT', 'NN', ',', 'AT', 'NN', 'IN', 'NNS', 'CC', 'AT', 'NN', 'IN', 'DT', 'NN', "''", '.'], ['AT', 'NN', 'VBD', 'PPS', 'DOD', 'VB', 'CS', 'AP', 'IN', 'NP$', 'NN', 'CC', 'NN', 'NNS', '``', 'BER', 'JJ', 'CC', 'JJ', 'CC', 'RB', 'JJ', "''", '.'], ['PPS', 'VBD', 'CS', 'NP', 'NNS', 'VB', '``', 'TO', 'HV', 'DTS', 'NNS', 'VBN', 'CC', 'VBN', 'IN', 'AT', 'NN', 'IN', 'VBG', 'CC', 'VBG', 'PPO', "''", '.'], ['AT', 'JJ', 'NN', 'VBD', 'IN', 'AT', 'NN', 'IN', 'AP', 'NNS', ',', 'IN', 'PPO', 'AT', 'NP', 'CC', 'NP-TL', 'NN-TL', 'VBG', 'NNS', 'WDT', 'PPS', 'VBD', '``', 'BER', 'QL', 'VBN', 'CC', 'VB', 'RB', 'VBN', 'NNS', 'WDT', 'VB', 'IN', 'AT', 'JJT', 'NN', 'IN', 'ABX', 'NNS', "''", '.'], ['NN-HL', 'VBN-HL'], ['WRB', ',', 'AT', 'NN', 'VBD', 'PPS', 'VBZ', '``', 'DTS', 'CD', 'NNS', 'MD', 'BE', 'VBN', 'TO', 'VB', 'JJR', 'NN', 'CC', 'VB', 'AT', 'NN', 'IN', 'NN', "''", '.'], ['AT', 'NN-TL', 'VBG-TL', 'NN-TL', ',', 'AT', 'NN', 'VBD', ',', '``', 'BEZ', 'VBG', 'IN', 'VBN', 'JJ', 'NNS', 'CS', 'AT', 'NN', 'IN', 'NN', 'NNS', 'NNS', "''", '.']])
619 # # 36:'AT' in g_brown.numtag, 40:'NP-TL'
621 # io.DEBUG = 0
622 # test_brown = inner_dmv(0,2, (LRBAR,36), g_brown, ['AT', 'NP-TL' ,'NN-TL','JJ-TL'], {})
623 # if io.DEBUG:
624 # for r in g_brown.rules((2,36)) + g_brown.rules((1,36)) + g_brown.rules((0,36)):
625 # L = r.L()
626 # R = r.R()
627 # if head(L) in [36,40,-2] and head(R) in [36,40,-2]:
628 # print r
629 # print "Brown-test gives: %.8f" % test_brown[0]
633 # this will give the tag sequences of all the 6218 Brown corpus
634 # sentences of length < 7:
635 # [[tag for (w, tag) in sent]
636 # for sent in nltk.corpus.brown.tagged_sents() if len(sent) < 7]