3 # - outer() seems to be working, wrote c(s,t,LHS,loc_h,...) too now.
6 # - moved prune() to junk.py, now using outer() instead. outer() is
7 # written, but needs testing.
10 # - prune() finished, seems to be working.
11 # - started on implementing the other reestimation formulas, in
15 # - moved initialization to harmonic.py
18 # - fixed a number of little bugs in initialization, where certain
19 # rules were simply not created, or created "backwards"
20 # - dmv.inner() should Work now...
23 # - finished typing in dmv.inner(), still have to test and debug
24 # it. The ichart is now four times as big since for any rule we may
25 # have attachments to either the left or the right below, which
26 # upper rules depend on, for selecting probN or probA
29 # - copied inner() into this file, to make the very dmv-specific
30 # adjacency stuff work (have to factor that out later on, when it
34 # - init_normalize is done, it creates p_STOP, p_ROOT and p_CHOOSE,
35 # and also adds the relevant probabilities to p_rules in a grammar.
36 # Still, each individual rule has to store both adjacent and non_adj
37 # probabilities, and inner() should be able to send some parameter
38 # which lets the rule choose... hopefully... Is this possible to do
39 # top-down even? when the sentence could be all the same words?
40 # todo: extensive testing of identical words in sentences!
41 # - frequencies (only used in initialization) are stored as strings,
42 # but in the rules and p_STOP etc, there are only numbers.
45 # - more work on initialization (init_freq and init_normalize),
46 # getting closer to probabilities now.
49 # - started on initialization. So far, I have frequencies for
50 # everything, very harmonic. Still need to make these into 1-summing
54 # - prettier printout for DMV_Rule
55 # - DMV_Rule changed a bit. head, L and R are now all pairs of the
57 # - Started on P_STOP, a bit less pseudo now..
61 #import numpy # numpy provides Fast Arrays, for future optimization
64 # non-tweakable/constant "lookup" globals
69 # probably need these for combined model, see thesis-appendix:
# The seal states a node may carry; DMV_Rule.__init__ raises ValueError
# for any node whose seals() value is not found in this list.
SEALS = [GO_R, RGO_L, SEAL, GO_L, LGO_R]
# Announce the self-tests when this file is executed as a script.
if __name__ == "__main__":
    print("DMV module tests:")
81 def node(seals
, head
):
82 '''Useless function, but just here as documentation. Nodes make up
83 LHS, R and L in each DMV_Rule'''
93 class DMV_Grammar(io
.Grammar
):
97 p_STOP, p_ROOT, p_CHOOSE, p_terminals
98 These are changed in the Maximization step, then used to set the
99 new probabilities of each DMV_Rule.
101 Todo: make p_terminals private? (But it has to be changeable in the
102 maximization step due to the short-cutting rules... could of course
103 make a DMV_Grammar function to update the short-cut rules...)
105 __p_rules is private, but we can still say stuff like:
106 for r in g.all_rules():
109 What other representations do we need? (P_STOP formula uses
110 deps_D(h,l/r) at least)'''
113 for r
in self
.all_rules():
114 str += "%s\n" % r
.__str
__(self
.numtag
)
def h_rules(self, h):
    '''Return a list of the rules in self.all_rules() whose head()
    equals h, in the order all_rules() yields them.'''
    matching = []
    for rule in self.all_rules():
        if rule.head() == h:
            matching.append(rule)
    return matching
def mothersL(self, Node, sent_nums):
    '''Return a list of the rules that have Node as their left child,
    i.e. every rule r in self.all_rules() with r.L() == Node.

    sent_nums is accepted but not consulted here.'''
    mothers = []
    for rule in self.all_rules():
        if rule.L() == Node:
            mothers.append(rule)
    return mothers
def mothersR(self, Node, sent_nums):
    '''Return a list of the rules that have Node as their right child,
    i.e. every rule r in self.all_rules() with r.R() == Node.

    sent_nums is accepted but not consulted here.'''
    mothers = []
    for rule in self.all_rules():
        if rule.R() == Node:
            mothers.append(rule)
    return mothers
def rules(self, LHS):
    '''Return a list of the rules in self.all_rules() whose LHS()
    equals the given LHS node.'''
    rewrites = []
    for rule in self.all_rules():
        if rule.LHS() == LHS:
            rewrites.append(rule)
    return rewrites
def sent_rules(self, LHS, sent_nums):
    '''Return the rules rewriting LHS whose left and right children
    both have a head found in sent_nums or equal to head(STOP).

    Used in dmv.inner. Todo: this takes a _lot_ of time, it seems;
    could use some more space and cache some of this.'''
    # head(STOP) is always allowed: we don't want to rule out STOPs!
    allowed = sent_nums + [head(STOP)]
    selected = []
    for rule in self.all_rules():
        if not rule.LHS() == LHS:
            continue
        if head(rule.L()) not in allowed:
            continue
        if head(rule.R()) not in allowed:
            continue
        selected.append(rule)
    return selected
def deps_L(self, head): # todo: do I use this at all?
    '''Return a list with r.L() for every rule r in self.all_rules()
    whose head() equals head.

    The earlier comprehension yielded a name `a` that was never bound,
    so it raised NameError as soon as one rule matched; the left child
    is now taken directly from the matching rule. An empty list is
    still returned when no rule has this head.

    NOTE(review): every rule with this head contributes its L(),
    whatever kind of rule it is -- confirm whether only attachment
    rules should be counted here.'''
    # The parameter `head` shadows the module-level head() helper inside
    # this method; only the rule's own head() method is called below.
    return [r.L() for r in self.all_rules() if r.head() == head]
def deps_R(self, head):
    '''Return a list with r.R() for every rule r in self.all_rules()
    whose head() equals head.

    The earlier comprehension yielded a name `a` that was never bound,
    so it raised NameError as soon as one rule matched; the right child
    is now taken directly from the matching rule. An empty list is
    still returned when no rule has this head.

    NOTE(review): every rule with this head contributes its R(),
    whatever kind of rule it is -- confirm whether only attachment
    rules should be counted here.'''
    # The parameter `head` shadows the module-level head() helper inside
    # this method; only the rule's own head() method is called below.
    return [r.R() for r in self.all_rules() if r.head() == head]
146 def __init__(self
, p_rules
, p_terminals
, p_STOP
, p_CHOOSE
, p_ROOT
, numtag
, tagnum
):
147 io
.Grammar
.__init
__(self
, p_rules
, p_terminals
, numtag
, tagnum
)
149 self
.p_CHOOSE
= p_CHOOSE
151 self
.head_nums
= [k
for k
in numtag
.iterkeys()]
154 class DMV_Rule(io
.CNF_Rule
):
155 '''A single CNF rule in the PCFG, of the form
157 where LHS, L and R are 'nodes', eg. of the form (seals, head).
165 Different rule-types have different probabilities associated with
168 _h_ -> STOP h_ P( STOP|h,L, adj)
169 _h_ -> STOP h_ P( STOP|h,L,non_adj)
170 h_ -> h STOP P( STOP|h,R, adj)
171 h_ -> h STOP P( STOP|h,R,non_adj)
172 h_ -> _a_ h_ P(-STOP|h,L, adj) * P(a|h,L)
173 h_ -> _a_ h_ P(-STOP|h,L,non_adj) * P(a|h,L)
174 h -> h _a_ P(-STOP|h,R, adj) * P(a|h,R)
175 h -> h _a_ P(-STOP|h,R,non_adj) * P(a|h,R)
177 def p(self
, adj
, *arg
):
183 def p_STOP(self
, s
, t
, loc_h
):
184 '''Returns the correct probability, adjacent if we're rewriting from
185 the (either left or right) end of the fragment. '''
187 return self
.p(s
== loc_h
)
188 elif self
.R() == STOP
:
190 if 'TODO' in io
.DEBUG
:
191 print "(%s given loc_h:%d but s:%d. Todo: optimize away!)" % (self
, loc_h
, s
)
194 return self
.p(t
== loc_h
)
196 def p_ATTACH(self
, r
, loc_h
, s
=None):
197 '''Returns the correct probability, adjacent if we haven't attached
199 if self
.LHS() == self
.L():
201 if 'TODO' in io
.DEBUG
:
202 print "(%s given loc_h (loc_L):%d but s:%d. Todo: optimize away!)" % (self
, loc_h
, s
)
205 return self
.p(r
== loc_h
)
206 elif self
.LHS() == self
.R():
207 return self
.p(r
+1 == loc_h
)
210 return seals(self
.LHS())
213 return head(self
.LHS())
def __init__(self, LHS, L, R, probN, probA):
    '''Validate the three nodes, initialise the CNF_Rule base with the
    non-adjacent probability, and store both probabilities.

    Raises ValueError if seals() of LHS, L or R is not in SEALS.'''
    for candidate in (LHS, L, R):
        seal = seals(candidate)
        if seal not in SEALS:
            raise ValueError("seals must be in %s; was given: %s"
                             % (SEALS, seal))
    # The base class is handed probN only; probA is kept on this object.
    io.CNF_Rule.__init__(self, LHS, L, R, probN)
    self.probN = probN # non_adj
    self.probA = probA # adjacent
224 @classmethod # so we can call DMV_Rule.bar_str(b_h)
225 def bar_str(cls
, b_h
, tag
=lambda x
:x
):
230 elif(seals(b_h
) == RGO_L
):
231 return " %s_ " % tag(head(b_h
))
232 elif(seals(b_h
) == SEAL
):
233 return "_%s_ " % tag(head(b_h
))
235 return " %s " % tag(head(b_h
))
238 def __str__(self
, tag
=lambda x
:x
):
239 return "%s-->%s %s\t[N %.2f] [A %.2f]" % (self
.bar_str(self
.LHS(), tag
),
240 self
.bar_str(self
.L(), tag
),
241 self
.bar_str(self
.R(), tag
),
251 ###################################
252 # dmv-specific version of inner() #
253 ###################################
254 def locs(h
, sent
, s
=0, t
=None, remove
=None):
255 '''Return the locations of h in sent, or some fragment of sent (in the
256 latter case we make sure to offset the locations correctly so that
257 for any x in the returned list, sent[x]==h).
259 t is inclusive, to match the way indices work with inner()
260 (although python list-splicing has "exclusive" end indices)'''
263 return [i
+s
for i
,w
in enumerate(sent
[s
:t
+1])
264 if w
== h
and not (i
+s
) == remove
]
267 def inner(s
, t
, LHS
, loc_h
, g
, sent
, ichart
):
268 ''' A rewrite of io.inner(), to take adjacency into account.
270 The ichart is now of this form:
271 ichart[s,t,LHS, loc_h]
273 loc_h gives adjacency (along with r and location of other child
274 for attachment rules), and is needed in P_STOP reestimation.
276 Todo: if possible, refactor (move dmv-specific stuff back into
277 dmv, so this is "general" enough to be in io.py)
283 sent_nums
= g
.sent_nums(sent
)
286 def e(s
,t
,LHS
, loc_h
, n_t
):
288 "Tabs for debug output"
291 if (s
, t
, LHS
, loc_h
) in ichart
:
292 if 'INNER' in io
.DEBUG
:
293 print "%s*= %.4f in ichart: s:%d t:%d LHS:%s loc:%d" % (tab(),ichart
[s
, t
, LHS
, loc_h
], s
, t
,
294 DMV_Rule
.bar_str(LHS
), loc_h
)
295 return ichart
[s
, t
, LHS
, loc_h
]
297 if s
== t
and seals(LHS
) == GO_R
:
299 if 'INNER' in io
.DEBUG
:
300 print "%s*= 0.0 (wrong loc_h)" % tab()
302 elif (LHS
, O(s
)) in g
.p_terminals
:
303 prob
= g
.p_terminals
[LHS
, O(s
)] # "b[LHS, O(s)]" in Lari&Young
305 # todo: assuming this is how to deal w/lacking
306 # rules, since we add prob.s, and 0 is identity
308 if 'INNER' in io
.DEBUG
:
309 print "%sLACKING TERMINAL:" % tab()
310 # todo: add to ichart perhaps? Although, it _is_ simple lookup..
311 if 'INNER' in io
.DEBUG
:
312 print "%s*= %.4f (terminal: %s -> %s_%d)" % (tab(),prob
, DMV_Rule
.bar_str(LHS
), O(s
), loc_h
)
315 p
= 0.0 # "sum over j,k in a[LHS,j,k]"
316 for rule
in g
.sent_rules(LHS
, sent_nums
):
317 if 'INNER' in io
.DEBUG
:
318 print "%ssumming rule %s s:%d t:%d loc:%d" % (tab(),rule
,s
,t
,loc_h
)
321 if (s
,t
,LHS
,loc_h
) not in tree
:
322 tree
[s
,t
,LHS
,loc_h
] = set()
323 if loc_h
== t
and rule
.LHS() == L
:
324 continue # todo: speed-test
325 if loc_h
== s
and rule
.LHS() == R
:
327 # if it's a STOP rule, rewrite for the same range:
328 if (L
== STOP
) or (R
== STOP
):
330 pLR
= e(s
, t
, R
, loc_h
, n_t
+1)
332 tree
[s
,t
,LHS
,loc_h
].add((s
,t
,R
,loc_h
))
334 pLR
= e(s
, t
, L
, loc_h
, n_t
+1)
336 tree
[s
,t
,LHS
,loc_h
].add((s
,t
,L
,loc_h
))
337 p
+= rule
.p_STOP(s
, t
, loc_h
) * pLR
338 if 'INNER' in io
.DEBUG
:
339 print "%sp= %.4f (STOP)" % (tab(), p
)
341 elif t
> s
: # not a STOP, attachment rewrite:
342 rp_ATTACH
= rule
.p_ATTACH
# todo: profile/speedtest
343 for r
in xrange(s
, t
):
344 p_h
= rp_ATTACH(r
, loc_h
, s
=s
)
347 locs_R
= locs(head(R
), sent_nums
, r
+1, t
, loc_h
)
348 elif rule
.LHS() == R
:
349 locs_L
= locs(head(L
), sent_nums
, s
, r
, loc_h
)
352 pL
= e(s
, r
, L
, loc_L
, n_t
+1)
355 pR
= e(r
+1, t
, R
, loc_R
, n_t
+1)
356 if pR
> 0.0: # and pL > 0.0
357 tree
[s
,t
,LHS
,loc_h
].add(( s
,r
,L
,loc_L
))
358 tree
[s
,t
,LHS
,loc_h
].add((r
+1,t
,R
,loc_R
))
360 if 'INNER' in io
.DEBUG
:
361 print "%sp= %.4f (ATTACH)" % (tab(), p
)
362 ichart
[s
, t
, LHS
, loc_h
] = p
366 inner_prob
= e(s
,t
,LHS
,loc_h
, 0)
368 if 'INNER' in io
.DEBUG
:
369 print debug_ichart(g
,sent
,ichart
)
371 # end of dmv.inner(s, t, LHS, loc_h, g, sent, ichart)
374 def debug_ichart(g
,sent
,ichart
):
375 str = "---ICHART:---\n"
376 for (s
,t
,LHS
,loc_h
),v
in ichart
.iteritems():
377 if type(v
) == dict: # skip 'tree'
379 str += "%s -> %s_%d ... %s_%d (loc_h:%s):\t%.4f\n" % (DMV_Rule
.bar_str(LHS
,g
.numtag
),
380 sent
[s
], s
, sent
[s
], t
, loc_h
, v
)
381 str += "---ICHART:end---\n"
def inner_sent(g, sent, ichart):
    '''Sum inner(0, len(sent)-1, ROOT, loc_h, ...) over every position
    loc_h of sent and return the total (0 for an empty sent).'''
    last = len(sent) - 1
    total = 0
    for loc_h in xrange(len(sent)):
        total += inner(0, last, ROOT, loc_h, g, sent, ichart)
    return total
390 def c(s
,t
,LHS
,loc_h
,g
,sent
,ichart
,ochart
):
391 # assuming P_sent = P(D(ROOT)) = inner(sent). todo: check K&M about this
392 p_sent
= inner_sent(g
, sent
, ichart
)
393 p_in
= inner(s
,t
,LHS
,loc_h
,g
,sent
,ichart
)
394 p_out
= outer(s
,t
,LHS
,loc_h
,g
,sent
,ichart
,ochart
)
396 return p_in
* p_out
/ p_sent
400 ###################################
401 # dmv-specific version of outer() #
402 ###################################
403 def outer(s
,t
,Node
,loc_N
, g
, sent
, ichart
, ochart
):
404 ''' http://www.student.uib.no/~kun041/dmvccm/DMVCCM.html#outer
406 def e(s
,t
,LHS
,loc_h
):
407 # or we could just look it up in ichart, assuming ichart to be done
408 return inner(s
, t
, LHS
, loc_h
, g
, sent
, ichart
)
411 sent_nums
= g
.sent_nums(sent
)
413 def f(s
,t
,Node
,loc_N
):
414 if (s
,t
,Node
) in ochart
:
415 return ochart
[(s
, t
, Node
,loc_N
)]
417 if s
== 0 and t
== T
:
419 else: # ROOT may only be used on full sentence
420 return 0.0 # but we may have non-ROOTs over full sentence too
423 for mom
in g
.mothersL(Node
, sent_nums
): # mom.L() == Node
427 p
+= f(s
,t
,mLHS
,loc_N
) * mom
.p_STOP(s
,t
,loc_N
) # == loc_m
429 if seals(mLHS
) == RGO_L
: # left attachment, head(mLHS) == head(L)
430 for r
in xrange(t
+1,T
+1): # t+1 to lasT
431 for loc_m
in locs(head(mLHS
),sent_nums
,t
+1,r
):
432 p_m
= mom
.p(t
+1 == loc_m
)
433 p
+= f(s
,r
,mLHS
,loc_m
) * p_m
* e(t
+1,r
,R
,loc_m
)
434 else: # right attachment, head(mLHS) == head(Node)
436 p_m
= mom
.p( t
== loc_m
)
437 for r
in xrange(t
+1,T
+1): # t+1 to lasT
438 for loc_R
in locs(head(mLHS
),sent_nums
,t
+1,r
):
439 p
+= f(s
,r
,mLHS
,loc_m
) * p_m
* e(t
+1,r
,R
,loc_R
)
441 for mom
in g
.mothersR(Node
, sent_nums
):
445 p
+= f(s
,t
,mLHS
,loc_N
) * mom
.p_STOP(s
,t
,loc_N
) # == loc_m
447 if seals(mLHS
) == RGO_L
: # left attachment, head(mLHS) == head(Node)
449 p_m
= mom
.p( s
== loc_m
)
450 for r
in xrange(0,s
): # first to s-1
451 for loc_L
in locs(head(L
),sent_nums
,r
,s
-1):
452 p
+= e(r
,s
-1,L
, loc_L
) * p_m
* f(r
,t
,mLHS
,loc_m
)
453 else: # right attachment, head(mLHS) == head(R)
454 for r
in xrange(0,s
): # first to s-1
455 for loc_m
in locs(head(mLHS
),sent_nums
,r
,s
-1):
456 p_m
= mom
.p(s
-1 == loc_m
)
457 p
+= e(r
,s
-1,L
, loc_m
) * p_m
* f(r
,t
,mLHS
,loc_m
)
458 ochart
[s
,t
,Node
,loc_N
] = p
462 return f(s
,t
,Node
,loc_N
)
463 # end outer(s,t,Node,loc_N, g,sent, ichart,ochart)
467 ##############################
468 # reestimation, todo: #
469 ##############################
470 def reestimate_zeros(h_nums
):
471 # todo: p_ROOT, p_CHOOSE, p_terminals
474 f
[('LNSTOP','num',h
)] = 0.0
475 f
[('LNSTOP','den',h
)] = 0.0
476 f
[('LASTOP','num',h
)] = 0.0
477 f
[('LASTOP','den',h
)] = 0.0
478 f
[('RNSTOP','num',h
)] = 0.0
479 f
[('RNSTOP','den',h
)] = 0.0
480 f
[('RASTOP','num',h
)] = 0.0
481 f
[('RASTOP','den',h
)] = 0.0
484 def reestimate(g
, corpus
):
486 P_STOP(-STOP|...) = 1 - P_STOP(STOP|...) '''
487 f
= reestimate_zeros(g
.head_nums
)
490 def c_g(s
,t
,LHS
,loc_h
,sent
):
491 return c(s
,t
,LHS
,loc_h
,g
,sent
,ichart
,ochart
)
493 if 'reest' in io
.DEBUG
:
495 sent_nums
= g
.sent_nums(sent
)
498 for loc_h
,h
in enumerate(sent_nums
):
499 for t
in xrange(loc_h
, len(sent
)):
500 for s
in xrange(loc_h
): # s<loc(h), range gives strictly less
501 # left non-adjacent stop
502 f
[('LNSTOP','num',h
)] += c_g(s
, t
, (SEAL
, h
), loc_h
,sent
)
503 f
[('LNSTOP','den',h
)] += c_g(s
, t
, (RGO_L
,h
), loc_h
,sent
)
505 f
[('LASTOP','num',h
)] += c_g(loc_h
, t
, (SEAL
, h
), loc_h
,sent
)
506 f
[('LASTOP','den',h
)] += c_g(loc_h
, t
, (RGO_L
,h
), loc_h
,sent
)
507 for t
in xrange(loc_h
+1, len(sent
)):
508 # right non-adjacent stop
509 f
[('RNSTOP','num',h
)] += c_g(loc_h
, t
, (RGO_L
,h
), loc_h
,sent
)
510 f
[('RNSTOP','den',h
)] += c_g(loc_h
, t
, (GO_R
, h
), loc_h
,sent
)
511 f
[('RASTOP','num',h
)] += c_g(loc_h
, loc_h
, (RGO_L
,h
), loc_h
,sent
)
512 f
[('RASTOP','den',h
)] += c_g(loc_h
, loc_h
, (GO_R
, h
), loc_h
,sent
)
514 # todo: use sum([ichart[s, t...] etc? but can we then
515 # keep den and num separate within _one_ sum()-call? use map?
517 # we want to go through only non-ROOT left-STOPs..
518 for r
in g
.all_rules():
519 if r
.L() == STOP
and not r
.LHS() == ROOT
:
521 if 'reest' in io
.DEBUG
:
524 if f
[('LNSTOP','den',h
)] > 0.0:
525 r
.probN
= f
[('LNSTOP','num',h
)] / f
[('LNSTOP','den',h
)]
527 r
.probN
= 0.0 # or..remove rule? todo
528 if f
[('LASTOP','den',h
)] > 0.0:
529 r
.probA
= f
[('LASTOP','num',h
)] / f
[('LASTOP','den',h
)]
531 r
.probA
= 0.0 # or..remove rule? todo
532 if 'reest' in io
.DEBUG
:
533 print "p(STOP|%d=%s,L,N): %.4f / %.4f = %.4f (was: %.4f)"%(h
,g
.numtag(h
),
534 f
[('LNSTOP','num',h
)],
535 f
[('LNSTOP','den',h
)],
538 print "p(STOP|%d=%s,L,A): %.4f / %.4f = %.4f (was: %.4f)"%(h
,g
.numtag(h
),
539 f
[('LASTOP','num',h
)],
540 f
[('LASTOP','den',h
)],
543 if r
.R() == STOP
and not r
.LHS() == ROOT
:
545 if 'reest' in io
.DEBUG
:
548 if f
[('RNSTOP','den',h
)] > 0.0:
549 r
.probN
= f
[('RNSTOP','num',h
)] / f
[('RNSTOP','den',h
)]
551 r
.probN
= 0.0 # or..remove rule? todo
552 if f
[('RASTOP','den',h
)] > 0.0:
553 r
.probA
= f
[('RASTOP','num',h
)] / f
[('RASTOP','den',h
)]
555 r
.probA
= 0.0 # or..remove rule? todo
556 if 'reest' in io
.DEBUG
:
557 print "p(STOP|%d=%s,R,N): %.4f / %.4f = %.4f (was: %.4f)"%(h
,g
.numtag(h
),
558 f
[('RNSTOP','num',h
)],
559 f
[('RNSTOP','den',h
)],
562 print "p(STOP|%d=%s,R,A): %.4f / %.4f = %.4f (was: %.4f)"%(h
,g
.numtag(h
),
563 f
[('RASTOP','num',h
)],
564 f
[('RASTOP','den',h
)],
575 ##############################
576 # testing functions: #
577 ##############################
# Small hand-written corpus of tag sequences, one list of tags per
# sentence, for the test helpers in this file.
testcorpus = [s.split() for s in (
    'det nn vbd c vbd',
    'vbd nn c vbd',
    'det nn vbd',
    'det nn vbd c pp',
    'det nn vbd',
    'det vbd vbd c pp',
    'det nn vbd',
    'det nn vbd c vbd',
    'det nn vbd',
    'det nn vbd c vbd',
    'det nn vbd',
    'det nn vbd c vbd',
    'det nn vbd',
    'det nn vbd c pp',
    'det nn vbd pp',
    'det nn vbd',
)]
590 return harmonic
.initialize(testcorpus
)
592 def testreestimation():
593 io
.DEBUG
.add('reest')
595 reestimate(g
, testcorpus
)
599 def testgrammar_a(): # Non, Adj
600 _h_
= DMV_Rule((SEAL
,0), STOP
, ( RGO_L
,0), 0.9, 0.9) # LSTOP
601 h_S
= DMV_Rule(( RGO_L
,0),(GO_R
,0), STOP
, 0.4, 0.3) # RSTOP
602 h_A
= DMV_Rule(( RGO_L
,0),(SEAL
,0),( RGO_L
,0),0.2, 0.1) # Lattach
603 h_Aa
= DMV_Rule(( RGO_L
,0),(SEAL
,1),( RGO_L
,0),0.4, 0.6) # Lattach to a
604 h
= DMV_Rule((GO_R
,0),(GO_R
,0),(SEAL
,0), 1.0, 1.0) # Rattach
605 ha
= DMV_Rule((GO_R
,0),(GO_R
,0),(SEAL
,1), 1.0, 1.0) # Rattach to a
606 rh
= DMV_Rule( ROOT
, STOP
, (SEAL
,0), 1.0, 1.0) # ROOT
608 _a_
= DMV_Rule((SEAL
,1), STOP
, ( RGO_L
,1), 1.0, 1.0) # LSTOP
609 a_S
= DMV_Rule(( RGO_L
,1),(GO_R
,1), STOP
, 0.4, 0.3) # RSTOP
610 a_A
= DMV_Rule(( RGO_L
,1),(SEAL
,1),( RGO_L
,1),0.4, 0.6) # Lattach
611 a_Ah
= DMV_Rule(( RGO_L
,1),(SEAL
,0),( RGO_L
,1),0.2, 0.1) # Lattach to h
612 a
= DMV_Rule((GO_R
,1),(GO_R
,1),(SEAL
,1), 1.0, 1.0) # Rattach
613 ah
= DMV_Rule((GO_R
,1),(GO_R
,1),(SEAL
,0), 1.0, 1.0) # Rattach to h
614 ra
= DMV_Rule( ROOT
, STOP
, (SEAL
,1), 0.1, 0.1) # ROOT
617 b2
[(GO_R
, 0), 'h'] = 1.0
618 b2
[(GO_R
, 1), 'a'] = 1.0
620 return DMV_Grammar([ h_Aa
, ha
, a_Ah
, ah
, ra
, _a_
, a_S
, a_A
, a
, rh
, _h_
, h_S
, h_A
, h
],b2
,0,0,0, {0:'h',1:'a'}, {'h':0,'a':1})
624 def testgrammar_h(): # Non, Adj
625 _h_
= DMV_Rule((SEAL
,0), STOP
, ( RGO_L
,0), 1.0, 1.0) # LSTOP
626 h_S
= DMV_Rule(( RGO_L
,0),(GO_R
,0), STOP
, 0.4, 0.3) # RSTOP
627 h_A
= DMV_Rule(( RGO_L
,0),(SEAL
,0),( RGO_L
,0), 0.6, 0.7) # Lattach
628 h
= DMV_Rule((GO_R
,0),(GO_R
,0),(SEAL
,0), 1.0, 1.0) # Rattach
629 rh
= DMV_Rule( ROOT
, STOP
, (SEAL
,0), 1.0, 1.0) # ROOT
631 b2
[(GO_R
, 0), 'h'] = 1.0
633 return DMV_Grammar([ rh
, _h_
, h_S
, h_A
, h
],b2
,0,0,0, {0:'h'}, {'h':0})
636 def testreestimation_h():
637 io
.DEBUG
.add('reest')
639 reestimate(g
,['h h h'.split()])
def regression_tests():
    '''Recompute a few inner() and outer() values on the one-tag test
    grammar and print a "Should be ..." line for every value whose
    formatted form differs from the recorded one. Prints nothing when
    all values match.'''
    grammar = testgrammar_h()

    # inner() over 'h h', head at position 0
    inner_01_loc0 = inner(0, 1, (SEAL,0), 0, grammar, 'h h'.split(), {})
    if "%.3f" % inner_01_loc0 != "0.120":
        print("Should be 0.120: %.3f" % inner_01_loc0)

    # inner() over 'h h', head at position 1
    inner_01_loc1 = inner(0, 1, (SEAL,0), 1, grammar, 'h h'.split(), {})
    if "%.3f" % inner_01_loc1 != "0.063":
        print("Should be 0.063: %.3f" % inner_01_loc1)

    # inner() over 'h h h', head at position 2
    inner_02_loc2 = inner(0, 2, (SEAL,0), 2, grammar, 'h h h'.split(), {})
    if "%.4f" % inner_02_loc2 != "0.0498":
        print("Should be 0.0498: %.4f" % inner_02_loc2)

    # outer() is checked on a freshly built grammar and empty charts
    outer_12 = outer(1,2,(1,0),2,testgrammar_h(),'h h h'.split(),{},{})
    if "%.2f" % outer_12 != "0.58":
        print("Should be 0.58: %.2f" % outer_12)
660 if __name__
== "__main__":
663 # profile.run('testreestimation()')
664 # print timeit.Timer("dmv.testreestimation()",'''import dmv
665 # reload(dmv)''').timeit(1)
666 # testreestimation_h()
669 print "outer(0,0,(1,0),0,testgrammar_a(),'h a'.split(),{},{}):"
670 print outer(0,0,(1,0),0,testgrammar_a(),'h a'.split(),{},{})
671 print "outer(0,0,(0,0),0,testgrammar_a(),'h a'.split(),{},{}):"
672 print outer(0,0,(0,0),0,testgrammar_a(),'h a'.split(),{},{})
675 #print "testreestimation():"