From 370757123e961d19d28bd03ef21770b6520d2a3f Mon Sep 17 00:00:00 2001 From: Kevin Brubeck Unhammer Date: Sat, 7 Jun 2008 18:20:20 +0200 Subject: [PATCH] Ok, so I misunderstood the word adjacent all along. Redone now. --- src/dmv.py | 46 +++++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/src/dmv.py b/src/dmv.py index c67e284..9d97ed9 100755 --- a/src/dmv.py +++ b/src/dmv.py @@ -160,9 +160,8 @@ class DMV_Rule(io.CNF_Rule): return self.probN def p_STOP(self, s, t, loc_h): - '''Returns the correct probability, adjacent or non-adjacent, - depending on whether or not we're at the (either left or - right) end of the fragment. ''' + '''Returns the correct probability, adjacent if we're rewriting from + the (either left or right) end of the fragment. ''' if self.L() == STOP: return self.p(s == loc_h) elif self.R() == STOP: @@ -173,17 +172,18 @@ class DMV_Rule(io.CNF_Rule): else: return self.p(t == loc_h) - def p_ATTACH(self, r, loc_L, loc_R, s=None): - '''Returns the correct probability, adjacent or non-adjacent, - depending on whether or not there is a some lower attachment - either on the right side of the left child, or the left side - of the right child. ''' - if self.LHS() == self.L() and not loc_L == s: - io.debug( "(%s given loc_h (loc_L):%d but s:%d. Todo: optimize away!)" - % (self, loc_L, s) ) - return 0.0 - else: - return self.p(r == loc_L and r+1 == loc_R) + def p_ATTACH(self, r, loc_h, s=None): + '''Returns the correct probability, adjacent if we haven't attached + anything before.''' + if self.LHS() == self.L(): + if not loc_L == s: + io.debug( "(%s given loc_h (loc_L):%d but s:%d. Todo: optimize away!)" + % (self, loc_L, s) ) + return 0.0 + else: + return self.p(r == loc_h) + elif self.LHS() == self.R(): + return self.p(r+1 == loc_h) def bars(self): return bars(self.LHS()) @@ -301,6 +301,7 @@ def inner_dmv(s, t, LHS, loc_h, g, sent, chart): else: # not a STOP, an attachment rewrite: for r in range(s, t): + p_h = rule.p_ATTACH(r, loc_h, s=s) if rule.LHS() == L: locs_L = [loc_h] locs_R = locs(head(R), sent_nums, r+1, t+1) @@ -309,7 +310,7 @@ def inner_dmv(s, t, LHS, loc_h, g, sent, chart): locs_R = [loc_h] # see http://tinyurl.com/4ffhhw p += sum([e(s, r, L, loc_L, n_t+1) * - rule.p_ATTACH(r, loc_L, loc_R, s=s) * + p_h * e(r+1, t, R, loc_R, n_t+1) for loc_L in locs_L for loc_R in locs_R]) @@ -328,6 +329,11 @@ def inner_dmv(s, t, LHS, loc_h, g, sent, chart): return [inner_prob, chart] # end of inner_dmv(s, t, LHS, loc_h, g, sent, chart) +def inner_sent_dmv(sent, g, chart): + '''Possibly there's a more efficient way? Although, non-sentence heads + _will_ be ruled out by inner_dmv though.''' + for loc_h,h_tag in enumerate(sent): + inner_dmv(0, len(sent), ROOT, loc_h, g, chart) if __name__ == "__main__": # Non, Adj _h_ = DMV_Rule((LRBAR,0), STOP, ( RBAR,0), 1.0, 1.0) # LSTOP @@ -422,15 +428,17 @@ P_STOP(-STOP|...) = 1 - P_STOP(STOP|...) for sent in corpus: # have to go through _all_ places where h appears in the # sentence...how? how to make sure it _works_? - chart = {} # cuts time from 17s to 7s ! + chart = {} + inner_sent_dmv(sent, g, chart) #todo current if h_tag in sent: locs_h = locs(h_tag, sent) + io.debug( "locs_h:%s, sent:%s"%(locs_h,sent) ) for loc_h in locs_h: for s in range(loc_h): # s 0.0: -- 2.11.4.GIT