From 370757123e961d19d28bd03ef21770b6520d2a3f Mon Sep 17 00:00:00 2001
From: Kevin Brubeck Unhammer <pixiemotion@gmail.com>
Date: Sat, 7 Jun 2008 18:20:20 +0200
Subject: [PATCH] Ok, so I misunderstood the word adjacent all along. Redone
 now.

---
 src/dmv.py | 46 +++++++++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 19 deletions(-)

diff --git a/src/dmv.py b/src/dmv.py
index c67e284..9d97ed9 100755
--- a/src/dmv.py
+++ b/src/dmv.py
@@ -160,9 +160,8 @@ class DMV_Rule(io.CNF_Rule):
             return self.probN
         
     def p_STOP(self, s, t, loc_h):
-        '''Returns the correct probability, adjacent or non-adjacent,
-        depending on whether or not we're at the (either left or
-        right) end of the fragment. '''
+        '''Returns the correct probability, adjacent if we're rewriting from
+        the (either left or right) end of the fragment. '''
         if self.L() == STOP:
             return self.p(s == loc_h)
         elif self.R() == STOP:
@@ -173,17 +172,18 @@ class DMV_Rule(io.CNF_Rule):
             else:
                 return self.p(t == loc_h)
             
-    def p_ATTACH(self, r, loc_L, loc_R, s=None):
-        '''Returns the correct probability, adjacent or non-adjacent,
-        depending on whether or not there is a some lower attachment
-        either on the right side of the left child, or the left side
-        of the right child. '''
-        if self.LHS() == self.L() and not loc_L == s:
-            io.debug( "(%s given loc_h (loc_L):%d but s:%d. Todo: optimize away!)"
-                      % (self, loc_L, s) )
-            return 0.0
-        else:
-            return self.p(r  == loc_L and r+1 == loc_R)
+    def p_ATTACH(self, r, loc_h, s=None):
+        '''Return the attachment probability for a split after position r:
+        self.p(True) (adjacent) or self.p(False), given the head at loc_h.'''
+        if self.LHS() == self.L():
+            # LHS is the left child: zero unless the span starts at the head.
+            if loc_h != s:
+                io.debug( "(%s given loc_h (loc_L):%d but s:%d. Todo: optimize away!)"
+                          % (self, loc_h, s) )
+                return 0.0
+            return self.p(r == loc_h)
+        elif self.LHS() == self.R():
+            return self.p(r+1 == loc_h)
         
     def bars(self):
         return bars(self.LHS())
@@ -301,6 +301,7 @@ def inner_dmv(s, t, LHS, loc_h, g, sent, chart):
 
                     else: # not a STOP, an attachment rewrite:
                         for r in range(s, t):
+                            p_h = rule.p_ATTACH(r, loc_h, s=s)
                             if rule.LHS() == L: 
                                 locs_L = [loc_h]
                                 locs_R = locs(head(R), sent_nums, r+1, t+1)
@@ -309,7 +310,7 @@ def inner_dmv(s, t, LHS, loc_h, g, sent, chart):
                                 locs_R = [loc_h]
                             # see http://tinyurl.com/4ffhhw 
                             p += sum([e(s, r, L, loc_L, n_t+1) *
-                                      rule.p_ATTACH(r, loc_L, loc_R, s=s) *
+                                      p_h *
                                       e(r+1, t, R, loc_R, n_t+1)
                                       for loc_L in locs_L
                                       for loc_R in locs_R])
@@ -328,6 +329,11 @@ def inner_dmv(s, t, LHS, loc_h, g, sent, chart):
     return [inner_prob, chart]
 # end of inner_dmv(s, t, LHS, loc_h, g, sent, chart)
 
+def inner_sent_dmv(sent, g, chart):
+    '''Call inner_dmv for ROOT with every position of sent as loc_h, sharing
+    chart.  NOTE(review): t=len(sent) may be one past the end -- confirm.'''
+    for loc_h, _tag in enumerate(sent):
+        inner_dmv(0, len(sent), ROOT, loc_h, g, sent, chart)
 
 if __name__ == "__main__":                      # Non, Adj
     _h_ = DMV_Rule((LRBAR,0), STOP,    ( RBAR,0), 1.0, 1.0) # LSTOP
@@ -422,15 +428,17 @@ P_STOP(-STOP|...) = 1 - P_STOP(STOP|...)
     for sent in corpus:
         # have to go through _all_ places where h appears in the
         # sentence...how? how to make sure it _works_?
-        chart = {} # cuts time from 17s to 7s !
+        chart = {} 
+        inner_sent_dmv(sent, g, chart) #todo current
         if h_tag in sent:
             locs_h = locs(h_tag, sent)
+
             io.debug( "locs_h:%s, sent:%s"%(locs_h,sent) )
             for loc_h in locs_h:
                 for s in range(loc_h): # s<loc(h), range gives strictly less
-                    for t in range(loc_h, len(sent)): # should not be range(s,..), right? todo
-                        P_STOP_num += inner_dmv(s, t, (LRBAR,h), loc_h, g, sent, chart)[0]
-                        P_STOP_den += inner_dmv(s, t, (RBAR,h), loc_h, g, sent, chart)[0]
+                    for t in range(loc_h, len(sent)):
+                        P_STOP_num += chart[(s, t, (LRBAR,h), loc_h)]
+                        P_STOP_den += chart[(s, t, (RBAR,h), loc_h)]
                         
     io.debug( "num/den: %s / %s = %s"%(P_STOP_num, P_STOP_den,P_STOP_num / P_STOP_den))
     if P_STOP_den > 0.0:
-- 
2.11.4.GIT