From 4caa76786f42c2fcaa8229c6f0b457ff7ad53ca3 Mon Sep 17 00:00:00 2001 From: Kevin Brubeck Unhammer Date: Mon, 25 Oct 2010 14:10:19 +0200 Subject: [PATCH] remove backups, chmod -x --- src/cnf_dmv.py | 0 src/io.py.~11~ | 251 ---------------------------------- src/io.py.~12~ | 251 ---------------------------------- src/io.py.~13~ | 256 ----------------------------------- src/io.py.~14~ | 264 ------------------------------------ src/io.py.~15~ | 271 ------------------------------------- src/io.py.~16~ | 272 ------------------------------------- src/io.py.~1~ | 235 -------------------------------- src/io.py.~2~ | 237 -------------------------------- src/io.pyc | Bin 7426 -> 8028 bytes src/junk.py | 0 src/junk.py.~1~ | 28 ---- src/loc_h_dmv.pyc | Bin 37205 -> 39001 bytes src/pcnf_dmv.py | 0 tex/DMVCCM.org.~1~ | 386 ----------------------------------------------------- tex/test.org.~1~ | 15 --- tex/test.tex.~1~ | 49 ------- 17 files changed, 2515 deletions(-) mode change 100755 => 100644 src/cnf_dmv.py delete mode 100755 src/io.py.~11~ delete mode 100755 src/io.py.~12~ delete mode 100755 src/io.py.~13~ delete mode 100755 src/io.py.~14~ delete mode 100755 src/io.py.~15~ delete mode 100755 src/io.py.~16~ delete mode 100755 src/io.py.~1~ delete mode 100755 src/io.py.~2~ rewrite src/io.pyc (74%) mode change 100755 => 100644 src/junk.py delete mode 100755 src/junk.py.~1~ rewrite src/loc_h_dmv.pyc (85%) mode change 100755 => 100644 src/pcnf_dmv.py delete mode 100755 tex/DMVCCM.org.~1~ delete mode 100755 tex/test.org.~1~ delete mode 100644 tex/test.tex.~1~ diff --git a/src/cnf_dmv.py b/src/cnf_dmv.py old mode 100755 new mode 100644 diff --git a/src/io.py.~11~ b/src/io.py.~11~ deleted file mode 100755 index fd34f25..0000000 --- a/src/io.py.~11~ +++ /dev/null @@ -1,251 +0,0 @@ -################################################################## -# Changes: # -################################################################## -# 2008-05-24, KBU: -# - more elaborate pseudo-code in the io()-function -# -# 2008-05-25, KBU: -# - CNF_Rule has a function LHS() which returns head, in DMV_Rule this -# returns the pair of (bars, head). -# -# 2008-05-27 -# - CNF_Rule has __eq__ and __ne__ defined, so that we can use == and -# != on two such rules - - -# import numpy # numpy provides Fast Arrays, for future optimization -# import pprint # for pretty-printing - -DEBUG = 0 - -# some of dmv-module bleeding in here... todo: prettier (in inner()) -NOBAR = 0 -STOP = (NOBAR, -2) - -def debug(string): - '''Easily turn on/off inline debug printouts with this global. There's -a lot of cluttering debug statements here, todo: clean up''' - if DEBUG: - print string - -class CNF_Rule(): - '''A single CNF rule in the PCFG, of the form - head -> L R - where these are just integers - (where do we save the connection between number and symbol? - symbols being 'vbd' etc.)''' - def __eq__(self, other): - return self.LHS() == other.LHS() and self.R == other.R and self.L == other.L - def __ne__(self, other): - return self.LHS() != other.LHS() or self.R != other.R or self.L != other.L - def __str__(self): - return "%s -> %s %s [%.2f]" % (self.head, self.L, self.R, self.prob) - def __init__(self, head, L, R, prob): - self.head = head - self.R = R - self.L = L - self.prob = prob - def p(self, *arg): - "Return a probability, doesn't care about attachment..." - return self.prob - def LHS(self): - return self.head - -class Grammar(): - '''The PCFG used in the I/O-algorithm. - - Todo: as of now, this allows duplicate rules... should we check - for this? (eg. g = Grammar([x,x],[]) where x.prob == 1 may give - inner probabilities of 2.)''' - def rules(self, head): - return [rule for rule in self.p_rules if rule.head == head] - - def __init__(self, p_rules, p_terminals): - '''rules and p_terminals should be arrays, where p_terminals are of - the form [preterminal, terminal], and rules are CNF_Rule's.''' - self.p_rules = p_rules # could check for summing to 1 (+/- epsilon) - self.p_terminals = p_terminals - - - -def inner(s, t, LHS, g, sent, chart): - ''' Give the inner probability of having the node LHS cover whatever's - between s and t in sentence sent, using grammar g. - - Returns a pair of the inner probability and the chart - - For DMV, LHS is a pair (bar, h), but this function ought to be - agnostic about that. - - e() is an internal function, so the variable chart (a dictionary) - is available to all calls of e(). - - Since terminal probabilities are just simple lookups, they are not - put in the chart (although we could put them in there later to - optimize) - ''' - - def O(s): - return sent[s] - - def e(s,t,LHS): - '''Chart has lists of probability and whether or not we've attached -yet to L and R, each entry is a list [p, Rattach, Lattach], where if -Rattach==True then the rule has a right-attachment or there is one -lower in the tree (meaning we're no longer adjacent).''' - if (s, t, LHS) in chart: - return chart[(s, t, LHS)] - else: - debug( "trying from %d to %d with %s" % (s,t,LHS) ) - if s == t: - if (LHS, O(s)) in g.p_terminals: - prob = g.p_terminals[LHS, O(s)] # b[LHS, O(s)] - else: - prob = 0.0 # todo: is this the right way to deal with lacking rules? - print "\t LACKING TERMINAL:" - debug( "\t terminal: %s -> %s : %.1f" % (LHS, O(s), prob) ) - # terminals have no attachment - return prob - else: - if (s,t,LHS) not in chart: - # by default, not attachment yet - chart[(s,t,LHS)] = 0.0 #, False, False] - for rule in g.rules(LHS): # summing over j,k in a[LHS,j,k] - debug( "\tsumming rule %s" % rule ) - L = rule.L - R = rule.R - for r in range(s, t): # summing over r = s to r = t-1 - p_L = e(s, r, L) - p_R = e(r + 1, t, R) - p = rule.p() - chart[(s, t, LHS)] += p * p_L * p_R - debug( "\tchart[(%d,%d,%s)] = %.2f" % (s,t,LHS, chart[(s,t,LHS)]) ) - return chart[(s, t, LHS)] - # end of e-function - - inner_prob = e(s,t,LHS) - if DEBUG: - print "---CHART:---" - for k,v in chart.iteritems(): - print "\t%s -> %s_%d ... %s_%d : %.1f" % (k[2], O(k[0]), k[0], O(k[1]), k[1], v) - print "---CHART:end---" - return [inner_prob, chart] # inner_prob == chart[(s,t,LHS)] - - - - - - - - -if __name__ == "__main__": - print "IO-module tests:" - b = {} - s = CNF_Rule(0,1,2, 1.0) # s->np vp - np = CNF_Rule(1,3,4, 0.3) # np->n p - b[1, 'n'] = 0.7 # np->'n' - b[3, 'n'] = 1.0 # n->'n' - b[4, 'p'] = 1.0 # p->'p' - vp = CNF_Rule(2,5,1, 0.1) # vp->v np (two parses use this rule) - vp2 = CNF_Rule(2,2,4, 0.9) # vp->vp p - b[5, 'v'] = 1.0 # v->'v' - - g = Grammar([s,np,vp,vp2], b) - - print "The rules:" - for i in range(0,5): - for r in g.rules(i): - print r - print "" - - test1 = inner(0,0, 1, g, ['n'], {}) - if test1[0] != 0.7: - print "should be 0.70 : %.2f" % test1[0] - print "" - - DEBUG = 1 - test2 = inner(0,2, 2, g, ['v','n','p'], test1[1]) - print "should be 0.?? (.09??) : %.2f" % test2[0] - print "------ trying the same again:----------" - test2 = inner(0,2, 2, g, ['v','n','p'], test2[1]) - print "should be 0.?? (.09??) : %.2f" % test2[0] - - -################################################################## -# just junk from here on down: # -################################################################## - -# def io(corpus): -# "(pseudo-code / wishful thinking) " -# g = initialize(corpus) # or corpus.tagset ? - -# P = {('v','n','p'):0.09} -# # P is used in v_q, w_q (expectation), so each sentence in the -# # corpus needs some initial P. - -# # --- Maximation: --- -# # -# # actually, this step (from Lari & Young) probably never happens -# # with DMV, since instead of the a[i,j,k] and b[i,m] vectors, we -# # have P_STOP and P_CHOOSE... or, in a sense it happens only we -# # calculate P_STOP and P_CHOOSE..somehow. -# for rule in g.p_rules: -# rule.num = 0 -# rule.den = 0 -# for pre_term in range(len(g.p_terminals)): -# ptnum[pre_term] = 0 -# ptden[pre_term] = 0 - -# # we could also flip this to make rules the outer loop, then we -# # wouldn't have to initialize den/num in loops of their own -# for sent in corpus: -# for rule in g.p_rules # Equation 20 -# for s in range(len(sent)): -# for t in range(s, len(sent)): -# rule.num += w(s,t, rule.LHS(),rule.L,rule.R, g, sent, P[sent]) -# rule.den += v(s,t, rule.LHS(), g, sent, P[sent]) -# # todo: do we need a "new-prob" vs "old-prob" distinction here? -# probably, since we use inner/outer which checks rule.prob() -# # todo: also, this wouldn't work, since for each sentence, we'd -# # discard the old probability; should rules be the outer -# # loop then? -# rule.prob = rule.num / rule.den -# for pre_term in range(len(g.p_terminals)): # Equation 21 -# num = 0 -# den = 0 -# for s in range(len(sent)): -# for t in range(s, len(sent)): -# num += v(t,t,pre_term, g, sent, P[sent]) -# den += v(s,t,pre_term, g, sent, P[sent]) - -# for rule in g.rules: -# rule.prob = rule.num / rule.den -# for pre_term in range(len(g.p_terminals)): -# g.p_terminals[pre_term] = ptnum[pre_term] / ptden[pre_term] - - -# # --- Expectation: --- -# for sent in corpus: # Equation 11 -# inside = inner(0, len(sent), ROOT, g, sent) -# P[sent] = inside[0] - -# # todo: set inner.chart to {} again, how? - -# # todo: need a old-P new-P distinction to check if we're below -# # threshold difference -# return "todo" - -# def w(s,t, LHS,L,R, g, sent, P_sent): -# w = 0 -# rule = g.rule(LHS, L, R) -# for r in range(s, t): -# w += rule.prob() * inner(s,r, L, g, sent) * inner(r+1, t, R, g, sent) * outer(s,t,LHS,g,sent) -# return w / P_sent - -# def v(s,t, LHS, g, sent, P_sent): -# return ( inner(s,t, LHS, g, sent) * outer(s,t, LHS, g, sent) ) / P_sent - - - - - diff --git a/src/io.py.~12~ b/src/io.py.~12~ deleted file mode 100755 index 7cbb25f..0000000 --- a/src/io.py.~12~ +++ /dev/null @@ -1,251 +0,0 @@ -################################################################## -# Changes: # -################################################################## -# 2008-05-24, KBU: -# - more elaborate pseudo-code in the io()-function -# -# 2008-05-25, KBU: -# - CNF_Rule has a function LHS() which returns head, in DMV_Rule this -# returns the pair of (bars, head). -# -# 2008-05-27 -# - CNF_Rule has __eq__ and __ne__ defined, so that we can use == and -# != on two such rules - - -# import numpy # numpy provides Fast Arrays, for future optimization -# import pprint # for pretty-printing - -DEBUG = 0 - -# some of dmv-module bleeding in here... todo: prettier (in inner()) -NOBAR = 0 -STOP = (NOBAR, -2) - -def debug(string): - '''Easily turn on/off inline debug printouts with this global. There's -a lot of cluttering debug statements here, todo: clean up''' - if DEBUG: - print string - -class CNF_Rule(): - '''A single CNF rule in the PCFG, of the form - head -> L R - where these are just integers - (where do we save the connection between number and symbol? - symbols being 'vbd' etc.)''' - def __eq__(self, other): - return self.LHS() == other.LHS() and self.R == other.R and self.L == other.L - def __ne__(self, other): - return self.LHS() != other.LHS() or self.R != other.R or self.L != other.L - def __str__(self): - return "%s -> %s %s [%.2f]" % (self.head, self.L, self.R, self.prob) - def __init__(self, head, L, R, prob): - self.head = head - self.R = R - self.L = L - self.prob = prob - def p(self, *arg): - "Return a probability, doesn't care about attachment..." - return self.prob - def LHS(self): - return self.head - -class Grammar(): - '''The PCFG used in the I/O-algorithm. - - Todo: as of now, this allows duplicate rules... should we check - for this? (eg. g = Grammar([x,x],[]) where x.prob == 1 may give - inner probabilities of 2.)''' - def rules(self, head): - return [rule for rule in self.p_rules if rule.head == head] - - def __init__(self, p_rules, p_terminals): - '''rules and p_terminals should be arrays, where p_terminals are of - the form [preterminal, terminal], and rules are CNF_Rule's.''' - self.p_rules = p_rules # could check for summing to 1 (+/- epsilon) - self.p_terminals = p_terminals - - - -def inner(s, t, LHS, g, sent, chart): - ''' Give the inner probability of having the node LHS cover whatever's - between s and t in sentence sent, using grammar g. - - Returns a pair of the inner probability and the chart - - For DMV, LHS is a pair (bar, h), but this function ought to be - agnostic about that. - - e() is an internal function, so the variable chart (a dictionary) - is available to all calls of e(). - - Since terminal probabilities are just simple lookups, they are not - put in the chart (although we could put them in there later to - optimize) - ''' - - def O(s): - return sent[s] - - def e(s,t,LHS): - '''Chart has lists of probability and whether or not we've attached -yet to L and R, each entry is a list [p, Rattach, Lattach], where if -Rattach==True then the rule has a right-attachment or there is one -lower in the tree (meaning we're no longer adjacent).''' - if (s, t, LHS) in chart: - return chart[(s, t, LHS)] - else: - debug( "trying from %d to %d with %s" % (s,t,LHS) ) - if s == t: - if (LHS, O(s)) in g.p_terminals: - prob = g.p_terminals[LHS, O(s)] # b[LHS, O(s)] - else: - prob = 0.0 # todo: is this the right way to deal with lacking rules? - print "\t LACKING TERMINAL:" - debug( "\t terminal: %s -> %s : %.1f" % (LHS, O(s), prob) ) - # terminals have no attachment - return prob - else: - if (s,t,LHS) not in chart: - # by default, not attachment yet - chart[(s,t,LHS)] = 0.0 #, False, False] - for rule in g.rules(LHS): # summing over j,k in a[LHS,j,k] - debug( "\tsumming rule %s" % rule ) - L = rule.L - R = rule.R - for r in range(s, t): # summing over r = s to r = t-1 - p_L = e(s, r, L) - p_R = e(r + 1, t, R) - p = rule.p("todo","todo") - chart[(s, t, LHS)] += p * p_L * p_R - debug( "\tchart[(%d,%d,%s)] = %.2f" % (s,t,LHS, chart[(s,t,LHS)]) ) - return chart[(s, t, LHS)] - # end of e-function - - inner_prob = e(s,t,LHS) - if DEBUG: - print "---CHART:---" - for k,v in chart.iteritems(): - print "\t%s -> %s_%d ... %s_%d : %.1f" % (k[2], O(k[0]), k[0], O(k[1]), k[1], v) - print "---CHART:end---" - return [inner_prob, chart] # inner_prob == chart[(s,t,LHS)] - - - - - - - - -if __name__ == "__main__": - print "IO-module tests:" - b = {} - s = CNF_Rule(0,1,2, 1.0) # s->np vp - np = CNF_Rule(1,3,4, 0.3) # np->n p - b[1, 'n'] = 0.7 # np->'n' - b[3, 'n'] = 1.0 # n->'n' - b[4, 'p'] = 1.0 # p->'p' - vp = CNF_Rule(2,5,1, 0.1) # vp->v np (two parses use this rule) - vp2 = CNF_Rule(2,2,4, 0.9) # vp->vp p - b[5, 'v'] = 1.0 # v->'v' - - g = Grammar([s,np,vp,vp2], b) - - print "The rules:" - for i in range(0,5): - for r in g.rules(i): - print r - print "" - - test1 = inner(0,0, 1, g, ['n'], {}) - if test1[0] != 0.7: - print "should be 0.70 : %.2f" % test1[0] - print "" - - DEBUG = 1 - test2 = inner(0,2, 2, g, ['v','n','p'], test1[1]) - print "should be 0.?? (.09??) : %.2f" % test2[0] - print "------ trying the same again:----------" - test2 = inner(0,2, 2, g, ['v','n','p'], test2[1]) - print "should be 0.?? (.09??) : %.2f" % test2[0] - - -################################################################## -# just junk from here on down: # -################################################################## - -# def io(corpus): -# "(pseudo-code / wishful thinking) " -# g = initialize(corpus) # or corpus.tagset ? - -# P = {('v','n','p'):0.09} -# # P is used in v_q, w_q (expectation), so each sentence in the -# # corpus needs some initial P. - -# # --- Maximation: --- -# # -# # actually, this step (from Lari & Young) probably never happens -# # with DMV, since instead of the a[i,j,k] and b[i,m] vectors, we -# # have P_STOP and P_CHOOSE... or, in a sense it happens only we -# # calculate P_STOP and P_CHOOSE..somehow. -# for rule in g.p_rules: -# rule.num = 0 -# rule.den = 0 -# for pre_term in range(len(g.p_terminals)): -# ptnum[pre_term] = 0 -# ptden[pre_term] = 0 - -# # we could also flip this to make rules the outer loop, then we -# # wouldn't have to initialize den/num in loops of their own -# for sent in corpus: -# for rule in g.p_rules # Equation 20 -# for s in range(len(sent)): -# for t in range(s, len(sent)): -# rule.num += w(s,t, rule.LHS(),rule.L,rule.R, g, sent, P[sent]) -# rule.den += v(s,t, rule.LHS(), g, sent, P[sent]) -# # todo: do we need a "new-prob" vs "old-prob" distinction here? -# probably, since we use inner/outer which checks rule.prob() -# # todo: also, this wouldn't work, since for each sentence, we'd -# # discard the old probability; should rules be the outer -# # loop then? -# rule.prob = rule.num / rule.den -# for pre_term in range(len(g.p_terminals)): # Equation 21 -# num = 0 -# den = 0 -# for s in range(len(sent)): -# for t in range(s, len(sent)): -# num += v(t,t,pre_term, g, sent, P[sent]) -# den += v(s,t,pre_term, g, sent, P[sent]) - -# for rule in g.rules: -# rule.prob = rule.num / rule.den -# for pre_term in range(len(g.p_terminals)): -# g.p_terminals[pre_term] = ptnum[pre_term] / ptden[pre_term] - - -# # --- Expectation: --- -# for sent in corpus: # Equation 11 -# inside = inner(0, len(sent), ROOT, g, sent) -# P[sent] = inside[0] - -# # todo: set inner.chart to {} again, how? - -# # todo: need a old-P new-P distinction to check if we're below -# # threshold difference -# return "todo" - -# def w(s,t, LHS,L,R, g, sent, P_sent): -# w = 0 -# rule = g.rule(LHS, L, R) -# for r in range(s, t): -# w += rule.prob() * inner(s,r, L, g, sent) * inner(r+1, t, R, g, sent) * outer(s,t,LHS,g,sent) -# return w / P_sent - -# def v(s,t, LHS, g, sent, P_sent): -# return ( inner(s,t, LHS, g, sent) * outer(s,t, LHS, g, sent) ) / P_sent - - - - - diff --git a/src/io.py.~13~ b/src/io.py.~13~ deleted file mode 100755 index 6fd820a..0000000 --- a/src/io.py.~13~ +++ /dev/null @@ -1,256 +0,0 @@ -################################################################## -# Changes: # -################################################################## -# 2008-05-24, KBU: -# - more elaborate pseudo-code in the io()-function -# -# 2008-05-25, KBU: -# - CNF_Rule has a function LHS() which returns head, in DMV_Rule this -# returns the pair of (bars, head). -# -# 2008-05-27 -# - CNF_Rule has __eq__ and __ne__ defined, so that we can use == and -# != on two such rules - - -# import numpy # numpy provides Fast Arrays, for future optimization -# import pprint # for pretty-printing - -DEBUG = 0 - -# some of dmv-module bleeding in here... todo: prettier (in inner()) -NOBAR = 0 -STOP = (NOBAR, -2) - -def debug(string): - '''Easily turn on/off inline debug printouts with this global. There's -a lot of cluttering debug statements here, todo: clean up''' - if DEBUG: - print string - -class CNF_Rule(): - '''A single CNF rule in the PCFG, of the form - LHS -> L R - where these are just integers - (where do we save the connection between number and symbol? - symbols being 'vbd' etc.)''' - def __eq__(self, other): - return self.LHS() == other.LHS() and self.R() == other.R() and self.L() == other.L() - def __ne__(self, other): - return self.LHS() != other.LHS() or self.R() != other.R() or self.L() != other.L() - def __str__(self): - return "%s -> %s %s [%.2f]" % (self.LHS(), self.L(), self.R(), self.prob) - def __init__(self, LHS, L, R, prob): - self.__LHS = LHS - self.__R = R - self.__L = L - self.prob = prob - def p(self, *arg): - "Return a probability, doesn't care about attachment..." - return self.prob - def LHS(self): - return self.__LHS - def L(self): - return self.__L - def R(self): - return self.__R - - -class Grammar(): - '''The PCFG used in the I/O-algorithm. - - Todo: as of now, this allows duplicate rules... should we check - for this? (eg. g = Grammar([x,x],[]) where x.prob == 1 may give - inner probabilities of 2.)''' - def rules(self, LHS): - return [rule for rule in self.p_rules if rule.LHS() == LHS] - - def __init__(self, p_rules, p_terminals): - '''rules and p_terminals should be arrays, where p_terminals are of - the form [preterminal, terminal], and rules are CNF_Rule's.''' - self.p_rules = p_rules # could check for summing to 1 (+/- epsilon) - self.p_terminals = p_terminals - - - -def inner(s, t, LHS, g, sent, chart): - ''' Give the inner probability of having the node LHS cover whatever's - between s and t in sentence sent, using grammar g. - - Returns a pair of the inner probability and the chart - - For DMV, LHS is a pair (bar, h), but this function ought to be - agnostic about that. - - e() is an internal function, so the variable chart (a dictionary) - is available to all calls of e(). - - Since terminal probabilities are just simple lookups, they are not - put in the chart (although we could put them in there later to - optimize) - ''' - - def O(s): - return sent[s] - - def e(s,t,LHS): - '''Chart has lists of probability and whether or not we've attached -yet to L and R, each entry is a list [p, Rattach, Lattach], where if -Rattach==True then the rule has a right-attachment or there is one -lower in the tree (meaning we're no longer adjacent).''' - if (s, t, LHS) in chart: - return chart[(s, t, LHS)] - else: - debug( "trying from %d to %d with %s" % (s,t,LHS) ) - if s == t: - if (LHS, O(s)) in g.p_terminals: - prob = g.p_terminals[LHS, O(s)] # b[LHS, O(s)] - else: - prob = 0.0 # todo: is this the right way to deal with lacking rules? - print "\t LACKING TERMINAL:" - debug( "\t terminal: %s -> %s : %.1f" % (LHS, O(s), prob) ) - # terminals have no attachment - return prob - else: - if (s,t,LHS) not in chart: - # by default, not attachment yet - chart[(s,t,LHS)] = 0.0 #, False, False] - for rule in g.rules(LHS): # summing over j,k in a[LHS,j,k] - debug( "\tsumming rule %s" % rule ) - L = rule.L() - R = rule.R() - for r in range(s, t): # summing over r = s to r = t-1 - p_L = e(s, r, L) - p_R = e(r + 1, t, R) - p = rule.p("todo","todo") - chart[(s, t, LHS)] += p * p_L * p_R - debug( "\tchart[(%d,%d,%s)] = %.2f" % (s,t,LHS, chart[(s,t,LHS)]) ) - return chart[(s, t, LHS)] - # end of e-function - - inner_prob = e(s,t,LHS) - if DEBUG: - print "---CHART:---" - for k,v in chart.iteritems(): - print "\t%s -> %s_%d ... %s_%d : %.1f" % (k[2], O(k[0]), k[0], O(k[1]), k[1], v) - print "---CHART:end---" - return [inner_prob, chart] # inner_prob == chart[(s,t,LHS)] - - - - - - - - -if __name__ == "__main__": - print "IO-module tests:" - b = {} - s = CNF_Rule(0,1,2, 1.0) # s->np vp - np = CNF_Rule(1,3,4, 0.3) # np->n p - b[1, 'n'] = 0.7 # np->'n' - b[3, 'n'] = 1.0 # n->'n' - b[4, 'p'] = 1.0 # p->'p' - vp = CNF_Rule(2,5,1, 0.1) # vp->v np (two parses use this rule) - vp2 = CNF_Rule(2,2,4, 0.9) # vp->vp p - b[5, 'v'] = 1.0 # v->'v' - - g = Grammar([s,np,vp,vp2], b) - - print "The rules:" - for i in range(0,5): - for r in g.rules(i): - print r - print "" - - test1 = inner(0,0, 1, g, ['n'], {}) - if test1[0] != 0.7: - print "should be 0.70 : %.2f" % test1[0] - print "" - - DEBUG = 1 - test2 = inner(0,2, 2, g, ['v','n','p'], test1[1]) - print "should be 0.?? (.09??) : %.2f" % test2[0] - print "------ trying the same again:----------" - test2 = inner(0,2, 2, g, ['v','n','p'], test2[1]) - print "should be 0.?? (.09??) : %.2f" % test2[0] - - -################################################################## -# just junk from here on down: # -################################################################## - -# def io(corpus): -# "(pseudo-code / wishful thinking) " -# g = initialize(corpus) # or corpus.tagset ? - -# P = {('v','n','p'):0.09} -# # P is used in v_q, w_q (expectation), so each sentence in the -# # corpus needs some initial P. - -# # --- Maximation: --- -# # -# # actually, this step (from Lari & Young) probably never happens -# # with DMV, since instead of the a[i,j,k] and b[i,m] vectors, we -# # have P_STOP and P_CHOOSE... or, in a sense it happens only we -# # calculate P_STOP and P_CHOOSE..somehow. -# for rule in g.p_rules: -# rule.num = 0 -# rule.den = 0 -# for pre_term in range(len(g.p_terminals)): -# ptnum[pre_term] = 0 -# ptden[pre_term] = 0 - -# # we could also flip this to make rules the outer loop, then we -# # wouldn't have to initialize den/num in loops of their own -# for sent in corpus: -# for rule in g.p_rules # Equation 20 -# for s in range(len(sent)): -# for t in range(s, len(sent)): -# rule.num += w(s,t, rule.LHS(),rule.L,rule.R, g, sent, P[sent]) -# rule.den += v(s,t, rule.LHS(), g, sent, P[sent]) -# # todo: do we need a "new-prob" vs "old-prob" distinction here? -# probably, since we use inner/outer which checks rule.prob() -# # todo: also, this wouldn't work, since for each sentence, we'd -# # discard the old probability; should rules be the outer -# # loop then? -# rule.prob = rule.num / rule.den -# for pre_term in range(len(g.p_terminals)): # Equation 21 -# num = 0 -# den = 0 -# for s in range(len(sent)): -# for t in range(s, len(sent)): -# num += v(t,t,pre_term, g, sent, P[sent]) -# den += v(s,t,pre_term, g, sent, P[sent]) - -# for rule in g.rules: -# rule.prob = rule.num / rule.den -# for pre_term in range(len(g.p_terminals)): -# g.p_terminals[pre_term] = ptnum[pre_term] / ptden[pre_term] - - -# # --- Expectation: --- -# for sent in corpus: # Equation 11 -# inside = inner(0, len(sent), ROOT, g, sent) -# P[sent] = inside[0] - -# # todo: set inner.chart to {} again, how? - -# # todo: need a old-P new-P distinction to check if we're below -# # threshold difference -# return "todo" - -# def w(s,t, LHS,L,R, g, sent, P_sent): -# w = 0 -# rule = g.rule(LHS, L, R) -# for r in range(s, t): -# w += rule.prob() * inner(s,r, L, g, sent) * inner(r+1, t, R, g, sent) * outer(s,t,LHS,g,sent) -# return w / P_sent - -# def v(s,t, LHS, g, sent, P_sent): -# return ( inner(s,t, LHS, g, sent) * outer(s,t, LHS, g, sent) ) / P_sent - - - - - diff --git a/src/io.py.~14~ b/src/io.py.~14~ deleted file mode 100755 index 6c146e8..0000000 --- a/src/io.py.~14~ +++ /dev/null @@ -1,264 +0,0 @@ -################################################################## -# Changes: # -################################################################## -# 2008-05-24, KBU: -# - more elaborate pseudo-code in the io()-function -# -# 2008-05-25, KBU: -# - CNF_Rule has a function LHS() which returns head, in DMV_Rule this -# returns the pair of (bars, head). -# -# 2008-05-27 -# - CNF_Rule has __eq__ and __ne__ defined, so that we can use == and -# != on two such rules - - -# import numpy # numpy provides Fast Arrays, for future optimization -# import pprint # for pretty-printing - -DEBUG = 0 - -# some of dmv-module bleeding in here... todo: prettier (in inner()) -NOBAR = 0 -STOP = (NOBAR, -2) - -def debug(string): - '''Easily turn on/off inline debug printouts with this global. There's -a lot of cluttering debug statements here, todo: clean up''' - if DEBUG: - print string - - -class Grammar(): - '''The PCFG used in the I/O-algorithm. - - Public members: - numtag, p_terminals - - Todo: as of now, this allows duplicate rules... should we check - for this? (eg. g = Grammar([x,x],[]) where x.prob == 1 may give - inner probabilities of 2.)''' - def all_rules(self): - return self.__p_rules - - def rules(self, LHS): - return [rule for rule in self.all_rules() if rule.LHS() == LHS] - - def __init__(self, p_rules, p_terminals, numtag): - '''rules and p_terminals should be arrays, where p_terminals are of - the form [preterminal, terminal], and rules are CNF_Rule's.''' - self.__p_rules = p_rules # todo: could check for summing to 1 (+/- epsilon) - self.p_terminals = p_terminals - self.numtag = numtag - - - - -class CNF_Rule(): - '''A single CNF rule in the PCFG, of the form - LHS -> L R - where these are just integers - (where do we save the connection between number and symbol? - symbols being 'vbd' etc.)''' - def __eq__(self, other): - return self.LHS() == other.LHS() and self.R() == other.R() and self.L() == other.L() - def __ne__(self, other): - return self.LHS() != other.LHS() or self.R() != other.R() or self.L() != other.L() - def __str__(self): - return "%s -> %s %s [%.2f]" % (self.LHS(), self.L(), self.R(), self.prob) - def __init__(self, LHS, L, R, prob): - self.__LHS = LHS - self.__R = R - self.__L = L - self.prob = prob - def p(self, *arg): - "Return a probability, doesn't care about attachment..." - return self.prob - def LHS(self): - return self.__LHS - def L(self): - return self.__L - def R(self): - return self.__R - -def inner(s, t, LHS, g, sent, chart): - ''' Give the inner probability of having the node LHS cover whatever's - between s and t in sentence sent, using grammar g. - - Returns a pair of the inner probability and the chart - - For DMV, LHS is a pair (bar, h), but this function ought to be - agnostic about that. - - e() is an internal function, so the variable chart (a dictionary) - is available to all calls of e(). - - Since terminal probabilities are just simple lookups, they are not - put in the chart (although we could put them in there later to - optimize) - ''' - - def O(s): - return sent[s] - - def e(s,t,LHS): - '''Chart has lists of probability and whether or not we've attached -yet to L and R, each entry is a list [p, Rattach, Lattach], where if -Rattach==True then the rule has a right-attachment or there is one -lower in the tree (meaning we're no longer adjacent).''' - if (s, t, LHS) in chart: - return chart[(s, t, LHS)] - else: - debug( "trying from %d to %d with %s" % (s,t,LHS) ) - if s == t: - if (LHS, O(s)) in g.p_terminals: - prob = g.p_terminals[LHS, O(s)] # b[LHS, O(s)] - else: - prob = 0.0 # todo: is this the right way to deal with lacking rules? - print "\t LACKING TERMINAL:" - debug( "\t terminal: %s -> %s : %.1f" % (LHS, O(s), prob) ) - # terminals have no attachment - return prob - else: - if (s,t,LHS) not in chart: - # by default, not attachment yet - chart[(s,t,LHS)] = 0.0 #, False, False] - for rule in g.rules(LHS): # summing over j,k in a[LHS,j,k] - debug( "\tsumming rule %s" % rule ) - L = rule.L() - R = rule.R() - for r in range(s, t): # summing over r = s to r = t-1 - p_L = e(s, r, L) - p_R = e(r + 1, t, R) - p = rule.p("todo","todo") - chart[(s, t, LHS)] += p * p_L * p_R - debug( "\tchart[(%d,%d,%s)] = %.2f" % (s,t,LHS, chart[(s,t,LHS)]) ) - return chart[(s, t, LHS)] - # end of e-function - - inner_prob = e(s,t,LHS) - if DEBUG: - print "---CHART:---" - for k,v in chart.iteritems(): - print "\t%s -> %s_%d ... %s_%d : %.1f" % (k[2], O(k[0]), k[0], O(k[1]), k[1], v) - print "---CHART:end---" - return [inner_prob, chart] # inner_prob == chart[(s,t,LHS)] - - - - - - - - -if __name__ == "__main__": - print "IO-module tests:" - b = {} - s = CNF_Rule(0,1,2, 1.0) # s->np vp - np = CNF_Rule(1,3,4, 0.3) # np->n p - b[1, 'n'] = 0.7 # np->'n' - b[3, 'n'] = 1.0 # n->'n' - b[4, 'p'] = 1.0 # p->'p' - vp = CNF_Rule(2,5,1, 0.1) # vp->v np (two parses use this rule) - vp2 = CNF_Rule(2,2,4, 0.9) # vp->vp p - b[5, 'v'] = 1.0 # v->'v' - - g = Grammar([s,np,vp,vp2], b, {0:'s',1:'np',2:'vp',3:'n',4:'p',5:'v'}) - - print "The rules:" - for i in range(0,5): - for r in g.rules(i): - print r - print "" - - test1 = inner(0,0, 1, g, ['n'], {}) - if test1[0] != 0.7: - print "should be 0.70 : %.2f" % test1[0] - print "" - - DEBUG = 1 - test2 = inner(0,2, 2, g, ['v','n','p'], test1[1]) - print "should be 0.?? (.09??) : %.2f" % test2[0] - print "------ trying the same again:----------" - test2 = inner(0,2, 2, g, ['v','n','p'], test2[1]) - print "should be 0.?? (.09??) : %.2f" % test2[0] - - -################################################################## -# just junk from here on down: # -################################################################## - -# def io(corpus): -# "(pseudo-code / wishful thinking) " -# g = initialize(corpus) # or corpus.tagset ? - -# P = {('v','n','p'):0.09} -# # P is used in v_q, w_q (expectation), so each sentence in the -# # corpus needs some initial P. - -# # --- Maximation: --- -# # -# # actually, this step (from Lari & Young) probably never happens -# # with DMV, since instead of the a[i,j,k] and b[i,m] vectors, we -# # have P_STOP and P_CHOOSE... or, in a sense it happens only we -# # calculate P_STOP and P_CHOOSE..somehow. -# for rule in g.p_rules: -# rule.num = 0 -# rule.den = 0 -# for pre_term in range(len(g.p_terminals)): -# ptnum[pre_term] = 0 -# ptden[pre_term] = 0 - -# # we could also flip this to make rules the outer loop, then we -# # wouldn't have to initialize den/num in loops of their own -# for sent in corpus: -# for rule in g.p_rules # Equation 20 -# for s in range(len(sent)): -# for t in range(s, len(sent)): -# rule.num += w(s,t, rule.LHS(),rule.L,rule.R, g, sent, P[sent]) -# rule.den += v(s,t, rule.LHS(), g, sent, P[sent]) -# # todo: do we need a "new-prob" vs "old-prob" distinction here? -# probably, since we use inner/outer which checks rule.prob() -# # todo: also, this wouldn't work, since for each sentence, we'd -# # discard the old probability; should rules be the outer -# # loop then? -# rule.prob = rule.num / rule.den -# for pre_term in range(len(g.p_terminals)): # Equation 21 -# num = 0 -# den = 0 -# for s in range(len(sent)): -# for t in range(s, len(sent)): -# num += v(t,t,pre_term, g, sent, P[sent]) -# den += v(s,t,pre_term, g, sent, P[sent]) - -# for rule in g.rules: -# rule.prob = rule.num / rule.den -# for pre_term in range(len(g.p_terminals)): -# g.p_terminals[pre_term] = ptnum[pre_term] / ptden[pre_term] - - -# # --- Expectation: --- -# for sent in corpus: # Equation 11 -# inside = inner(0, len(sent), ROOT, g, sent) -# P[sent] = inside[0] - -# # todo: set inner.chart to {} again, how? - -# # todo: need a old-P new-P distinction to check if we're below -# # threshold difference -# return "todo" - -# def w(s,t, LHS,L,R, g, sent, P_sent): -# w = 0 -# rule = g.rule(LHS, L, R) -# for r in range(s, t): -# w += rule.prob() * inner(s,r, L, g, sent) * inner(r+1, t, R, g, sent) * outer(s,t,LHS,g,sent) -# return w / P_sent - -# def v(s,t, LHS, g, sent, P_sent): -# return ( inner(s,t, LHS, g, sent) * outer(s,t, LHS, g, sent) ) / P_sent - - - - - diff --git a/src/io.py.~15~ b/src/io.py.~15~ deleted file mode 100755 index 2b20ad7..0000000 --- a/src/io.py.~15~ +++ /dev/null @@ -1,271 +0,0 @@ -################################################################## -# Changes: # -################################################################## -# 2008-05-24, KBU: -# - more elaborate pseudo-code in the io()-function -# -# 2008-05-25, KBU: -# - CNF_Rule has a function LHS() which returns head, in DMV_Rule this -# returns the pair of (bars, head). -# -# 2008-05-27 -# - CNF_Rule has __eq__ and __ne__ defined, so that we can use == and -# != on two such rules - - -# import numpy # numpy provides Fast Arrays, for future optimization -# import pprint # for pretty-printing - -DEBUG = 0 - -# some of dmv-module bleeding in here... todo: prettier (in inner()) -NOBAR = 0 -STOP = (NOBAR, -2) - -def debug(string): - '''Easily turn on/off inline debug printouts with this global. There's -a lot of cluttering debug statements here, todo: clean up''' - if DEBUG: - print string - - -class Grammar(): - '''The PCFG used in the I/O-algorithm. - - Public members: - p_terminals - - Todo: as of now, this allows duplicate rules... should we check - for this? (eg. g = Grammar([x,x],[]) where x.prob == 1 may give - inner probabilities of 2.)''' - def all_rules(self): - return self.__p_rules - - def rules(self, LHS): - return [rule for rule in self.all_rules() if rule.LHS() == LHS] - - def numtag(self): - return __numtag - - def tagnum(self): - return __tagnum - - def __init__(self, p_rules, p_terminals, numtag, tagnum): - '''rules and p_terminals should be arrays, where p_terminals are of - the form [preterminal, terminal], and rules are CNF_Rule's.''' - self.__p_rules = p_rules # todo: could check for summing to 1 (+/- epsilon) - self.__numtag = numtag - self.__tagnum = tagnum - self.p_terminals = p_terminals - - - - -class CNF_Rule(): - '''A single CNF rule in the PCFG, of the form - LHS -> L R - where these are just integers - (where do we save the connection between number and symbol? - symbols being 'vbd' etc.)''' - def __eq__(self, other): - return self.LHS() == other.LHS() and self.R() == other.R() and self.L() == other.L() - def __ne__(self, other): - return self.LHS() != other.LHS() or self.R() != other.R() or self.L() != other.L() - def __str__(self): - return "%s -> %s %s [%.2f]" % (self.LHS(), self.L(), self.R(), self.prob) - def __init__(self, LHS, L, R, prob): - self.__LHS = LHS - self.__R = R - self.__L = L - self.prob = prob - def p(self, *arg): - "Return a probability, doesn't care about attachment..." - return self.prob - def LHS(self): - return self.__LHS - def L(self): - return self.__L - def R(self): - return self.__R - -def inner(s, t, LHS, g, sent, chart): - ''' Give the inner probability of having the node LHS cover whatever's - between s and t in sentence sent, using grammar g. - - Returns a pair of the inner probability and the chart - - For DMV, LHS is a pair (bar, h), but this function ought to be - agnostic about that. - - e() is an internal function, so the variable chart (a dictionary) - is available to all calls of e(). - - Since terminal probabilities are just simple lookups, they are not - put in the chart (although we could put them in there later to - optimize) - ''' - - def O(s): - return sent[s] - - def e(s,t,LHS): - '''Chart has lists of probability and whether or not we've attached -yet to L and R, each entry is a list [p, Rattach, Lattach], where if -Rattach==True then the rule has a right-attachment or there is one -lower in the tree (meaning we're no longer adjacent).''' - if (s, t, LHS) in chart: - return chart[(s, t, LHS)] - else: - debug( "trying from %d to %d with %s" % (s,t,LHS) ) - if s == t: - if (LHS, O(s)) in g.p_terminals: - prob = g.p_terminals[LHS, O(s)] # b[LHS, O(s)] - else: - prob = 0.0 # todo: is this the right way to deal with lacking rules? - print "\t LACKING TERMINAL:" - debug( "\t terminal: %s -> %s : %.1f" % (LHS, O(s), prob) ) - # terminals have no attachment - return prob - else: - if (s,t,LHS) not in chart: - # by default, not attachment yet - chart[(s,t,LHS)] = 0.0 #, False, False] - for rule in g.rules(LHS): # summing over j,k in a[LHS,j,k] - debug( "\tsumming rule %s" % rule ) - L = rule.L() - R = rule.R() - for r in range(s, t): # summing over r = s to r = t-1 - p_L = e(s, r, L) - p_R = e(r + 1, t, R) - p = rule.p("todo","todo") - chart[(s, t, LHS)] += p * p_L * p_R - debug( "\tchart[(%d,%d,%s)] = %.2f" % (s,t,LHS, chart[(s,t,LHS)]) ) - return chart[(s, t, LHS)] - # end of e-function - - inner_prob = e(s,t,LHS) - if DEBUG: - print "---CHART:---" - for k,v in chart.iteritems(): - print "\t%s -> %s_%d ... %s_%d : %.1f" % (k[2], O(k[0]), k[0], O(k[1]), k[1], v) - print "---CHART:end---" - return [inner_prob, chart] # inner_prob == chart[(s,t,LHS)] - - - - - - - - -if __name__ == "__main__": - print "IO-module tests:" - b = {} - s = CNF_Rule(0,1,2, 1.0) # s->np vp - np = CNF_Rule(1,3,4, 0.3) # np->n p - b[1, 'n'] = 0.7 # np->'n' - b[3, 'n'] = 1.0 # n->'n' - b[4, 'p'] = 1.0 # p->'p' - vp = CNF_Rule(2,5,1, 0.1) # vp->v np (two parses use this rule) - vp2 = CNF_Rule(2,2,4, 0.9) # vp->vp p - b[5, 'v'] = 1.0 # v->'v' - - g = Grammar([s,np,vp,vp2], b, {0:'s',1:'np',2:'vp',3:'n',4:'p',5:'v'}) - - print "The rules:" - for i in range(0,5): - for r in g.rules(i): - print r - print "" - - test1 = inner(0,0, 1, g, ['n'], {}) - if test1[0] != 0.7: - print "should be 0.70 : %.2f" % test1[0] - print "" - - DEBUG = 1 - test2 = inner(0,2, 2, g, ['v','n','p'], test1[1]) - print "should be 0.?? (.09??) : %.2f" % test2[0] - print "------ trying the same again:----------" - test2 = inner(0,2, 2, g, ['v','n','p'], test2[1]) - print "should be 0.?? (.09??) : %.2f" % test2[0] - - -################################################################## -# just junk from here on down: # -################################################################## - -# def io(corpus): -# "(pseudo-code / wishful thinking) " -# g = initialize(corpus) # or corpus.tagset ? - -# P = {('v','n','p'):0.09} -# # P is used in v_q, w_q (expectation), so each sentence in the -# # corpus needs some initial P. - -# # --- Maximation: --- -# # -# # actually, this step (from Lari & Young) probably never happens -# # with DMV, since instead of the a[i,j,k] and b[i,m] vectors, we -# # have P_STOP and P_CHOOSE... or, in a sense it happens only we -# # calculate P_STOP and P_CHOOSE..somehow. -# for rule in g.p_rules: -# rule.num = 0 -# rule.den = 0 -# for pre_term in range(len(g.p_terminals)): -# ptnum[pre_term] = 0 -# ptden[pre_term] = 0 - -# # we could also flip this to make rules the outer loop, then we -# # wouldn't have to initialize den/num in loops of their own -# for sent in corpus: -# for rule in g.p_rules # Equation 20 -# for s in range(len(sent)): -# for t in range(s, len(sent)): -# rule.num += w(s,t, rule.LHS(),rule.L,rule.R, g, sent, P[sent]) -# rule.den += v(s,t, rule.LHS(), g, sent, P[sent]) -# # todo: do we need a "new-prob" vs "old-prob" distinction here? -# probably, since we use inner/outer which checks rule.prob() -# # todo: also, this wouldn't work, since for each sentence, we'd -# # discard the old probability; should rules be the outer -# # loop then? -# rule.prob = rule.num / rule.den -# for pre_term in range(len(g.p_terminals)): # Equation 21 -# num = 0 -# den = 0 -# for s in range(len(sent)): -# for t in range(s, len(sent)): -# num += v(t,t,pre_term, g, sent, P[sent]) -# den += v(s,t,pre_term, g, sent, P[sent]) - -# for rule in g.rules: -# rule.prob = rule.num / rule.den -# for pre_term in range(len(g.p_terminals)): -# g.p_terminals[pre_term] = ptnum[pre_term] / ptden[pre_term] - - -# # --- Expectation: --- -# for sent in corpus: # Equation 11 -# inside = inner(0, len(sent), ROOT, g, sent) -# P[sent] = inside[0] - -# # todo: set inner.chart to {} again, how? - -# # todo: need a old-P new-P distinction to check if we're below -# # threshold difference -# return "todo" - -# def w(s,t, LHS,L,R, g, sent, P_sent): -# w = 0 -# rule = g.rule(LHS, L, R) -# for r in range(s, t): -# w += rule.prob() * inner(s,r, L, g, sent) * inner(r+1, t, R, g, sent) * outer(s,t,LHS,g,sent) -# return w / P_sent - -# def v(s,t, LHS, g, sent, P_sent): -# return ( inner(s,t, LHS, g, sent) * outer(s,t, LHS, g, sent) ) / P_sent - - - - - diff --git a/src/io.py.~16~ b/src/io.py.~16~ deleted file mode 100755 index 5388e9a..0000000 --- a/src/io.py.~16~ +++ /dev/null @@ -1,272 +0,0 @@ -################################################################## -# Changes: # -################################################################## -# 2008-05-24, KBU: -# - more elaborate pseudo-code in the io()-function -# -# 2008-05-25, KBU: -# - CNF_Rule has a function LHS() which returns head, in DMV_Rule this -# returns the pair of (bars, head). -# -# 2008-05-27 -# - CNF_Rule has __eq__ and __ne__ defined, so that we can use == and -# != on two such rules - - -# import numpy # numpy provides Fast Arrays, for future optimization -# import pprint # for pretty-printing - -DEBUG = 0 - -# some of dmv-module bleeding in here... todo: prettier (in inner()) -NOBAR = 0 -STOP = (NOBAR, -2) - -def debug(string): - '''Easily turn on/off inline debug printouts with this global. There's -a lot of cluttering debug statements here, todo: clean up''' - if DEBUG: - print string - - -class Grammar(): - '''The PCFG used in the I/O-algorithm. - - Public members: - p_terminals - - Todo: as of now, this allows duplicate rules... should we check - for this? (eg. g = Grammar([x,x],[]) where x.prob == 1 may give - inner probabilities of 2.)''' - def all_rules(self): - return self.__p_rules - - def rules(self, LHS): - return [rule for rule in self.all_rules() if rule.LHS() == LHS] - - def numtag(self): - return __numtag - - def tagnum(self): - return __tagnum - - def __init__(self, p_rules, p_terminals, numtag, tagnum): - '''rules and p_terminals should be arrays, where p_terminals are of - the form [preterminal, terminal], and rules are CNF_Rule's.''' - self.__p_rules = p_rules # todo: could check for summing to 1 (+/- epsilon) - self.__numtag = numtag - self.__tagnum = tagnum - self.p_terminals = p_terminals - - - - -class CNF_Rule(): - '''A single CNF rule in the PCFG, of the form - LHS -> L R - where these are just integers - (where do we save the connection between number and symbol? - symbols being 'vbd' etc.)''' - def __eq__(self, other): - return self.LHS() == other.LHS() and self.R() == other.R() and self.L() == other.L() - def __ne__(self, other): - return self.LHS() != other.LHS() or self.R() != other.R() or self.L() != other.L() - def __str__(self): - return "%s -> %s %s [%.2f]" % (self.LHS(), self.L(), self.R(), self.prob) - def __init__(self, LHS, L, R, prob): - self.__LHS = LHS - self.__R = R - self.__L = L - self.prob = prob - def p(self, *arg): - "Return a probability, doesn't care about attachment..." - return self.prob - def LHS(self): - return self.__LHS - def L(self): - return self.__L - def R(self): - return self.__R - -def inner(s, t, LHS, g, sent, chart): - ''' Give the inner probability of having the node LHS cover whatever's - between s and t in sentence sent, using grammar g. - - Returns a pair of the inner probability and the chart - - For DMV, LHS is a pair (bar, h), but this function ought to be - agnostic about that. - - e() is an internal function, so the variable chart (a dictionary) - is available to all calls of e(). - - Since terminal probabilities are just simple lookups, they are not - put in the chart (although we could put them in there later to - optimize) - ''' - - def O(s): - return sent[s] - - def e(s,t,LHS): - '''Chart has lists of probability and whether or not we've attached -yet to L and R, each entry is a list [p, Rattach, Lattach], where if -Rattach==True then the rule has a right-attachment or there is one -lower in the tree (meaning we're no longer adjacent).''' - if (s, t, LHS) in chart: - return chart[(s, t, LHS)] - else: - debug( "trying from %d to %d with %s" % (s,t,LHS) ) - if s == t: - if (LHS, O(s)) in g.p_terminals: - prob = g.p_terminals[LHS, O(s)] # b[LHS, O(s)] - else: - prob = 0.0 # todo: is this the right way to deal with lacking rules? - print "\t LACKING TERMINAL:" - debug( "\t terminal: %s -> %s : %.1f" % (LHS, O(s), prob) ) - # terminals have no attachment - return prob - else: - if (s,t,LHS) not in chart: - # by default, not attachment yet - chart[(s,t,LHS)] = 0.0 #, False, False] - for rule in g.rules(LHS): # summing over j,k in a[LHS,j,k] - debug( "\tsumming rule %s" % rule ) - L = rule.L() - R = rule.R() - for r in range(s, t): # summing over r = s to r = t-1 - p_L = e(s, r, L) - p_R = e(r + 1, t, R) - p = rule.p("todo","todo") - chart[(s, t, LHS)] += p * p_L * p_R - debug( "\tchart[(%d,%d,%s)] = %.2f" % (s,t,LHS, chart[(s,t,LHS)]) ) - return chart[(s, t, LHS)] - # end of e-function - - inner_prob = e(s,t,LHS) - if DEBUG: - print "---CHART:---" - for k,v in chart.iteritems(): - print "\t%s -> %s_%d ... %s_%d : %.1f" % (k[2], O(k[0]), k[0], O(k[1]), k[1], v) - print "---CHART:end---" - return [inner_prob, chart] # inner_prob == chart[(s,t,LHS)] - - - - - - - - -if __name__ == "__main__": - print "IO-module tests:" - b = {} - s = CNF_Rule(0,1,2, 1.0) # s->np vp - np = CNF_Rule(1,3,4, 0.3) # np->n p - b[1, 'n'] = 0.7 # np->'n' - b[3, 'n'] = 1.0 # n->'n' - b[4, 'p'] = 1.0 # p->'p' - vp = CNF_Rule(2,5,1, 0.1) # vp->v np (two parses use this rule) - vp2 = CNF_Rule(2,2,4, 0.9) # vp->vp p - b[5, 'v'] = 1.0 # v->'v' - - g = Grammar([s,np,vp,vp2], b, {0:'s',1:'np',2:'vp',3:'n',4:'p',5:'v'}, - {'s':0,'np':1,'vp':2,'n':3,'p':4,'v':5}) - - print "The rules:" - for i in range(0,5): - for r in g.rules(i): - print r - print "" - - test1 = inner(0,0, 1, g, ['n'], {}) - if test1[0] != 0.7: - print "should be 0.70 : %.2f" % test1[0] - print "" - - DEBUG = 1 - test2 = inner(0,2, 2, g, ['v','n','p'], test1[1]) - print "should be 0.?? (.09??) : %.2f" % test2[0] - print "------ trying the same again:----------" - test2 = inner(0,2, 2, g, ['v','n','p'], test2[1]) - print "should be 0.?? (.09??) : %.2f" % test2[0] - - -################################################################## -# just junk from here on down: # -################################################################## - -# def io(corpus): -# "(pseudo-code / wishful thinking) " -# g = initialize(corpus) # or corpus.tagset ? - -# P = {('v','n','p'):0.09} -# # P is used in v_q, w_q (expectation), so each sentence in the -# # corpus needs some initial P. - -# # --- Maximation: --- -# # -# # actually, this step (from Lari & Young) probably never happens -# # with DMV, since instead of the a[i,j,k] and b[i,m] vectors, we -# # have P_STOP and P_CHOOSE... or, in a sense it happens only we -# # calculate P_STOP and P_CHOOSE..somehow. -# for rule in g.p_rules: -# rule.num = 0 -# rule.den = 0 -# for pre_term in range(len(g.p_terminals)): -# ptnum[pre_term] = 0 -# ptden[pre_term] = 0 - -# # we could also flip this to make rules the outer loop, then we -# # wouldn't have to initialize den/num in loops of their own -# for sent in corpus: -# for rule in g.p_rules # Equation 20 -# for s in range(len(sent)): -# for t in range(s, len(sent)): -# rule.num += w(s,t, rule.LHS(),rule.L,rule.R, g, sent, P[sent]) -# rule.den += v(s,t, rule.LHS(), g, sent, P[sent]) -# # todo: do we need a "new-prob" vs "old-prob" distinction here? -# probably, since we use inner/outer which checks rule.prob() -# # todo: also, this wouldn't work, since for each sentence, we'd -# # discard the old probability; should rules be the outer -# # loop then? -# rule.prob = rule.num / rule.den -# for pre_term in range(len(g.p_terminals)): # Equation 21 -# num = 0 -# den = 0 -# for s in range(len(sent)): -# for t in range(s, len(sent)): -# num += v(t,t,pre_term, g, sent, P[sent]) -# den += v(s,t,pre_term, g, sent, P[sent]) - -# for rule in g.rules: -# rule.prob = rule.num / rule.den -# for pre_term in range(len(g.p_terminals)): -# g.p_terminals[pre_term] = ptnum[pre_term] / ptden[pre_term] - - -# # --- Expectation: --- -# for sent in corpus: # Equation 11 -# inside = inner(0, len(sent), ROOT, g, sent) -# P[sent] = inside[0] - -# # todo: set inner.chart to {} again, how? - -# # todo: need a old-P new-P distinction to check if we're below -# # threshold difference -# return "todo" - -# def w(s,t, LHS,L,R, g, sent, P_sent): -# w = 0 -# rule = g.rule(LHS, L, R) -# for r in range(s, t): -# w += rule.prob() * inner(s,r, L, g, sent) * inner(r+1, t, R, g, sent) * outer(s,t,LHS,g,sent) -# return w / P_sent - -# def v(s,t, LHS, g, sent, P_sent): -# return ( inner(s,t, LHS, g, sent) * outer(s,t, LHS, g, sent) ) / P_sent - - - - - diff --git a/src/io.py.~1~ b/src/io.py.~1~ deleted file mode 100755 index e5e5f5b..0000000 --- a/src/io.py.~1~ +++ /dev/null @@ -1,235 +0,0 @@ -################################################################## -# Changes: # -################################################################## -# 2008-05-24, KBU: -# - more elaborate pseudo-code in the io()-function -# -# 2008-05-25, KBU: -# - CNF_Rule has a function LHS() which returns head, in DMV_Rule this -# returns the pair of (bars, head). -# -# 2008-05-27 -# - CNF_Rule has __eq__ and __ne__ defined, so that we can use == and -# != on two such rules - - -# import numpy # numpy provides Fast Arrays, for future optimization -# import pprint # for pretty-printing - -DEBUG = 0 -def debug(string): - '''Easily turn on/off inline debug printouts with this global. There's -a lot of cluttering debug statements here, todo: clean up''' - if DEBUG: - print string - -class CNF_Rule(): - '''A single CNF rule in the PCFG, of the form - head -> L R - where these are just integers - (where do we save the connection between number and symbol? - symbols being 'vbd' etc.)''' - def __eq__(self, other): - return self.LHS() == other.LHS() and self.R == other.R and self.L == other.L - def __ne__(self, other): - return self.LHS() != other.LHS() or self.R != other.R or self.L != other.L - def __str__(self): - return "%s -> %s %s [%.2f]" % (self.head, self.L, self.R, self.prob) - def __init__(self, head, L, R, prob): - self.head = head - self.R = R - self.L = L - self.prob = prob - def LHS(self): - return self.head - -class Grammar(): - '''The PCFG used in the I/O-algorithm. - - Todo: as of now, this allows duplicate rules... should we check - for this? (eg. g = Grammar([x,x],[]) where x.prob == 1 may give - inner probabilities of 2.)''' - def rules(self, head): - return [rule for rule in self.p_rules if rule.head == head] - - def __init__(self, p_rules, p_terminals): - '''p_rules and p_terminals should be arrays, where p_terminals are of - the form [preterminal, terminal], and p_rules are CNF_Rule's.''' - self.p_rules = p_rules # could check for summing to 1 (+/- epsilon) - self.p_terminals = p_terminals - - - -def inner(s, t, LHS, g, sent, chart): - ''' Give the inner probability of having the node LHS cover whatever's - between s and t in sentence sent, using grammar g. - - Returns a pair of the inner probability and the chart - - For DMV, LHS is a pair (bar, h), but this function ought to be - agnostic about that. - - e() is an internal function, so the variable chart (a dictionary) - is available to all calls of e(). - - Since terminal probabilities are just simple lookups, they are not - put in the chart (although we could put them in there later to - optimize) - ''' - - def O(s): - return sent[s] - - def e(s,t,LHS): - if (s, t, LHS) in chart: - return chart[(s, t, LHS)] - else: - debug( "trying from %d to %d with %s" % (s,t,LHS) ) - if s == t: - if (LHS, O(s)) in g.p_terminals: - prob = g.p_terminals[LHS, O(s)] # b[LHS, O(s)] - else: - # if a left stop, we need a right stop below, if a right stop, - # we need the terminal below. todo. - prob = 0.0 # todo: is this the right way to deal with lacking rules? - debug( "\tterminal: %s -> %s : %.1f" % (LHS, O(s), prob) ) - return prob - else: - if (s,t,LHS) not in chart: - chart[(s,t,LHS)] = 0.0 - for rule in g.rules(LHS): # summing over j,k in a[LHS,j,k] - debug( "\tsumming rule %s" % rule ) - L = rule.L - R = rule.R - for r in range(s, t): # summing over r = s to r = t-1 - chart[(s, t, LHS)] += rule.prob * e(s, r, L) * e(r + 1, t, R) - debug( "\tchart[(%d,%d,%s)] = %.2f" % (s,t,LHS, chart[(s,t,LHS)]) ) - return chart[(s, t, LHS)] - # end of e-function - - inner_prob = e(s,t,LHS) - if DEBUG: - print "---CHART:---" - for k,v in chart.iteritems(): - print "\t%s -> %s_%d ... %s_%d : %.1f" % (k[2], O(k[0]), k[0], O(k[1]), k[1], v) - print "---CHART:end---" - return (inner_prob, chart) # inner_prob == chart[(s,t,LHS)] - - - - - - - - -if __name__ == "__main__": - print "IO-module tests:" - b = {} - s = CNF_Rule(0,1,2, 1.0) # s->np vp - np = CNF_Rule(1,3,4, 0.3) # np->n p - b[1, 'n'] = 0.7 # np->'n' - b[3, 'n'] = 1.0 # n->'n' - b[4, 'p'] = 1.0 # p->'p' - vp = CNF_Rule(2,5,1, 0.1) # vp->v np (two parses use this rule) - vp2 = CNF_Rule(2,2,4, 0.9) # vp->vp p - b[5, 'v'] = 1.0 # v->'v' - - g = Grammar([s,np,vp,vp2], b) - - print "The rules:" - for i in range(0,5): - for r in g.rules(i): - print r - print "" - - test1 = inner(0,0, 1, g, ['n'], {}) - if test1[0] != 0.7: - print "should be 0.70 : %.2f" % test1[0] - print "" - - DEBUG = 1 - test2 = inner(0,2, 2, g, ['v','n','p'], test1[1]) - print "should be 0.?? (.09??) : %.2f" % test2[0] - print "------ trying the same again:----------" - test2 = inner(0,2, 2, g, ['v','n','p'], test2[1]) - print "should be 0.?? (.09??) : %.2f" % test2[0] - - -################################################################## -# just junk from here on down: # -################################################################## - -# def io(corpus): -# "(pseudo-code / wishful thinking) " -# g = initialize(corpus) # or corpus.tagset ? - -# P = {('v','n','p'):0.09} -# # P is used in v_q, w_q (expectation), so each sentence in the -# # corpus needs some initial P. - -# # --- Maximation: --- -# # -# # actually, this step (from Lari & Young) probably never happens -# # with DMV, since instead of the a[i,j,k] and b[i,m] vectors, we -# # have P_STOP and P_CHOOSE... or, in a sense it happens only we -# # calculate P_STOP and P_CHOOSE..somehow. -# for rule in g.p_rules: -# rule.num = 0 -# rule.den = 0 -# for pre_term in range(len(g.p_terminals)): -# ptnum[pre_term] = 0 -# ptden[pre_term] = 0 - -# # we could also flip this to make rules the outer loop, then we -# # wouldn't have to initialize den/num in loops of their own -# for sent in corpus: -# for rule in g.p_rules # Equation 20 -# for s in range(len(sent)): -# for t in range(s, len(sent)): -# rule.num += w(s,t, rule.LHS(),rule.L,rule.R, g, sent, P[sent]) -# rule.den += v(s,t, rule.LHS(), g, sent, P[sent]) -# # todo: do we need a "new-prob" vs "old-prob" distinction here? -# probably, since we use inner/outer which checks rule.prob -# # todo: also, this wouldn't work, since for each sentence, we'd -# # discard the old probability; should rules be the outer -# # loop then? -# rule.prob = rule.num / rule.den -# for pre_term in range(len(g.p_terminals)): # Equation 21 -# num = 0 -# den = 0 -# for s in range(len(sent)): -# for t in range(s, len(sent)): -# num += v(t,t,pre_term, g, sent, P[sent]) -# den += v(s,t,pre_term, g, sent, P[sent]) - -# for rule in g.p_rules: -# rule.prob = rule.num / rule.den -# for pre_term in range(len(g.p_terminals)): -# g.p_terminals[pre_term] = ptnum[pre_term] / ptden[pre_term] - - -# # --- Expectation: --- -# for sent in corpus: # Equation 11 -# inside = inner(0, len(sent), ROOT, g, sent) -# P[sent] = inside[0] - -# # todo: set inner.chart to {} again, how? - -# # todo: need a old-P new-P distinction to check if we're below -# # threshold difference -# return "todo" - -# def w(s,t, LHS,L,R, g, sent, P_sent): -# w = 0 -# rule = g.rule(LHS, L, R) -# for r in range(s, t): -# w += rule.prob * inner(s,r, L, g, sent) * inner(r+1, t, R, g, sent) * outer(s,t,LHS,g,sent) -# return w / P_sent - -# def v(s,t, LHS, g, sent, P_sent): -# return ( inner(s,t, LHS, g, sent) * outer(s,t, LHS, g, sent) ) / P_sent - - - - - diff --git a/src/io.py.~2~ b/src/io.py.~2~ deleted file mode 100755 index b4da691..0000000 --- a/src/io.py.~2~ +++ /dev/null @@ -1,237 +0,0 @@ -################################################################## -# Changes: # -################################################################## -# 2008-05-24, KBU: -# - more elaborate pseudo-code in the io()-function -# -# 2008-05-25, KBU: -# - CNF_Rule has a function LHS() which returns head, in DMV_Rule this -# returns the pair of (bars, head). -# -# 2008-05-27 -# - CNF_Rule has __eq__ and __ne__ defined, so that we can use == and -# != on two such rules - - -# import numpy # numpy provides Fast Arrays, for future optimization -# import pprint # for pretty-printing - -DEBUG = 0 -def debug(string): - '''Easily turn on/off inline debug printouts with this global. There's -a lot of cluttering debug statements here, todo: clean up''' - if DEBUG: - print string - -class CNF_Rule(): - '''A single CNF rule in the PCFG, of the form - head -> L R - where these are just integers - (where do we save the connection between number and symbol? - symbols being 'vbd' etc.)''' - def __eq__(self, other): - return self.LHS() == other.LHS() and self.R == other.R and self.L == other.L - def __ne__(self, other): - return self.LHS() != other.LHS() or self.R != other.R or self.L != other.L - def __str__(self): - return "%s -> %s %s [%.2f]" % (self.head, self.L, self.R, self.prob) - def __init__(self, head, L, R, prob): - self.head = head - self.R = R - self.L = L - self.prob = prob - def p(self, s, t, g): - return self.prob - def LHS(self): - return self.head - -class Grammar(): - '''The PCFG used in the I/O-algorithm. - - Todo: as of now, this allows duplicate rules... should we check - for this? (eg. g = Grammar([x,x],[]) where x.prob == 1 may give - inner probabilities of 2.)''' - def rules(self, head): - return [rule for rule in self.rules if rule.head == head] - - def __init__(self, rules, p_terminals): - '''rules and p_terminals should be arrays, where p_terminals are of - the form [preterminal, terminal], and rules are CNF_Rule's.''' - self.rules = rules # could check for summing to 1 (+/- epsilon) - self.p_terminals = p_terminals - - - -def inner(s, t, LHS, g, sent, chart): - ''' Give the inner probability of having the node LHS cover whatever's - between s and t in sentence sent, using grammar g. - - Returns a pair of the inner probability and the chart - - For DMV, LHS is a pair (bar, h), but this function ought to be - agnostic about that. - - e() is an internal function, so the variable chart (a dictionary) - is available to all calls of e(). - - Since terminal probabilities are just simple lookups, they are not - put in the chart (although we could put them in there later to - optimize) - ''' - - def O(s): - return sent[s] - - def e(s,t,LHS): - if (s, t, LHS) in chart: - return chart[(s, t, LHS)] - else: - debug( "trying from %d to %d with %s" % (s,t,LHS) ) - if s == t: - if (LHS, O(s)) in g.p_terminals: - prob = g.p_terminals[LHS, O(s)] # b[LHS, O(s)] - else: - # if a left stop, we need a right stop below, if a right stop, - # we need the terminal below. todo. - prob = 0.0 # todo: is this the right way to deal with lacking rules? - debug( "\tterminal: %s -> %s : %.1f" % (LHS, O(s), prob) ) - return prob - else: - if (s,t,LHS) not in chart: - chart[(s,t,LHS)] = 0.0 - for rule in g.rules(LHS): # summing over j,k in a[LHS,j,k] - debug( "\tsumming rule %s" % rule ) - L = rule.L - R = rule.R - for r in range(s, t): # summing over r = s to r = t-1 - chart[(s, t, LHS)] += rule.p(s,t,g) * e(s, r, L) * e(r + 1, t, R) - debug( "\tchart[(%d,%d,%s)] = %.2f" % (s,t,LHS, chart[(s,t,LHS)]) ) - return chart[(s, t, LHS)] - # end of e-function - - inner_prob = e(s,t,LHS) - if DEBUG: - print "---CHART:---" - for k,v in chart.iteritems(): - print "\t%s -> %s_%d ... %s_%d : %.1f" % (k[2], O(k[0]), k[0], O(k[1]), k[1], v) - print "---CHART:end---" - return (inner_prob, chart) # inner_prob == chart[(s,t,LHS)] - - - - - - - - -if __name__ == "__main__": - print "IO-module tests:" - b = {} - s = CNF_Rule(0,1,2, 1.0) # s->np vp - np = CNF_Rule(1,3,4, 0.3) # np->n p - b[1, 'n'] = 0.7 # np->'n' - b[3, 'n'] = 1.0 # n->'n' - b[4, 'p'] = 1.0 # p->'p' - vp = CNF_Rule(2,5,1, 0.1) # vp->v np (two parses use this rule) - vp2 = CNF_Rule(2,2,4, 0.9) # vp->vp p - b[5, 'v'] = 1.0 # v->'v' - - g = Grammar([s,np,vp,vp2], b) - - print "The rules:" - for i in range(0,5): - for r in g.rules(i): - print r - print "" - - test1 = inner(0,0, 1, g, ['n'], {}) - if test1[0] != 0.7: - print "should be 0.70 : %.2f" % test1[0] - print "" - - DEBUG = 1 - test2 = inner(0,2, 2, g, ['v','n','p'], test1[1]) - print "should be 0.?? (.09??) : %.2f" % test2[0] - print "------ trying the same again:----------" - test2 = inner(0,2, 2, g, ['v','n','p'], test2[1]) - print "should be 0.?? (.09??) : %.2f" % test2[0] - - -################################################################## -# just junk from here on down: # -################################################################## - -# def io(corpus): -# "(pseudo-code / wishful thinking) " -# g = initialize(corpus) # or corpus.tagset ? - -# P = {('v','n','p'):0.09} -# # P is used in v_q, w_q (expectation), so each sentence in the -# # corpus needs some initial P. - -# # --- Maximation: --- -# # -# # actually, this step (from Lari & Young) probably never happens -# # with DMV, since instead of the a[i,j,k] and b[i,m] vectors, we -# # have P_STOP and P_CHOOSE... or, in a sense it happens only we -# # calculate P_STOP and P_CHOOSE..somehow. -# for rule in g.rules: -# rule.num = 0 -# rule.den = 0 -# for pre_term in range(len(g.p_terminals)): -# ptnum[pre_term] = 0 -# ptden[pre_term] = 0 - -# # we could also flip this to make rules the outer loop, then we -# # wouldn't have to initialize den/num in loops of their own -# for sent in corpus: -# for rule in g.rules # Equation 20 -# for s in range(len(sent)): -# for t in range(s, len(sent)): -# rule.num += w(s,t, rule.LHS(),rule.L,rule.R, g, sent, P[sent]) -# rule.den += v(s,t, rule.LHS(), g, sent, P[sent]) -# # todo: do we need a "new-prob" vs "old-prob" distinction here? -# probably, since we use inner/outer which checks rule.prob() -# # todo: also, this wouldn't work, since for each sentence, we'd -# # discard the old probability; should rules be the outer -# # loop then? -# rule.prob = rule.num / rule.den -# for pre_term in range(len(g.p_terminals)): # Equation 21 -# num = 0 -# den = 0 -# for s in range(len(sent)): -# for t in range(s, len(sent)): -# num += v(t,t,pre_term, g, sent, P[sent]) -# den += v(s,t,pre_term, g, sent, P[sent]) - -# for rule in g.rules: -# rule.prob = rule.num / rule.den -# for pre_term in range(len(g.p_terminals)): -# g.p_terminals[pre_term] = ptnum[pre_term] / ptden[pre_term] - - -# # --- Expectation: --- -# for sent in corpus: # Equation 11 -# inside = inner(0, len(sent), ROOT, g, sent) -# P[sent] = inside[0] - -# # todo: set inner.chart to {} again, how? - -# # todo: need a old-P new-P distinction to check if we're below -# # threshold difference -# return "todo" - -# def w(s,t, LHS,L,R, g, sent, P_sent): -# w = 0 -# rule = g.rule(LHS, L, R) -# for r in range(s, t): -# w += rule.prob() * inner(s,r, L, g, sent) * inner(r+1, t, R, g, sent) * outer(s,t,LHS,g,sent) -# return w / P_sent - -# def v(s,t, LHS, g, sent, P_sent): -# return ( inner(s,t, LHS, g, sent) * outer(s,t, LHS, g, sent) ) / P_sent - - - - - diff --git a/src/io.pyc b/src/io.pyc dissimilarity index 74% index d85f16d8b87152f74a372738a24eaff64873f3b0..f7e705ba473edc7927b99b134b86734045ec06e7 100644 GIT binary patch delta 3045 zcwU`VO>7%Q6n-;nZ|vAk)=upDFLvUj%_getG?5(Cgfyjr23n$OLy4%`q9*GG+r)`~ z5?V+aK{%umsH*1J3kZ;)5<(D89H19Yr~(eXfy9Xm5*!emd2jsFBt>+Ct$jN?^SycB z`2nlS0|VnzJg%-AwytB_zWTupFt$zGl)cd29b!*AQJHzag!xeIe;w{ zE@+U|NHt{oprAncQ)WgRQ-RadW*5>a0*E0V6`ThGg!zk^amacbnA-Aih?dZS-AkoO zPOCTgNHBE^x|UrmF1;ux{!M~h_K_5702FiM9#RGjW2M-HXi9=eP@%^cSaL(XGXK3Srb2X0?!zo#F~bN zU;OIm-SjoQu$qh9a%$zep%IQ0C|1BED~cW4&5A(vP)jaXc{9$EK>$DjBP2f8f>zwW z>FjSKb4p|X=fu~}U=Tgb^IAQ+VZ#pVcA3+-_}lqKFhF)fD5}&}a@B0PkiA|p+Qhxq z&^g+yR+@ASrB8;FM3fYWXyHJi5t^%qCB(36EbgIg+DUZm-9JnsCVupGSa5mg1J@Ee z*R11bVAts*x8(pczWj7O!u5H!cb+%q<)9$uJrfp;iw`}0n~$@tz{-3s@ngr;SecYB-`=mkfb6by<2=MkhFOq%T z-X!K~kgd_y;eQ>Un0U~h#EzzD&oW&+uXS5tQ_|AjJnU&z#-P>1b6-oLy*aNw@OW^I zW1mQS`%&k8P2aStL%53+2+AZdT}F$VlhIA0hr|&Q{UioR#7Lwb4ei6I>L7mc+AQ-< zh(Emjt#sU}#42BkZr@Wrx?S&rX~Wy~+IiT!}no z{`d&>3p(PJ-f55l&U zy2=)(rWx8$px6VVJuuy|-$V6RZq>(hBMy;E*{FO@*Fsz zbKMD@qsn+`md%y4o#8_YKTG}2@azeHl}f&RrMQ@t$dfzLGcNX0nT z*uj!h{pghpV$BMv(OnDPb|skbp$m2vVkekG*di~nl(-y;urcv^q?DR~dMC`u7Wwd# zdT}io@>5bA6QSt9xSxh{SxzQMjFX5r+?qE|(Dx*XlQc8Z;g@s2hwM3d;kCd delta 2527 zcwTi?OK%iM5U!fp9q()GeS3D-kHw4QwI#e@q}agn3<{1CIZ|7QJRrodgBcsvk6j2T zHkJZXEOCNXF5!|B5{D#GtZ>LBxo|?^7jVcuCvu3CoKn^E@H1X@`RnTLs;{f6YhIuD z>S$@>FEjS*>7OUBg7+7paf(K(@u)O!&S|jl2C)%`bCcH}J$|ru9U#;wfYv~$hZ;o! z2?iwsCW8!tE(TcwNd^{y9Msi8BwS!+f?DxMLL&`xIxOS?8$G@*e{QnTOQf3!C|}~3#XC~8qaW;)zr$PX zku@#fv)6{D&oxw=KGO~tixia=gf_sO0Si&U74RKA(|!wuX<0thi)d~<)xSe@m+7rzN!IPbLe6KXD{uCRUKAzaFFNZlhY6w0U~@(?gdOSiW_`u5wnDXekc=8i$G7XvZ;+amTpDs#_=_ zk;yAEK0oq*ABjSo-M{WAj0WzN!SNV->qVvaD~OsIf5 z@~`Mi=k44MNRZH_6g4+u$jni4nOknmOtjYM^h3Oa_LKmkQGjQdKqDI3;pS zg`vnxCt9I$^Qdj|Q%*&SP;Hw#?J6>&!RJ++lI+}#iSl+$BS*Yr%JtFBq9_lgEjQ4U zv7t_mQ;>ThS7TN=v>Iljj^?J`gjEApNM%rvD{wC?zmNTN-L*xc&CkK8AQ{-)EMxzVbsdQhUkf-fYZEP7d}1Toe5o$2`_z7I#FmN@oyCvW6CitKhX zB^Ld#HFTQ>HXbEzq6lI%39wn$d`ks5Sv5aN-U9jyd3Qnc7ZjM2Q=KQ=F#gcFU;ZAB zuKkZ`9TxXCO$&RQ)`PTh$*M;rxUj?^_~oJk`PR1EhTV95*)`tdQ3zZHF1VUsb=A^? zzDe~&7MB;U$W$!nUgzE9lL@ZuYB+Ed_no^mhGAEf-74*uNaGwM{L1zsN2*27pz?tIT2_ivbox zESQ)W0k14XZTBViDizmBh0s}6rAZiJL;jtyVll#;jMj5$WzS@|-Cm#0<0@Nb4@{9;KD2kbHj zxUOwmC`>nBA~zZ8I9v7k1|?jh@4&0>*a#Q#L)EL-b#E7~QFmBZwaAvsWSJ(9t%5#4 zr%Kl3L^FrDlS2nAbgOIcpi$=w-+87C%5SWS>F8W<+(<&Cd0}1bb_`X45y-AYAv>G@)92Z0F|oMLf= zDKkf>*|e80^OF*N;5D_|ZlS3MH6EPfBVx?Au%$l2ff?3wvx8KZZVjX{ZekcsNZ?6~ z%2T}`V@-bBTf(}0-n%q<61?unp vp - np = DMV_Rule((NOBAR,1), (NOBAR,3),(NOBAR,4), 0.3, 0.0) # np->n p - b[(NOBAR,1), 'n'] = 0.7 # np->'n' - b[(NOBAR,3), 'n'] = 1.0 # n->'n' - b[(NOBAR,4), 'p'] = 1.0 # p->'p' - vp = DMV_Rule((NOBAR,2), (NOBAR,5),(NOBAR,1), 0.1, 0.0) # vp->v np (two parses use this rule) - vp2 = DMV_Rule((NOBAR,2), (NOBAR,2),(NOBAR,4), 0.9, 0.0) # vp->vp p - b[(NOBAR,5), 'v'] = 1.0 # v->'v' - - g = DMV_Grammar([s,np,vp,vp2], b, "todo","todo", "todo") - - io.DEBUG = 0 - test1 = io.inner(0,0, (NOBAR,1), g, ['n'], {}) - if test1[0] != 0.7: - print "should be 0.70 : %.2f" % test1[0] - print "" - - test2 = io.inner(0,2, (NOBAR,2), g, ['v','n','p'], {}) - if "%.2f" % test2[0] != "0.09": # 0.092999 etc, don't care about that - print "should be 0.09 if the io.py-test is right : %.2f" % test2[0] - # the following should manage to look stuff up in the chart: - test2 = io.inner(0,2, (NOBAR,2), g, ['v','n','p'], test2[1]) - if "%.2f" % test2[0] != "0.09": - print "should be 0.09 if the io.py-test is right : %.2f" % test2[0] diff --git a/src/loc_h_dmv.pyc b/src/loc_h_dmv.pyc dissimilarity index 85% index 0081392206c284d1ecd705e3c79436ace85721fc..11efe3f7125c9a556da2734069956bd62be12215 100644 GIT binary patch literal 39001 zcwXgw33Oc7dEUM6&4R%~fLIAiB8QY9ff5LkkVH}x7f~cd%A!DdKnpS@dNjlv0E1-! z%nS%n8N>?}i*k~>iIt>w;>L;{r*V=xP2y&$)5eMIoaQub5+`obq|3?TG|NfdCe2CH zp0@w@-TRgq3@8#*+MWi$T)cPp```cm+x_ny_s`p!+ZMlcenQ!wt@3$7KII>GmGb0I zt2w1EnP*4MX>~F0D9=^1jw(HIQeF=qx|P?yfh!~Ra25#;w9!@<=w|{fZ?un zp`_lAhp7&A`f{hbbf3!Kr)CBK$oDvE>6`T>Nq&I!KA^loJ`5^v49cPB0NyVhRQVQ_Kd8KiZI=sz0j|1q2oIMIEAJ7Nm+(<_)H}$hL-KT(Pmjry zNj(yzJ|3hVm8WBTI8Q%f-yzkLe@x}2f~C(n zBG-BI}`mG^`?il{Tnd&-YGX`;?5@0=g?q(qs9pEM2k*1&8^Bjcr0Dt}5^ zY@B#nm^xEc5ea4rcViDF2^4{f@z; z53j}S*u@Laak3Y)Cof(+dFosx$(OSir#iUZGTpF+Fz&ux*y?^SRl)Y2I)>m z|7Vh*q7m5isI?T4K4rlGa{9WK`VL`DB~1sDctQuatZT0cOF?2LouDz3+kte@Ab0b+ z+)MMa#t8X7>1Cf3?i(9~)H7Px=c5olSQlMDAy7-RfXztcL7I>{d+ja^K+A%T4$``e&tgz9nfmDKZf|H6p6F+a5kYz$1Sbym)_6D zROz6qP=+%(kbpsaz)?k?!uOBKvKW&kI5vo$j!yl~fzL_OKR zyAXhSXYL6S?y%>X1JRyuF&GQDAd1uU?OuLz?Vkd)vV zqYYv4VGw%I1{*h49>iq>N1>cjL;;V3iTy&YyYeB2g{#^x1P4 zGmzE>`3jz!X;l_e7qjQ>{|qPE)PgC2)~_RQuNlgP0AjjOl#N6XsC1@*04U zL40Nx0tjmvtmX{K3aXn~C={&_$Ce4~vlq_W|CvEhLNk*)5u_0e_@zq=3l+n4WW+`a zt{6JJQ1XZ#L=>~yH^PbAMF;d&)Qn0go4pUkS-VJs?M^Du?xx&r9i6&M59(%pKz>d5 zsQe$+BYMIY$pZ!CWkoV9qSK5MQa0h{8j^&xDW(hPwx0osB1 zCcdVqxRNvi{YDAO3TRYefrGH}s@0wf#X=>U%{&0G>_NbJ!%EDwBVg$*5|r}gd?j00 zn3$HbWs2tptPnGNh9O8`stRjCrJJU^nTM6-2Izv)y*g!noo!A)ckv-m=M;fz2$vAr zYg-8sg0^3%>v*wG#RL?y&5#?k)&&Z`MXg@2SzVJgR+Ba7XN@td(QhW50Kk}3Qme}H zq9}HRYr%ktFVEz!ohg+TEW~{P#(e-%xG3j~m29wtCZ@9@QSi}J#79$d)3Bt%G8N9@ zdr(C;!P=@_y;Zjfw)+1kFev|X%5SVnM4O@bXP|u4psOHp7!|=i^58=V4kCC=)hx&( z2#z8+rtSbqIIswE&fqo1fpzIX^mY7!fEN|We6zs=RW!fTPS9!gSNr|lbO$rwA;qv) zDLvuiZGgMTa_ue)UPBEmqQyjl9WQnYI%=ZhKAa!{%m;Up5sauWHG(}rczaU8_Us)~{)0OFb`bxgClFt|St_VL$OV@Hx z8_Ma0$#iaRE{)i7x=>7)Wkl)8Qf_KqR==b+r{xEfU^qRVPs93B@p6P6~#z z^02;%g;FU$QJK3YHAzyg0)rGQ$rZ1qSI`6sl-RMn^rTR(46%V1S1unr2AHi3GrwuM zB8vyMu0nBQZn<2znwQMNVQKza4We@F`|Mo4P~0okmnAh<^jO1l*DBKs#Rtl4X>WP1 zFi|K@4UdL8R9;A2zLcxvDL|3{kaDu=%wFYls{K@oRstj0%BGQla)LyZuPI+@Po_cO zfSl+O7#Y=`_e*fYXif>xvDf1?lt@TBfJ`=wEVQ}2=Gu5`Oc9(&`c@b!>ea{^7Fqpz zT&*=hzQz(3C3X$Nl(wXy8#Ex((kV*)7#D2$Hzi14l&Le+9}^(BQ<;|kGQ;yn2yBz~ zVqs`z==`~{p)^N*nMo_tc|n^3<|RFo&Xwvga&e(tAO=MypQY1NGQ%>R-b`*HU!1s> z9?Z=x6sOWFh01h#mgY{8v~m;EMA(w7&wo)jwmSR5d38D#pJV%*Sa4%o36y}Mr zrnL!QRl)>NUHUm?R-db4?yOb|eYDF8#dMUT%ZCB^ic$U_p{5^Gy~Z*uYJJ7XO+L{3 zp)SoM>IRt-3eEdQ9dP!zD(;9>_)VCUXoZp?O#QsocVNqO{dUL#`=J)aZ81r<3K8-3 z>Cw?M8FHC{@`K0H`-k_Z2Ukh}BVoKJpw;Cr<~e`z)H6?yo=smolX>>((Ua$oly~`M z2ZkS+Ob=G_rTIcJH+MvY@$}x~5@rXyJ)zif`plE>I!p2QxeH^Ix2(GANEPtc?W z3{M0|+U)=G;|Yo!LWp0NznFTh)n9xuu0PRt6Lwv$C9xI*Td1ry(QI$TxyVSdZ*Uh& z791w_=P=cFl0QupXoA#?<=CiHzlp8rFyT6Ncxm3Cz=kEqBwCQDPm+L|Ni7*5RywNu zACbA@`tivIHJEM*^%x8LdrW%?o3Oq^riuVF(Ljt1e8!P%#-@4Q7!`g--)KUSegP~x znN+i`Dqq(asPrYpe6A{eiuy}dz0bDwm#dkl?B?q;Nl zv`}efeg#zp8wFwcwua@`Qjy|YSUldaczfgG&9ufg3)60)B{iig9SvHP8KTOSMBawU zQVcml@b@LWCJX**3r$gH&n@*Oit{dj-Nu>yc_*CN>6n*NYcyz^)Y4)1)`sRZ9-0|Y zOM_ftcLn;ZDP7qb0{!&mzfx<`lmMo+9?Qbo*V6< zFMoz|(H6DV&Kc@b)%I|XQ8$~r9#d-_s@mb#^BFx4jW3;Z{b-e)80@_jKpw{xx}e=vI{ z<}YKnhS%;b&7Ql_?EST>bhB*S%-$c_*)w3+VD<<@3-LsJ^~db&87Oh~zI%h&+tOh6 z0$kZ(#@cRa#_A?#eU1K%JsTfiO2p;w7N(1@8`0dOl4eZ9&{9Sz^ZS}C*~*flaU6mH zOQH&w<+-;BGc4zHHmOQaM9j{Nl%%6e^Y#gw5dD&2mbEp`SyU?2(+c2!gAAw!Z}YkB zd4`z`wj>P0hFm(oxL6ShihMfrtnwAOQDqIFWxIQPyL_1qEM%qnPbsS=!IZU4_;9c8 z*3gXjgFX{vO^Vw^GOBK?@$^V=QKSY6gF_Zxw5!iq0uvC#>hbXy)9-pCq+;s z$zAzj6gAu28TbQ~p)Q1ClE|D!B0iEX92pR~7XhT4BO*0Pee#cc&*gp=lgPn=;RBO< zL;+ivPfK3WvIaa(5RJJ5eNCO zHr~uRWff^EU5KV-caVkv;t^2@z#JYi*!6A%|7Z6-h%HM)V>-G^%hlo3yfi*Nx z)N;no0Bc?5UO!yVz4of5fH$e3dYB!WX*?zsbJ_Bb>>0CLqzU<=Oy?dWNb?#*5;t_djE@L+#qEDrVhllOcAjc~7V$YG(K)JcR zwWe^s=%KusJDS(H+>y!5SKZuTV~QwV=H3IBz3A(iK+(Dy(#qSMhm7zY_1{&5f)5Cu z-4E*qYDec<{Rx;ya|*5t6>ULk%(YN=UHeQK;bg?}CHbfy_gCUI8HE%WLzprD+TDw! z>W1&b0Qa4v3@_;qyu3Vbh!auoG6K>&^e*$F@(q@CAUuoUDIZ9ifmCF1K$(pR+^sD( zVJt!TfbOu9FlUNH5VZt_c80he@Ggi4Lv&BGN~tju`Mx;TKoqmyHqcHJz|4j=B9XWZ zs+PY;g7kEyvUp_QzLk}g;c{i!gGF$;aAml-uOI`!=EbY*_- zUSYraQnRuDWWo4gHp-Vm$|LXkTf6g+?#bUL^lEI4LVNuumB)wbl9`fYv`l z8&-dSxD{nFu3O0M>ObUKHf=!B_&A-v-k=S@^fGD4!uv=Tn&9k&N{u^M3K9nVk+kmu zCUm@6tv+X!qbXGRBs3@?w3#=(VO;m3wx{9^|l?#S8Z=yTVJEz#;rG3!`^;Xdiy={xvs~H+QDqJ zP_aD0=T^?l{caFny6DJsNu$LC`T=cIdb|}qw!k$LW;INI8qu^*g8j+-h>pPKeT2=* zde}@5Hm%U$ENt2uGzQpgsR0DEwl=odNnGcaSqvN z(SxxdzBJ|v9PIF8wZq@yH&Ev~OQOF$gsy^OExUB$Ws54uF)CS(0~>udYr-g#J)* z$Yc>b804C(7(a}c5l}$^nHvOYi>L+QgNB2IoY$(GE5?Zeoiu_deR0s#Vs>TPi0ab_ zTEh6;3e|yd$H#DX?7W3iDJ7vzc@Sn^k?i+^WM4#SeYcJY(e4&Gbcc>PNf?-8txnqM zr8wTD&pBQ8=@|#oYRXCI!`j51jKuWXm|ks|5PObE;NE7030*~S)*nbjYza5DLoxi7 zno%1d+>sbguD@0LGGL-z_d7ASC$ZxQcfQebAcTKTf&wKzFR2c$X|?($@bdKKkHT95 z?x3`v!yeAnc643+fSF1Jgb?l;bhaP+|v0`xpdrl`D76;20V?8| z=zS1+V>NrD@GsYtd)KVnXDSRWP<}61#K> zTG&OR`bl68w>zSptkA9}DL1Iryw1%KuUpB0`{9u79>oZm15%T4k9e|TjzBcizl!ma zvyU9(4-?=R)J;)O5AcAtH>xx2&CN=7HiF9yRp|7tUZSF-30wJ0!rSUog&t(PP7%AVuKv*X!mY1SDERe+(n}{2<4ZC5{6Q2@ zI_Y)ZDxf+6`Hcb9wGp6RNP1m20o2yF2T)&00#M0Y2UKh$Kussdm-fnMi+r|<+WK*Y zg*C?gj8QseKzM=dLemFCr5nngBPe&^d92QwC2e7Y(45XP`3~cGtWFQI`4YA_WR2?_ z8W~W4ZeaH@8wf_vC=~5xgNVem2ig4!82pUL{fgS8L3lUYdB7> zbcGX?H)4(O--v;xzAI{JL>bQ=p*Hn7n?XBl&Tp|fS3gNZVBEKAL0bJV?Hes5E-t;= zTd}~5PF8J-*f-jIG;N~oq?LT8#cyv5sYOb4!|26r{&sb(+map9sdiAM@ulyTDNb^V z_sR@wi8gtTO3DlJ8JE6}ug)QMrpfI5yvMh}NR{`|uYd{ydg=r`uw7Ykd3@=V_M7;G zZ30Q_%N>=RFUV)yQ|l}&nQug}ru?lM+Eg&ZtMra+XhNZ(g=h#2th1G5KA@s#XjJ*& zd{QnuYfL2{M1XzLh;hUaC-_iuqm+gfgXFIvct3*BP_SvzWsEu475;AEHr%dpYz>E) z61taL{$}Gp;lSRpLw9Odb?Y|O>-5Xhee(2wa}5wPZ(|K`&1`B7uw>&k&}`Pgmj7?p zKnDA`5$vat@D8v)^C1Kw>Jz~O)X#h~0xpNYA*6@6uwr~3eqys&Z_Xc{IWuK;sroS}!YWi(Z|o>vmD}oS_Li;kR#{AA@r-SEGb~=KHG&}JF#f+2eHShn<9OcDwWu`R$d@ zlaBv%-05X(R}hP*<8G_IU!Gd|>6e@j$^T(JutEIZ*aRjX$N&+BCSSlbINWlb$Bzrj z{<%(Y`N!&BWHAQ6)0cOGhJ&CcNP3l}zR*ITFzi?3Ri2f@H3jq#kWw%C+ijM*rouu( z@T8m}+`e!}H2O{i#KjP@I7(`K*s zGVp8n(&9~eEs%qPR^^Qv{XJDWQXCyG-W5p8Pw=3ZIe#QcID5GIL2az@)Js=2?3UkW zFVbMt8<-T&KQmh8{S&mvuX17%Mq{0!j}6a@LZMAX4EQOQ5W(Ft+8Dh_f78$5L~=-S zkL0q-Wu!H-t!A1jUK6&L6SntD(dIwjsQH9#9zYScz4`{-gc1fLKL{=f_QqBui=Y+! zFA9WDye8?Wbm-cF`dIeKG~+NKWya8AS?gi#gZ)RqTKR5R?a$ix7vwX(l#tJ;8uvt5 zA^$AnIf&)sjTgBn=-RMhFytuUG9jx5 ze<6z)-GHPM-N~LQu##_+_yPS$E4@Mq8-`|?VI^G3mljSJt`^FLg(5u;2YDWl%Z2NR zN|jkjE4S0k>ie=I_hXnDu%}$|DQVQ zm>$G6VWt!2wgqW;&oIG96fk_RC7Auw2=>0+c#wNnfCtH)h81BI2SUNBBFUcNwxcWx z%sxFYuWh#umsK(uYr+v?Jzm@=Y{?}(e3?d^SWPy|=o2vFNOm5Rr{|G%;iTV%t~S;3 zf1Lcajs-93;m&#!`y;U_|$; zsuqkSDwT-k?|R@!=~^d3v+8s;R&#-XIR^bEf>JR4M$V9L!&qDVvC=j=GKm2<*#YAc zivVL%{!n3Y+z?}u>x_QHn33QDg$ZJeTd(&Ui~(*mMUf`9A=1Qa;dFCkxv5*m9Rd*W zq2tBK+veOg@hG7D0nO7nu~2zg{UR8oK>zgRyXdA%XdC~_ z8@wM495ExwAM=l;YK7fm*KWS}yTufqAwRpS&oNf0)t5OjEk*{;!0xpAG$*PxD0!In zrY4^XH*?o%3#?R!){Ig`2e;aUlv?^l5wri1e7;paOM)aR--UFi#?%{ytjC-#A6@Uc zd$)>ci3WRa(Ef&zCj-<$Nbi)15ks>;vxB1_tKs4stRvEIZW1SG9C#lP0fhs(*xVuM zM5Ds}B6f^MEnIM7*zETzYki>6WA#d|WV((jRtx6ejOj0N9nBgH4}@~Vciuqo9SFj) zvegCyzRj^3@1R^I>y5k@BgP#?LR6eq-KkO%J}M&bRv|60Q0>9qletw0y~GS~Dg3PB zdB7QEw;h3^#u>ft-pAm~w26=S>wu1!y9Ujt;svvQoFGUZ~Ab&ZtZwCJye+6K3YH2aH+Bx^7BySX|3r=q<8Ge zn|^m9j+C_XeG`HY`u*H+!ftl}KOosZf&rL|3woSwZcnU}JHI_96lHV*Xo`E-@Sp4O zr?h{71VT*RI&`@Xbz~i;{CFK%hs~kVD(eR4yj^>9Al76n=Gh)G|9~>mIJPaca)hr4 z6Df94I`!5U;RAyNjNwh z0q(@wK7IN7)mm4il1^Ri0zNux-D<6el}P`({C9xI2#zf*)WkL%e@~{DJdV8pr|V{9 zA0ycn>`V4AavRyAktU7AEs_23(5^})eL`5a$)_J~+3jlS>%t&^NR6X)+OZ;Js2z;> zZx0Fjk-t48=tnmuK@@UHf+#YO4$+jxZ4YjXeFDJ~2-sOtDXey*#)qVeHnLlwSEO(x zT5wp;yh~XbY9)))ap+paEHJGi64v?P$sWK(hYCfBw-4qXMfkrcRf993$341rn=qt& zv@-G;K+)`^89FGEiO40rLID#F40Ih%8?Sc3|6QUE;dckh-I>kbg(Y3Hf!XnwVa{{T-I( z)vw*2R)WzO5<}|3{nT$m@a+h=Z5nDS)Tl$)6YQaK0&qn%l+PgeAp}%YsIc6Et`b0C zQxw71RdOT0;#eGm0i=i5Ibr>`I)T2Tcl!DY8g5mQL&=WzPlbCt#kl$>T&NBg>KYk( zpvzR_{zC1QB{HfQWBDX@L0)j0oWw3LKd zuP2nf#6vogn@d4s&E(A>Xj-9&=gl8`ZyYU$f~Xl2G|@$@9VZtr!ab*6_W)gR?a zb?L9KmDeN9*H47o2MrL4Mi`<@C_n>wBIl{Ohfh8hO4bZQTv}4r4rDgtv*p4o`3P!+ z+Ap~X83?(v@`Kw&oY3ZkOMFMZ=GX&Y-c?PC9lI^*W7amAD_7h@>Ww+GqAUE4;KHyN!)2 zOWw4yW4TJ9ma}T5<5~xGGeCwBmz3ag`3w-ka-haG4uVB{VxF)R;K%ec6GWt^gbu@(J_&g zV@{XTqwkl`gXS7*bSUr?!R?;~9;Y*hH7mTCa+0@V9mEYPpR+3nPL1Xc4~Lq5xWoPJ zksV!SkGb_%5;Ymk;RGraQ%gXb5f z3KO}xxobn*hvlWH<>`gxIWK)BpI*ei?!sg`+}@HL3(Hf}f`#D~PUNhk=8|F}lQi+n znRjRYE}qXJ*zkUml#d*d9VK3-a~1(*Gaiu+bi2*-N}w;lg2;kor>q2CKBZhZF%Ij) z!ZLU0ZoS>?H(?zDykwDIHKNt4{#niNYl5`@>iRd8sK}QEIsQGGasNQ3!`Fm8gy|*u zRYGxiExoGjw?WDfF-4VNKq4Duw6y}9mJv1MVL z%GQhxs2QoHud1a_fn z5H7V|c#P(Jx@Ik!XVM}2{YPYIp#dVijKI8NIM&f-)(q=sH#@9m4$HsP`4|Qk=AThJ7#^JZ;QBt^6s- z+e=t3`!S3Dm)Lk7gurMt|UqoRk4jMvo^AzF29 zBrG{ExuGchlJB*&X7;aRqy5oz9VAr4rV+HMJ4G{r@Oxb4Z)&7;yGVj$30}UEE*8^QuXyPRgjB`wCXpthxh5Ai zvzkJSi$O(*2qN2RGcGPpux%CjW|ij@SC^>Xb2357P7uTwZdz z^V^M#sUU046wtgv;M1(l?|u!%cj)T{@X5edX1?VJ?m`Au)49_8LQzB_6ffoH7IGeG z&AF4AXD^ICeJXp3;`LLb7e;ZhOZM5PN6Ac{3f?$f`1-NzQ%`5cE^=BgmX`Cx^9$!s z^ZO05r%s+fWiJ;jR0_Gd!Yg@0)AHqtv8fr#0gQ{G~0oHAeCRC1Mn>Y!COyQ$zy zk%B1!j&Mq_QB|RHSuh_JZnElX-M94o8w8(5@LS5K&Y0P$k-v26Mtxr6;$PqE@2=@}cmjA+eeP(`=U1a$Zf`)Bc1TPi#6t1uf3MyH|b?x7h z_(j;}B_0P^NPTd!Pw@4`o+;d z{dFm^_~p-?d+w7bxbmT;mGV=id>N-baHKFjUnt|0iuroi;RE|8X{@TxxRNX4kP5yE zb@lE6`O$6Qr&`@n1@R7mW(*AR^|H_>?sKgtbqB_mNz zw4h#JmtJ3&USF54tBaOT(~osgZZt~S7S=`8=c=zu)zuXbUAB%2g~xk5cn{KbUAlP} zSx1mjXuRYH376=D1%g}R4$7Ov9gKPaz&AW$FW{Tuc4r8>nAG30F2S5G_geh~RaYXm zF2R4~Y2eH1@GeIk&2ZOm_-n0lB$UpW!T$>CR_+O*&EbN}-b^`sk` zMEw@aZUliTfu{m85N^%+>Y(yQ-}rflLKtN1h`^i8eaf}^1w`H*x>@?gi;$gYlqD!W@r#h1SR~2`3c7WqA{yle zQL!kgAT$^bm+#Z%D{EGe$An5e30720e#Z8&!b?CUK7UgO*9(dJNc>dm_C0AD* zU;W|43uw)T$Q11w=G%QydNlKQP@cQSY56rG$U6}HB7)yX0QHDgG)2tnK}PQ~PZ>)C4nu$g(Rv7VPuk z28;otThlr{o+}katYtZK=M5!TidiDOZhojSJJdhH-~vl0|Es7clj|=YGj$cZH1%Jn z9}1_;<#ZZNid@@cmcT*6Gae>fsOEl{0=;K@QM$s7x5nj~-lANJHrkR*RhrQN%C+3A9EvuDMLRbr+Ni5_ zMY}dA+6X$gShVe}i*^%6^!G$3TO+Nuv1reRMSCMf_3ahyixh3U^`cz1*!ms~H6qe` zQm6gH2S)aD{R|90JjweDgk0IA{RbbW5JKW1IS)TFVzWLn8PYaCb^l`{RMqVJ{*gyV z{KrQgJIsRnhaWv`YZ#fdF$WLWhyDA5nhrl^dy=zJ4<8It_sb8(u*~lwpv{-oCK#GB ze~cjT3NUo`4-n8153c;ip@n?u_arc67xZHDv3P29NM6l|AA@vNgplZHPoz9i^m15A0cuh6E6g7Aj}piIJR)}Z1?6vi^%zet+lKemHte$D-)2qm`i6fSf8$F}tMRJi8>2HY zKT~&Z4$rF|cyXY7`B-RRF<&cIcb-)Zl*_{#lv|lD%;nROlo6XUT;J5lTQqgRNC6ae zDD+U?p$vosa~C;!Gj$;I+sg7wL8|4s3ZuOP=HEzYm$);WomiM(%$4$4D=d|JWg*%V zRSCUI5A#OyKD|v3@M)*vufF&|oPuP_$q>bjR0<3qzPsZmL={?>;g5y zq91*J)PiyVkhD7ik$r$|27;h%fm#C^QSq(nQxsKj^M?V0@h7WlD;3!7uMW@v;(fiv zoiq@nuFj`EyhfqC`C%sPFrU6K5>uhw{UIGr!qgxBMEiUH%U8a9+-S#t@tMijTIYWM zI62SM_c`CS?;8`paNL+z$%jy|fQqJmviiGCvtRz&@d5xt+J{^TA4*ReC{0mNJ$<>u z-)D4GKOjO+ADgZlq(6xCo|>~xbg@SJli7>eB?GU}uliYaCkVlnEDPT&T%C;h@~M;7 zJU7n^O{&GA%usQmnC8A;`l`I+UK0CS^k2ETX$+fW-mVWXl9JGdTc7F%uK}0`16Z55 zx>Tno^(``=ksxzG0`92^8JQQhCu7f?L;~_S6Qmkq}ci zg|1*mN9Py3<+=QE%v%}IZPm^3R^8;JVyR^R^Qn&3QYw{dOO2;GQ*WlWq#j7cQ_ZPl zDj}cNRClT`b$_Zi)sectQ+GO&ZzR={GPm#9-`$9fOaAStp;V9jk4YWvh{M@}=Isd~ jCHSJo=9=Gxa2}lN#W!T!Cw)omO~{k|IBilQE}#Dm&}Vt+ literal 37205 zcwXIo33Oc7dER|*Hp~DZKoAQ-iqudNDUbp|5?UzH5GhKeNX8VX3~9lp#EeFG17JvC z2ACO&pbW^WsaTY=^tf^2%2|%LI7{Os=VWoVCe3CyZQ9szk|u4M^ki|G#63-&G)>!_ zw*UA2_bszvNXhOwIR@}9?z{W_?|=X8F7L;_+?(0=8^3jW*4Upd@;oL_?bl+)c=G3% zE5=+@ziX~I=0edmUd&u}O(ks{FK$kqE<55$m{%oX%y>z9;>JsvqA{g}nRATSg}@ue zyz64pcxk3cp>Qu_UUkgHwDGzbm@!@tJ>ACZrKiVuo9O8^ULQT1jMq<3pYb-+({H>1 zdNvzx3q1qI8>DB8@rLLbG~O^hL&h7SXV`dK=@~KJC_P(^w~d}r<87yBoAK_TXS?xs z&~pbtVN7uc+_|{9xYG~0(+|lSZzmJ)GTxo^+-1BhJ$IXVW8xR@F`mRp=-tMl~RfGItQ zwm8Lu{0DR>K4dC49H%&mkc)>+vD*|68}EMG>;*xFn7Q~cJQp7^-UFs6?t^B^dx*Zn z;(M6BN5rSGM}pY51+i1&dz8MTW(s8;F{@*EFTD-boVc{jJm;E=Q|6+ilD~(|Q1OT< zO170>a%Vin@1y48QByo>N=K3N23oH#NA%^G4%uS}C>_^wd(FkiO;Lg#mm$~|d-gFT zJ$2eUMhHE@ew{GICyXb5C(I4k6i*oMF*Ajrlg4|}4?3>G#4_v2pv-#0Z?Hzgsr5uVF|mBTjuscZ%UcnN0*uSfr0m1ayw4CJ z*Il!E9la_9W-WB<%}v*yEXTKkNa*VE#Z>P3N8ajC}6OrM+P|KlgVZY)_OZwL&)@6b? zSQZk*z*nA&hl9PIGOM3eMs-IRgs!VJABf0D@Is|(6@c%8#5obIp` zCB8MXy5<~wl@XD_5Zv&yY6cf9v3-U_y9|wW^=w>MvTa?x8`ssGAqm!AN|7qAr%ePt z!)85W*3!i9jD^O)%C|T#r%~=<%%7AtGzA|Tx~Zk!E}*@H%(+3On%ac`&?a@JCH2aZ z%qNb(h_rS@GLM`e+iWnm4)~~q2Xg=t4~d^nR_%>=qzWl>*v`ossLgKg(cLvApuouT z)uno2J|w7R^`g*PadB>gImI-4j8Bi8fViTD^9X;+keEsvE+J-K_4PwyH_qqn;-iDknCxrCJ%z2q}=f}{b>G|J3^dnNwUmas_Lppmsdo+7!;^2|p zHR;{Fu|L`u(4Uj5v&H3lsZv%7jIzAaY@LMUO7VIp`hhZ*5m(Ewuf5J@XS>tm%p$M< zZ~z`bpGg>O7quS}guazHE;1tL%ff4<>tO)!fmx|TO*$JeGvtMJ4J>n?d+t8zdrXK0|xNX~?8#O-9 zLqrbDHV5LH+Yv8V$O23ZW3v4C=_C57*sNDN|GXmP2vONw0r$%s8n_C+y&`EwDq-Z#+MkLt<@{b?D@C|Nj6cN=v?RM zv*YDTxl`^idIzJ=tfH&vBLb2cp zb*@?A2tddoJhuxDB%|D2a7YJG+~rECY$YbvOkkfnch>&Tv1lEuJK<#E?Dlh4E0wyE zed)1^oOPv-m8z!*fY|)Ahb|E(bP8`Y{H-I8hKYn;ujcdjAi+N57a6S2O(*+e>Dbo( z0g=a&LWVNVA^BDJQTadNOggi^u!{;Q@IG%yBt>8;xh5b>!aFgY4IY&f z#FI4AoHgq8aFZYk2=C%+nhGFk8gOlT40OK^;!~=y8&*oRsz<3@s^{}LkjmU%I2OMJ_5!a%^Qc!3If|(#u;|f`s;3W z1Coi-2)1QIuqmk#8hL#)`%K`C3SpcnkD9o2_HxEldW|DL3zRC$u5sRJ6UQ`he1pVs zO|1ICoEv} zEr?#t*@?vff9J!o_y!~zB=CBin6URvPIvHs7R~k_f)JRyMuaI*Xi`{CDcT7#hCvY= zA`^TV&S5x5OvB`P8=Obs95o#Wn@x|P(Pa}b$SqDF61sjgl`W@Ksl^8?aca2IlwYVLf5xj5b4ac=nxmNZl*jUXyXuVx`{T>?};>Q8imzR zh+`Wui*hJ7M2L_z<$B@7JGYylv&v+?QaxBjqAqqJCOl}9GlmyCsmq0>Vm?pZF`rjE zg3NJvTgBUbz2Onb&z88kCAme6=HD&v_e)~{B}+4@2;`$DmPtynDY zza|ttD@`jvAg*OAbJ@b;ViuvbY^j{BRhEj`xoTm4NhX`bX0q~wi8qm*DQ2NcXKPoh z#caJIk#mBKEIcf4wo@`$@3{^I)C@m?~>f@~7*=v^` zeH1XeHo^2-b6xNp#+OogcJXSh^kPvG3y-2bW;F;Zu2~@DZVDLg|f#Ip1xjR zsFd%mv8MgC#nNo4JU=lVY6nH3D8*`_UZjx4*+@dgs&o5|PyP>58Mqc$ZLW1fx3~t> zt{WfEQiG;P_}MI8P%ZvbniqZp`40*~5`^RR1oe7xskX|1!X`iyfhhYLZgqbd_mpom zpgy@4nQ$Vu-bffx_eIded`nW?mA*)2QqxTuFxA2)+UI#rt=jL3lf57Vq$Cw!77VE` z$bT8fr6UBX#ywjazdU~S^!f2Ddwz*g*A|L`93?j5yg#r)dH zgGQk*B>apLZ#4xX>6z*2lR0vM-L-v>W)Dsr%#K~F0;&Z0-hjx~wlK}v<4?T(%=D@3 zg_F6b&P*Rad!)9@&%1l#fw}Bhy;xl;l?#hUM25}oKPqm1x3@QB?I%t?{*F_W8lOIQ zo|22z=e#0hKQ@h%s6_WNX9;3+)NNT#;f0zii7`L%yiQ38PxoiO4CRat3(9p??Ro-cn- z!jULJml25uDn2U7T~IKXxqJ0R7e4NmmsFdAw0S~0&X=D~L>hmeG(Kr#TU)2)gt&-$ z;;g6SIFeA!a5q%bo|X76y$aJ}ZU%im4Bno1AHqPs%=V>4zu`Z8Gb99H}j4tuGvF6wdl?#LarYS?l); zt4g;swiv-3$s={IZHm?{YR+cl7?dUqt`DG-YXj!6?eRdH%+ll5-q@jC`BV(g%t}%I z7h^Mx1DR<%kU#CT&e=YW+V?~UvafX@J0W`Pwgxh`;XwYqsSh%9LI?6M?LaC#HW)|( z)588IrH~)91F7)hKz`>21G%ZqKn8fT!PxcQ*4Q--Q)`L-*u9vTS(!*U^4!+AfdzMO zNZ6+&PK~ADI@6T9M7E#Gd0iW>WjgxY=3UeNDh8$=?{6VV-nl8iD4QctH)ozBwxgf#Bw%8 zZ{a|EIU?ealqdhV@L3yWHVGWtJ#lDmuc%X%rL3eCooTm6Ak(ORJ*VgioEzahTDzx1W3$C9Ku?N**@P9G2s$_)>*JFfE;9RLzMCL<0bGVp1{W zBAj;`A5Go`=UHR?^7aw?Tkh$g`#B#(3N`?T*itH&i`6_=l~79rvR8s^7yKL{ zpj&5F!Y}M~MAJsu)#ncN@A`U7_90z$Li127tw&hiNH+=~*fG+6E!-u78?pL6Wd|1h5ov-xmQ;wUHJZ{n zeMP^{1)9){AysR|&qWFN-vN-hzhSEYcn@q2zKN;V2l7=EX>|QNM(C+}O3NiQ&zSX- zLqARqO}JY-Cw0-$dZHn%%w7qdbFIbQ+T>GaaA0Mxd6_F;4U8t`cd~YMNeK!O2?klE zQLr=So-_W0Z35RpIH%#9@}aT;c@Hrf3S?YBX|0#m9OFN13%PKt!5kB+_zBywU0}go zx-|(r)ar_Au<SW`{hZqtC$m;}bPwbqDyqbOMWE^)F8_4@LW0|%~MyEaj)U-e*r zyIOi-qFg!f@|COQgAW`!Abp#iT{>{$sb`*e;;D&+`qJXv!T^iajM`!HU`#OI6tc^O zQZ+kPD;5@OUySA5R7FxU*7_h0pI1bp6O@qW- z-%0@^(lu-LJz(OHpfc1v<)_{VfSamP8emMs92~J!!vIX2Q0XM3XIR$YEYYb(>PqK6hVQe9^X{U&sNHEEzn>FmlCCj@CTXOqrA_Z z16;|z>GvO#$JTCQYfVdQ z`=IAEwl-$VYuDO@-TBkDwSOTk{ZV;7zNOAiO0YIA?paC7J zmo@ef8k?ff2--n7eBPZ|`6+j%+~u&R{ZSaG;`(1q7(*Cz2QY{?!eEn+Ww!@|Z*Rit zCNP9py%881>jN128d2Dxt)u{ZEdAY0#{Q5y+PA+t zHU?!Ncggsiks>~A^&seEkK~r*q^t80(L5CIbI-%s7o=LO>+XR99I6qZf@7e35#b$7 zSNTZDX02j(O;`LEj3vC`7J=pbwFMQZ(GAM#{zlDf6xzd87Uktw^9-d*$~hZsJj=aX z0`3OQM!m4@b~&R|8naH9+vWB+J@H;Q>q2n0;PLZbCoa8#<$mlBq}duqXilW5)pf9hI`>D(*v`i# zZOK|1*0|A27>v!Afbg;w*L=q@tkOb~&HKT6R`;P?Ny4BzAfnrh+;pt#7u+lSnweRt z%k#KA!y<=6E(BP)Z}4~)d-I?xFH{yqzt+XMN|h^mqUo0_u<Ppg3o#c5OK@wd-h&HoMW0*wU zq)PHSS4_P@BV8WFZrs=u`p6WJgM-C_bdbce_ zWC58Ds1fWSO{;Qma}DU4=IrX6v%5{s)o%hb!$xLnvwev++n^ zeQk2C^oTt0UT3Ba6#AR`lDTatw8{AuL7}<4+lNA%oNb{n5SsyNY>u`j;WV^npu-rq zD>H0Or9p%%T0xiMz%4;n#&YAej{m|m6OStl`eVyyct{rw+*fVjKId=+R_PJTRMSj z^H&vIsW$LaepvnVlf6(*09yOv;Ho|v=5E1H)_jrn^iX0vL*vt)}9S)S<>&Dlu~YV zbuwqIp}C1pJ#V&Y1$sq${ped-Y+BL%nO_Yno%ZZDtaP2bKiJYEj5&1G zVE4{XI!&1CSW1$X`(##%pk}%owp+C}qJ`L#{K@;-NTw6V7|# zgt~kOx}$a!ESr8OPo^0dZ*0Z`VIJ>)cL=WRA4m{;AH{wP9xlzbmddkT-H~E(lB>PB2<^&!Bb+_P z$}ZEDa#8ng)Qmqo9OQ7hljHj$92%KuT7r>@@L8R&7gyBqL_rBFxU@)V>>^^ZvdlH+ zF`hl5Jr$+QR;tTaYdI{z=e`Bbx55e0B(&h7XpHS*ej)Hz^9vC)qiGko77^NrFJ$FJ znnsH*F{k~StQxrHs>PL;fH$8+iGmqP{KkfQhEfBm?W5bb!QR#Dq!~Wo^t*lT4%6dq zaqf4MPKNfcBl6qN|Bt(&|ESw<<9Z`;>OUIm70m0U4UHBwXB%^23&#OxcgNWM@mUNz zQNdW#E5I5Y*to$1s0+sa#oXZjh1LB?qQCP_SC&ai*AORrDw# zEzpZzem?iXV0ekEkjM0Z8Yd+RQuKSs&sHv8%g-y4!0CViFxwWlyGHjt8K=(~gw5XF zA!9z^ZdV3~R)YBv+P^EEyM8xq%Oto09q2@BywqTHXpNf`re?}J0%2BJffu(M7^Wz? zu70bdmT&5VuQ;$9e!|{nz@YB{=so`k;TjMB)B61q!Jkx3agqB_czh6wWIAFjPcw%| z)|O$$xe;Tz)?b``wASegXs;(H<6o+mQp4;FbN$Hk#x~3Rk8f0lL~l$u~@B~D7{#! zl`3WKvK!;6G)@lA8j`}MsS_Mr3#qDZdqYE(^Rtnows2b2qkk#^dogivh|k1g&fTIO z^|}6%z!powhJ|eHbauJ=wuS$Pg#pK$LqbqV%>1JfWA{{b^+dYd`uUskW+X7FdVe^ab7^2P?a7`6f2x79a z+usnB)PP!k;c3?*fN}Tg_K%c>1|jjd4~e*igmy3j1OHB)2n;xZ23BM)$@6mD(&E~K z&Q}0~%2JV=Zz3o2-qNW&JyaK-%=-s*Dla>RE0(kodbD0D-aA3?aoAPd=nh8wB%J+k zaqJ842*y5jw=x;b$}WHGUlK>gvc#o3L41rhcO72eW(RzY%){DFgapr&4++DGk;3g( z>q)#Jo#oR>SWhIqo|NV$9lL>%8x!<6UW9*x7}|yRvQmk_$iic&x4plUlG)bV01$V} zP38e0z4v6u*&)PYOGqr@KA2GEnrQ?RX+u3?IlG~xg3?SqGK(--q9?@lXzj<~)Pmk6 z-fB;}{hrW1I5~&zblL6^YO$Ab%pV9&!H?;f$C{OE9ditBSHrN! zHyrjvBP==(cgHOVw*(Skz)c2|i;ctmfIr-sD1^XIqmr%92CQUj0?Af!I?)UZO18UH zvL$d8DXe76X@N z(^wPT{Md$QKz;fZj6I?Q@lx-;bE84wLICr& zK6jLd7P|C;78Hj6MD))Z9s?74petoSJKYrBba7p9q*lWtyG=j|^pZ3^&ldA7l(AsR zDS{)e)$A~f;=X+{OqsDMQ!OdG*-lz56>&(4Gr-nC#XOpC0?Y%|_-ZT=-SiRlHWrBD z4dWS_`ZS_(ZK%_xXP9EXBanKW?M~}_`U)3^Zob7W++mkuja|CM7VbBqy$Rk5Mf zI^o7C@D$FRpLhK82M`OIW=@PV$BE3Gv>)z7W=ogd7F%nl0{QjMTC-pQM! zwgIa@Py0SQh~t_An1?4WeV#rsZ zbG?8K!6N@414kIh#()(KF>otcl!|F8txDi17}c5-G9qMPt30FF(c5QM{zjP9r_2m$ zr|Bj_H2T0Nzvc+Wr@p2L#*elq7!(poFvy)rHE8^K4i1-OJ_?6RGHl9Y$cCA#`e;a4 zXw|t5>Oy)%67vo?&loF^T+8D;)B?gn{kCKx;>y{;XOBsuL8Y>UTaVuUhCLF-Gz>bM zaMW*BEB^<~uTB0SVDtd8TB_9};%u)oBJ?Urn|iN<+cbs)-!4%Y@VgTf4Q+!V7b1-+ zt&Z*kH9nD2%9UsgblXY7JlSWdf$mUtRrCcl&?Q70;3X&*XF%{)e^ce+9?WXW#RfC; zChea$^}4k23qfN4SNHz6j&AoF=&O#X@l zc(T+`eAjlpl_o)i{>?Whxud;G8%P$oO!rUVd>b6D#)Z;6k8FhSBj`c_9sr3*@&63Y z55b`*Poe&H<@una8_Z{4o-a&(4&Bvj7l$a(>&)$OTcmpDPG71=6=*xQOFe<|Gmd{Y z*W<~v)xXw>mcYQ-J^#u@A--^R0&4}BXxq%o&{2QUSM!r5s>upc3IX7q(+=+=^hF2gI-S_+)K%dC8u zM-i~~J7rV>xM44pR2`J(h9Bj*VQH_gk~ZVYUmYuPd^W%%8Xbr!p?LZcIJ8U9#={k1 z@}0+!BV0%bQK)LH-9wjl^R?1TMMatrBSP&kGzO7j8sF)3>;N}{PJR(clSNW4z5-Qn zhuM%bOhk!^|Ddx4YY0*6DT)5wZg3y9(|8~CDd>3|g@ERVAu+2CVuy%h2TKi!bFN*P zyFq(UJ>dr?Be5yOj_8s^o!4+<{i=Oosg8xES_{hrg#~4FN0RsWg#}dwrS{rbzpzcw zmbEP`)v>TtQ(@XNnNX*&KiY({zFqAU=6GuL{TzvYHPEeb$uQ+_Ez^>@LCprb4S^$e zPY(gXhHI*z$(tfs6AgiptlYUewen$S#!KQ;6Oi>_F_?sE!PGu-UZSVT=Vm7gwicU(nDM z-xMyH__W1!>`c?^4_qp~;bP|pBiA1od&+3Eo@_{{Tf)Iv;te<|y4s)$wd<)3Gl;UJ zH(5xsQ4h=N$>83eq@7iBF`Hm&r?Zq-6*8~gYti7aX zJUd&F$k}=pE8R$dI~^yoV`nS#rP;#b;`MQ^%kiei+Ct^(qL+Q4m|ey?Uu7;EuA)ea z%GLP=!M5VwUW(uBNMLWut0r77#$cH(EYu)3(6sr4!*45~dK{U2mtbEeQpJP>!j8@t0o9gX5H!Er$c)GdrAezP_w5KrD=cZ4r9ApV|#*F zJTqKigKkv&oC&V4(mOJE=9D*`7J1QW_T5*Gj4h9!9nb9@7hGR|o4wie%6RrH52EJu zPFo3j{z~zB?cRpWW4Ia>2dBrmfUQ?@=(={zPRfS#&yJ1h?iS+n-tnY=k6Z6R>M*Vip_+4=|f1NzBMKBK4?$@roPFA@Zk@TTChNfh#-$em^D? za+~m`X-pe3L#|#JYXiDLlx<*xXB#V|@8>qIHNje+K6n1XucXrYo2xX%N_nXMaHncP zP=Y!wbmzh^AB7V3?N^;<)e;0iKl1;h@_)W@o{HAJ9#k(ha%7FS zmyBA2@|t5WHd)r&Qq+WFzY#)S5o+4up=y0alkHZAE86s*-?-@+Hr>DV^-(llEqgtU zx1nz|85|P@7mEW$$P1gRcKj*~8d%SteXPw;y@|>9%fV}>T5^wvwJ4*dt@3A#52&KO z%;6jSkY)c94xUG8j2JRkHbfVw6 z$4QIC*TZe({Z79#>f9xgp}P1iPp@dD!F~nIW=ep#fP0N1%DQW!M&%pV&#vjQ* z{2^S0l+WpaP@Si2O<+=i00O82VN6H75rBR}%=p_~(oUbK>ZF)nv7RlLvoF5jWoO|s zJKiKv12PS%s8cj#T3!wcLO>AM+n8{9d6spX$Vd0EF$p_%4L-@aZtQq~N9!iuyq=xl z_d7fa()l>#sx^MULlxD$Zic3EPZ-nm$xj1@0x7h`64*#!l+o{aYzkntP^d0d%EI-L zy;@wX6g(1u)5mj9otr-MME(iNlTS{co5sBm`KQiIlTJSwym3JAy`21$XL9E+aP%%z zuNH~c=gywsHRSmxj-P$PUO-l=mkNuemx@YginY2LZXJ_f5{dRL3L10Fw@R=G3G)#EqgP8;U;R7102%3i;~ zUA90!rH0;RLnY-VwpoMkj@GV0_xLICHOr5KzT!?t*sQ+YZaRK0`ZY_g90}px1wD#X z{D$LSftI25RwDeZA9{6ezn*+gK}*Bcy>B?Yye*}=H$Irq4{AP)e{nMmL1s8Ol^wUr zxXmb!9_-o;be-S#bBljAc;Ejys!|S+mrS3M#{3<{EEhX|O9ZP) zAZ0R&`EV594`Mcf93lbjli!$-w0k1|_c`PI#;M*(+w1hZoBX?#QzjxIJu3yt^nH$` zcijnhcgmm4!{_2A*yOpcr&`FXr}Pr3HHYS9H8cm-wZvKt{D8zjti@e*S#nSRm1=A^;)K#{IFM`qz#%lWB-E z&1U?&-~_rhH%9ZTT6&@)D7@X*=WZ!#LEepsruo7rVpi6NaJo~7PX_muZTQ=7e~-DN zPUl||FLWv(Uhi;pMcT9@TTV#1R7k*_@?1;6A_OKHWJ!ll<{|_p9tm=TjIk!75Dkh2 zLGdV2Al(Rwf0vqQFD$zUp)5aa53o@^i@H_h0g&EQJJCDKhuJA`x_CfK|g`^UFVGQXj;VfBde@% zcm;TBaM$nE>sJ(zdph@zk)LabS@~75;q7q#C7eHn^Ia$%5w+#TQvLQc8ea^EYBc$U zKMi#noROwZgD38H1f{K!zCw^4(N8b?c96O7v&&xa83^iTxj@j^l|3J<2jMup{{d-? zfC!WdV{+M2>2KW zMHL$IQjbZ3Wo=LQ1sHHgz3yRw8xDt&yUz8Q_W8PQkuS~50n>h}?ntUwn5u`VwDew- z?!_8g)!n}IO&jFvx<$Ue=6tp{&Morw8_{s;8B~CL-JS9wYgd?cV1umfn%b0g%LZB7 zLFYEJ_TD<{AYsG}b!cQyq}E<$9ojJKa3rhq=CY1Nvi9D7R!&T;f=-9J2kB!AB;kB-!_alMhb%-Ul9e zm>CaFJovCJVRFufJaou<4jv3jdiW9Bl7bDo|DhoEp!`rM%Kb4MnnY>gfpI7ImvDjw zVx^W}gu|UuYjB=7KL7X=ag=z1UUZ^1U^1cZln3Fd{a0zCTAQ66PSQ;}la4;wh<{T2 z56b@soBzv0Vhb_|CiMjBy~Bzlu37DaQK3XCTjDsck(;b>f17=!P*QUJSE?^1PIF$w z`>(@O)~hc;3Q&%N8wUdG^csZ;m^HR?%~hGY+NOsOOG8#(AdOTtVRRVYq#4G!vb!`x z+&`a~@Dqs)5M-5SX7v~kMcT>_HC7&rR-S0C{12ssKQuEdXUxo+>l>0g3$u+F2n$2h#BL4ij&E3ZeZzXpH>~@jXkO1^rb2#aHy;pb?M|&* z+Sy4~%GnpQWwnODuATb=oZo`;+i-pd&cB0$&yD7O&)7MxOMuLAD`5!q1Cx0AnQ05M z-GG?2h)3+%9+a znCY9@S=a7Q868;Jgex6xI6*qvG5mD#Fx^9T0c@z@EC)BOqRzSeh5U-nv8E%8*yBve zF#qi2rB0CI6qFh3m0p~S`vRmp=J0Aq-bJuh9?y-JE9EQ~w6ZVCJFXP4uVw$0qop=K za@Uv-FSaiEgu{D)pnCA)5Z!kCQ8*um^9eZL3x`WU*hk3?_dF87os7Ei z?~`y|fkRs;Z&{&1l$;#vnrijrfL7p;e{+6tru;`Ze*lL90GPP)`9Fh00pLq;{u`Vx z!}%LHP@#xEvx~(-mA)qni#7eO7pKk&cc@t}KTY@Fk&%c-%!+9YQ!Fo6OXWI|GWWNB z2`}O+X*E5h^FQE_ymM~dum2thv9djCa>;c{zZ!lNV^sTLgAayf5~82E>3BLd`b@gN zr -- [[file:src/dmv.py][dmv.py]] -- [[file:src/io.py][io.py]] -- [[file:src/harmonic.py::harmonic%20py%20initialization%20for%20dmv][harmonic.py]] - -(But absolute, extended, really-quite-dead-now deadline: August 31...) -* DONE outer probabilities - CLOSED: [2008-06-12 Thu 11:11] -# <> -There are 6 different configurations here, based on what the above -(mother) node is, and what the Node for which we're computing is. -Here *r* is not between *s* and *t* as in inner(), but an /outer/ index. *loc_N* -is the location of the Node head in the sentence, *loc_m* for the head -of the mother of Node. - -+ mother is a RIGHT-stop: - - outer(*s, t*, mom.LHS, *loc_N*), no inner-call - - adjacent iff *t* == *loc_m* -+ mother is a LEFT-stop: - - outer(*s, t*, mom.LHS, *loc_N*), no inner-call - - adjacent iff *s* == *loc_m* - -+ Node is on the LEFT branch (mom.L == Node) - * and mother is a LEFT attachment: - - *loc_N* will be in the LEFT branch, can be anything here. - - In the RIGHT, attached, branch we find inner(*t+1, r*, mom.R, *loc_m*) for - all possible *loc_m* in the right part of the sentence. - - outer(*s, r*, mom.LHS, *loc_m*). - - adjacent iff *t+1* == *loc_m* - * and mother is a RIGHT attachment: - - *loc_m* = *loc_N*. - - In the RIGHT, attached, branch we find inner(*t+1, r*, mom.R, *loc_R*) for - all possible *loc_R* in the right part of the sentence. - - outer(*s, r*, mom.LHS, *loc_N*). - - adjacent iff *t* == *loc_m* - -+ Node is on the RIGHT branch (mom.R == Node) - * and mother is a LEFT attachment: - - *loc_m* = *loc_N*. - - In the LEFT, attached, branch we find inner(*r, s-1*, mom.L, *loc_L*) for - all possible *loc_L* in the left part of the sentence. - - outer(*r, t*, mom.LHS, *loc_m*). - - adjacent iff *s* == *loc_m* - * and mother is a RIGHT attachment: - - *loc_N* will be in the RIGHT branch, can be anything here. - - In the RIGHT, attached, branch we find inner(*r, s-1*, mom.L, *loc_m*) for - all possible *loc_m* in the right part of the sentence. - - outer(*r, t*, mom.LHS, *loc_N*). - - adjacent iff *s-1* == *loc_m* - -[[file:outer_attachments.jpg]] - -Also, unlike in [[http://bibsonomy.org/bibtex/2b9f6798bb092697da7042ca3f5dee795][Lari & Young]], non-ROOT ('S') symbols may cover the -whole sentence, but ROOT may /only/ appear if it covers the whole -sentence. -** COMMENT qtrees - -\Tree [.{$h\urcorner$} [.{$_s \ulcorner a\urcorner _t$\\ - Node} ] {$_{t+1} h\urcorner _r$\\ - R} ] -\Tree [.{$h$} {$_{s} h _{t}$\\ - Node} [.{$_{t+1} \ulcorner a\urcorner _r$\\ - R} ] ] -\Tree [.{$h\urcorner$} [.{$_r \ulcorner a\urcorner _{s-1}$\\ - L} ] {$_{s} h\urcorner _t$\\ - Node} ] -\Tree [.{$h$} {$_{r} h _{s-1}$\\ - L} [.{$_{s} \ulcorner a\urcorner _t$\\ - Node} ] ] -* TODO [#B] P_STOP and P_CHOOSE for IO/EM (reestimation) -[[file:src/dmv.py::DMV%20probabilities][dmv-P_STOP]] -Remember: The P_{STOP} formula is upside-down (left-to-right also). -(In the article..not the [[http://www.eecs.berkeley.edu/~klein/papers/klein_thesis.pdf][thesis]]) - -** P_STOP formulas for various dir and adj: -Assuming this: -- P_{STOP}(STOP|h,L,non_adj) = \sum_{corpus} \sum_{sloc(h)} - inner(s,t,_h_, ...) / \sum_{corpus} \sum_{s} \sum_{t>loc(h)} - inner(s,t,h_, ...) -- P_{STOP}(STOP|h,R,adj) = \sum_{corpus} \sum_{s} \sum_{t=loc(h)} - inner(s,t,_h_, ...) / \sum_{corpus} \sum_{s} \sum_{t=loc(h)} - inner(s,t,h_, ...) - -(And P_{STOP}(-STOP|...) = 1 - P_{STOP}(STOP|...) ) -** P_CHOOSE formula: -Assuming this: -- P_{CHOOSE}(a|h,L) = \sum_{corpus} \sum_{s=loc(h)} \sum_{r=loc(h)} - inner(s,t,h_,...) - - t >= loc(h) since there are many possibilites for - right-attachments below, and each of them alone gives a lower - probability (through multiplication) to the upper tree (so sum - them all) -- P_{CHOOSE}(a|h,R) = \sum_{corpus} \sum_{s=loc(h)} \sum_{r>s} - inner(s,r,_a_,...) / \sum_{corpus} \sum_{s=loc(h)} \sum_{t} - inner(s,t,h, ...) -** DONE [#A] How do we only count from completed trees? - CLOSED: [2008-06-13 Fri 11:40] -Use c(s,t,Node); inner * outer / P_sent - -** DONE [#A] c(s,t,Node) - CLOSED: [2008-06-13 Fri 11:38] -= inner * outer / P_sent - -implemented as inner * outer / inner_sent - -* TOGROK Find Most Probable Parse of given test sentence -We could probably do it pretty easily from the chart: -- for all loc_h, select the (0,len(sent),ROOT,loc_h) that has the highest p -- for stops, just keep going -- for rewrites, for loc_dep select the one that has the highest p -* TOGROK Combine CCM with DMV -* TODO Get a dependency parsed version of WSJ to test with -* Initialization -[[file:~/Documents/Skole/V08/Probability/dmvccm/src/dmv.py::Initialization%20todo][dmv-inits]] - -We go through the corpus, since the probabilities are based on how far -away in the sentence arguments are from their heads. -** TOGROK CCM Initialization -P_{SPLIT} used here... how, again? -** DONE Separate initialization to another file? :PRETTIER: - CLOSED: [2008-06-08 Sun 12:51] -[[file:src/harmonic.py::harmonic%20py%20initialization%20for%20dmv][harmonic.py]] -** DONE DMV Initialization probabilities -(from initialization frequency) -** DONE DMV Initialization frequencies - CLOSED: [2008-05-27 Tue 20:04] -*** P_STOP -P_{STOP} is not well defined by K&M. One possible interpretation given -the sentence [det nn vb nn] is -: f_{STOP}( STOP|det, L, adj) +1 -: f_{STOP}(-STOP|det, L, adj) +0 -: f_{STOP}( STOP|det, L, non_adj) +1 -: f_{STOP}(-STOP|det, L, non_adj) +0 -: f_{STOP}( STOP|det, R, adj) +0 -: f_{STOP}(-STOP|det, R, adj) +1 -: -: f_{STOP}( STOP|nn, L, adj) +0 -: f_{STOP}(-STOP|nn, L, adj) +1 -: f_{STOP}( STOP|nn, L, non_adj) +1 # since there's at least one to the left -: f_{STOP}(-STOP|nn, L, non_adj) +0 -**** TODO tweak -# <> -: f[head, 'STOP', 'LN'] += (i_h <= 1) # first two words -: f[head, '-STOP', 'LN'] += (not i_h <= 1) -: f[head, 'STOP', 'LA'] += (i_h == 0) # very first word -: f[head, '-STOP', 'LA'] += (not i_h == 0) -: f[head, 'STOP', 'RN'] += (i_h >= n - 2) # last two words -: f[head, '-STOP', 'RN'] += (not i_h >= n - 2) -: f[head, 'STOP', 'RA'] += (i_h == n - 1) # very last word -: f[head, '-STOP', 'RA'] += (not i_h == n - 1) -vs -: # this one requires some additional rewriting since it -: # introduces divisions by zero -: f[head, 'STOP', 'LN'] += (i_h == 1) # second word -: f[head, '-STOP', 'LN'] += (not i_h <= 1) # not first two -: f[head, 'STOP', 'LA'] += (i_h == 0) # first word -: f[head, '-STOP', 'LA'] += (not i_h == 0) # not first -: f[head, 'STOP', 'RN'] += (i_h == n - 2) # second-to-last -: f[head, '-STOP', 'RN'] += (not i_h >= n - 2) # not last two -: f[head, 'STOP', 'RA'] += (i_h == n - 1) # last word -: f[head, '-STOP', 'RA'] += (not i_h == n - 1) # not last -vs -: f[head, 'STOP', 'LN'] += (i_h == 1) # second word -: f[head, '-STOP', 'LN'] += (not i_h == 1) # not second -: f[head, 'STOP', 'LA'] += (i_h == 0) # first word -: f[head, '-STOP', 'LA'] += (not i_h == 0) # not first -: f[head, 'STOP', 'RN'] += (i_h == n - 2) # second-to-last -: f[head, '-STOP', 'RN'] += (not i_h == n - 2) # not second-to-last -: f[head, 'STOP', 'RA'] += (i_h == n - 1) # last word -: f[head, '-STOP', 'RA'] += (not i_h == n - 1) # not last -vs -"all words take the same number of arguments" interpreted as -:for all heads: -: p_STOP(head, 'STOP', 'LN') = 0.3 -: p_STOP(head, 'STOP', 'LA') = 0.5 -: p_STOP(head, 'STOP', 'RN') = 0.4 -: p_STOP(head, 'STOP', 'RA') = 0.7 -(which we easily may tweak in init_zeros()) -*** P_CHOOSE -Go through the corpus, counting distances between heads and -arguments. In [det nn vb nn], we give -- f_{CHOOSE}(nn|det, R) +1/1 + C -- f_{CHOOSE}(vb|det, R) +1/2 + C -- f_{CHOOSE}(nn|det, R) +1/3 + C - - If this were the full corpus, P_{CHOOSE}(nn|det, R) would have - (1+1/3+2C) / sum_a f_{CHOOSE}(a|det, R) - -The ROOT gets "each argument with equal probability", so in a sentence -of three words, 1/3 for each (in [nn vb nn], 'nn' gets 2/3). Basically -just a frequency count of the corpus... - -In a sense there are no terminal probabilities, since an /h/ can only -rewrite to an 'h' anyway. -* [#C] Deferred -http://wiki.python.org/moin/PythonSpeed/PerformanceTips Eg., use -map/reduce/filter/[i for i in [i's]]/(i for i in [i's]) instead of -for-loops; use local variables for globals (global variables or or -functions), etc. - -** TODO when reestimating P_STOP etc, remove rules with p < epsilon :OPTIMIZE: -** TODO inner_dmv, short ranges and impossible attachment :OPTIMIZE: -If s-t <= 2, there can be only one attachment below, so don't recurse -with both Lattach=True and Rattach=True. - -If s-t <= 1, there can be no attachment below, so only recurse with -Lattach=False, Rattach=False. - -Put this in the loop under rewrite rules (could also do it in the STOP -section, but that would only have an effect on very short sentences). -** TODO clean up the module files :PRETTIER: -Is there better way to divide dmv and harmonic? There's a two-way -dependency between the modules. Guess there could be a third file that -imports both the initialization and the actual EM stuff, while a file -containing constants and classes could be imported by all others: -: dmv.py imports dmv_EM.py imports dmv_classes.py -: dmv.py imports dmv_inits.py imports dmv_classes.py - -** TOGROK Some (tagged) sentences are bound to come twice :OPTIMIZE: -Eg, first sort and count, so that the corpus -[['nn','vbd','det','nn'], - ['vbd','nn','det','nn'], - ['nn','vbd','det','nn']] -becomes -[(['nn','vbd','det','nn'],2), - (['vbd','nn','det','nn'],1)] -and then in each loop through sentences, make sure we handle the -frequency correctly. - -Is there much to gain here? - -** TOGROK tags as numbers or tags as strings? :OPTIMIZE: -Need to clean up the representation. - -Stick with tag-strings in initialization then switch to numbers for -IO-algorithm perhaps? Can probably afford more string-matching in -initialization.. -** DONE if loc_h == t, no need to try right-attachment rules &v.v. :OPTIMIZE: - CLOSED: [2008-06-10 Tue 14:34] -(and if loc_h == s, no need to try left-attachment rules.) - -Modest speed increase (5%). -** DONE io.debug parameters should not call functions :OPTIMIZE: - CLOSED: [2008-06-10 Tue 12:26] -Exchanged all io.debug(str,'level') calls with statements of the form: -:if 'level' in io.DEBUG: -: print str - -and got an almost threefold speed increase on inner(). -** DONE inner_dmv() should disregard rules with heads not in sent :OPTIMIZE: - CLOSED: [2008-06-08 Sun 10:18] -If the sentence is "nn vbd det nn", we should not even look at rules -where -: rule.head() not in "nn vbd det nn".split() -This is ruled out by getting rules from g.rules(LHS, sent). - -Also, we optimize this further by saying we don't even recurse into -attachment rules where -: rule.head() not in sent[ s :r+1] -: rule.head() not in sent[r+1:t+1] -meaning, if we're looking at the span "vbd det", we only use -attachment rules where both daughters are members of ['vbd','det'] -(although we don't (yet) care about removing rules that rewrite to the -same tag if there are no duplicate tags in the span, etc., that would -be a lot of trouble for little potential gain). -* Adjacency and combining it with the inside-outside algorithm -Each DMV_Rule has both a probN and a probA, for adjacencies. inner() -and outer() needs the correct one in each case. - -In each inner() call, loc_h is the location of the head of this -dependency structure. In each outer() call, it's the head of the /Node/, -the structure we're looking outside of. - -We call inner() for each location of a head, and on each terminal, -loc_h must equal s (and t). In the recursive attachment calls, we use -the locations (sentence indices) of words to the left or right of the -head in calls to inner(). /loc_h lets us check whether we need probN or -probA/. -** Possible alternate type of adjacency -K&M's adjacency is just whether or not an argument has been generated -in the current direction yet. One could also make a stronger type of -adjacency, where h and a are not adjacent if b is in between, eg. with -the sentence "a b h" and the structure ((h->a), (a->b)), h is -K&M-adjacent to a, but not next to a, since b is in between. It's easy -to check this type of adjacency in inner(), but it needs new rules for -P_STOP reestimation. -* Expectation Maximation in IO/DMV-terms -outer(s,t,Node) and inner(s,t,Node) calculates the expected number of -trees (CNF-)headed by Node from *s* to *t* (sentence positions). This uses -the P_STOP and P_CHOOSE values, which have been conveniently -distributed into CNF rules as probN and probA (non-adjacent and -adjacent probabilites). - -When re-estimating, we use the expected values from outer() and -inner() to get new values for P_STOP and P_CHOOSE. When we've -re-estimated for the entire corpus, we distribute P_STOP and P_CHOOSE -into the CNF rules again, so that in the next round we use new probN -and probA to find outer- and inner-probabilites. - -The distribution of P_STOP and P_CHOOSE into CNF rules also happens in -init_normalize() (here along with the creation of P_STOP and -P_CHOOSE); P_STOP is used to create CNF rules where one branch of the -rule is STOP, P_CHOOSE is used to create rules of the form -: h -> h _a_ -: h_ -> h_ _a_ - -Since "adjacency" is not captured in regular CNF rules, we need two -probabilites for each rule, and outer() and inner() have to know when -to use which. - - -* Python-stuff -# <> -Make those debug statements steal a bit less attention in emacs: -:(font-lock-add-keywords -: 'python-mode ; not really regexp, a bit slow -: '(("^\\( *\\)\\(\\if +'.+' +in +io.DEBUG. *\\( -:\\1 .+$\\)+\\)" 2 font-lock-preprocessor-face t))) -:(font-lock-add-keywords -: 'python-mode -: '(("\\<\\(\\(io\\.\\)?debug(.+)\\)" 1 font-lock-preprocessor-face t))) - -- [[file:src/pseudo.py][pseudo.py]] -- http://nltk.org/doc/en/structured-programming.html recursive dynamic -- http://nltk.org/doc/en/advanced-parsing.html -- http://jaynes.colorado.edu/PythonIdioms.html - - - -* Git -Repository web page: http://repo.or.cz/w/dmvccm.git - -Setting up a new project: -: git init -: git add . -: git commit -m "first release" - -Later on: (=-a= does =git rm= and =git add= automatically) -: git init -: git commit -a -m "some subsequent release" - -Then push stuff up to the remote server: -: git push git+ssh://username@repo.or.cz/srv/git/dmvccm.git master - -(=eval `ssh-agent`= and =ssh-add= to avoid having to type in keyphrase all -the time) - -Make a copy of the (remote) master branch: -: git clone git://repo.or.cz/dmvccm.git - -Make and name a new branch in this folder -: git checkout -b mybranch - -To save changes in =mybranch=: -: git commit -a - -Go back to the master branch (uncommitted changes from =mybranch= are -carried over): -: git checkout master - -Try out: -: git add --interactive - -Good tutorial: -http://www-cs-students.stanford.edu/~blynn//gitmagic/ diff --git a/tex/test.org.~1~ b/tex/test.org.~1~ deleted file mode 100755 index a91acd7..0000000 --- a/tex/test.org.~1~ +++ /dev/null @@ -1,15 +0,0 @@ -# -*- coding: mule-utf-8-unix -*- - -#+STARTUP: overview -#+TAGS: OPTIMIZE PRETTIER -#+STARTUP: hidestars -#+TITLE: DMV/CCM -- todo-list / progress -#+AUTHOR: Kevin Brubeck Unhammer -#+EMAIL: K.BrubeckUnhammer at student uva nl -#+OPTIONS: H:4 -#+OPTIONS: toc:3 -#+OPTIONS: ^:{} -#+LANGUAGE: en -#+SEQ_TODO: TOGROK TODO DONE - -* dmvccm report and project diff --git a/tex/test.tex.~1~ b/tex/test.tex.~1~ deleted file mode 100644 index a75e084..0000000 --- a/tex/test.tex.~1~ +++ /dev/null @@ -1,49 +0,0 @@ -% Created 2008-06-13 Fri 17:05 -\documentclass[11pt,a4paper]{article} -\usepackage[utf8]{inputenc} -\usepackage[T1]{fontenc} -\usepackage{hyperref} -\usepackage{natbib} - -\usepackage{pslatex} -\usepackage{pdfsync} -\pdfoutput=1 - -\usepackage{avm} -\avmfont{\sc} -\avmoptions{sorted,active} -\avmvalfont{\rm} -\avmsortfont{\scriptsize\it} - - -\title{DMV/CCM -- tex stuff} -\author{Kevin Brubeck Unhammer} -\date{13 June 2008} - -\begin{document} - -\maketitle - -\setcounter{tocdepth}{3} -\tableofcontents - - - -\section{stuff} - - -\Tree [.\{$h\urcorner$} [.{$_s \ulcorner a\urcorner _t$\\ - Node} ] {$_{t+1} h\urcorner _r$\\ - R} ] -\Tree [.{$h$} {$_{s} h _{t}$\\ - Node} [.{$_{t+1} \ulcorner a\urcorner _r$\\ - R} ] ] -\Tree [.{$h\urcorner$} [.{$_r \ulcorner a\urcorner _{s-1}$\\ - L} ] {$_{s} h\urcorner _t$\\ - Node} ] -\Tree [.{$h$} {$_{r} h _{s-1}$\\ - L} [.{$_{s} \ulcorner a\urcorner _t$\\ - Node} ] ] - - -\end{document} \ No newline at end of file -- 2.11.4.GIT