1 # import numpy # numpy provides Fast Arrays, for future optimization
4 e_chart = [] # stupid globals, need to do this prettier
7 '''A single CNF rule in the PCFG, of the form
9 where these are just integers
10 (where do we save the connection between number and symbol?
11 symbols being 'vbd' etc.)'''
13 return "%d -> %d %d [%.2f]" % (self.head, self.R, self.L, self.prob)
14 def __init__(self, head, L, R, prob):
21 '''The PCFG used in the I/O-algorithm.'''
22 def rules(self, head):
23 return [rule for rule in self.p_rules if rule.head == head]
25 def __init__(self, p_rules, p_terminals):
26 '''p_rules and p_terminals should be arrays, where p_terminals are of
27 the form [preterminal, terminal], and p_rules are CNF_Rule's.'''
28 self.p_rules = p_rules # could check for summing to 1 (+/- epsilon)
29 self.p_terminals = p_terminals
32 def inner(s, t, i, g, sent):
33 ''' Give the inner probability of having the node i cover whatever's
34 between s and t in sentence sent, using grammar g.
36 For DMV, i is a pair (bar, h), but this function ought to be
41 if (s, t, i) in e_chart:
42 return e_chart[(s, t, i)]
45 return g.p_terminals[i, O(s, sent)] # b[i, O(s)]
47 for rule in g.rules(i): # summing over j,k in a[i,j,k]
50 for r in range(s, t-1): # summing over r = s to r = t-1
51 e_chart[(s, t, i)] = rule.prob * inner(s, r, L, g, sent) * inner(r + 1, t, R, g, sent)
52 return e_chart[(s, t, i)]
55 ''' Give the POS-tag of the word at position s in sentence sent'''
60 if __name__ == "__main__":
61 print "IO-module tests:"
62 a1 = CNF_Rule(3,1,3, 0.1 )
63 a2 = CNF_Rule(1,3,3, 0.4 )
68 g = Grammar([a1,a2], b)
72 print "should be 0.10 : %.2f" % inner(0,0, 3, g, ['vbd'])
76 ##################################################################
77 # just junk from here on down: #
78 ##################################################################
83 # a = initialize(tagset)
84 # b = initialize_t(tagset)
86 ## now I should be able to say a[X->X Y]
90 # a[i,j,k] = sum sum sum w_q(s,t,i,j,k) / sum sum sum v_q(s,t,i)
92 # b[i,m] = sum sum v_q(t,t,i) / sum sum sum v_q(s,t,i)
96 def virahanka3(n, lookup={0:[""], 1:["S"]}):
97 '''An example of a top-down recursive dynamic function; perhaps
98 inner(s,t,i) could have e_chart as an argument also? (Or would we miss out
99 on certain values of e_chart then?)
101 Check whatever is faster:
102 >>> from timeit import Timer
103 >>> Timer("all_inner(sent, grammar)", "sent = 'nn vbd det nn foo bar baz' grammar = ...").timeit()
106 s = ["S" + prosody for prosody in virahanka3(n-1)]
107 l = ["L" + prosody for prosody in virahanka3(n-2)]