fixed a little bug in root reestimation (forgot to divide by len(corpus)=f_T_q(ROOT))
[dmvccm.git] / tex / formulas.tex
blobb1be8f3e3dd2a84d27e81da1c71ec5a02f6fc129
1 % Created 2008-06-13 Fri 17:05
3 % todo: fix stop and attachment formulas so they divide before summing
5 \documentclass[11pt,a4paper]{article}
6 \usepackage[utf8]{inputenc}
7 \usepackage[T1]{fontenc}
8 \usepackage{hyperref}
9 \usepackage{natbib}
11 \usepackage{pslatex}
12 \usepackage{pdfsync}
13 \pdfoutput=1
15 \usepackage{qtree}
16 \usepackage{amsmath}
17 \usepackage{amssymb}
19 \usepackage{avm}
20 \avmfont{\sc}
21 \avmoptions{sorted,active}
22 \avmvalfont{\rm}
23 \avmsortfont{\scriptsize\it}
25 \usepackage{array} % for a tabular with math in it
27 \title{DMV formulas}
28 \author{Kevin Brubeck Unhammer}
29 \date{21 August 2008}
31 \begin{document}
33 \maketitle
35 This is an attempt at fleshing out the details of the inside-outside
36 algorithm \citep{lari-csl90} applied to the DMV model of
37 \citet{klein-thesis}.
39 \tableofcontents
41 \newcommand{\LOC}[1]{\textbf{#1}}
42 \newcommand{\GOR}[1]{\overrightarrow{#1}}
43 \newcommand{\RGOL}[1]{\overleftarrow{\overrightarrow{#1}}}
44 \newcommand{\SEAL}[1]{\overline{#1}}
45 \newcommand{\LGOR}[1]{\overrightarrow{\overleftarrow{#1}}}
46 \newcommand{\GOL}[1]{\overleftarrow{#1}}
47 \newcommand{\LN}[1]{\underleftarrow{#1}}
48 \newcommand{\RN}[1]{\underrightarrow{#1}}
49 \newcommand{\XI}{\lessdot}
50 \newcommand{\XJ}{\gtrdot}
51 \newcommand{\SMTR}[1]{\dddot{#1}}
52 \newcommand{\SDTR}[1]{\ddot{#1}}
54 \section{Note on notation}
55 $i, j, k$ are sentence positions (between words), where $i$ and $j$
56 are always the start and end, respectively, for what we're calculating
57 ($k$ is between $i$ and $j$ for $P_{INSIDE}$, to their right or left
58 for $P_{OUTSIDE}$). $s \in S$ are sentences in the corpus. $\LOC{w}$
59 is a word token (actually POS-token) of type $w$ at a certain sentence
60 location. If $\LOC{w}$ is between $i$ and $i+1$, $loc(\LOC{w})=i$
61 following \citet{klein-thesis}, meaning $i$ is adjacent to $\LOC{w}$
62 on the left, while $j=loc(\LOC{w})+1$ means that $j$ is adjacent to
63 $\LOC{w}$ on the right. To simplify, $loc_l(\LOC{w}):=loc(\LOC{w})$ and
64 $loc_r(\LOC{w}):=loc(\LOC{w})+1$. We write $\LOC{h}$ if this is a head
65 in the rule being used, $\LOC{a}$ if it is an attached argument.
67 Notational differences between the thesis \citep{klein-thesis} and the
68 paper \citep{km-dmv}:
70 \begin{tabular}{cc}
71 Paper: & Thesis: \\
72 $w$ & $\GOR{w}$ \\
73 $w\urcorner$ & $\RGOL{w}$ \\
74 $\ulcorner{w}\urcorner$ & $\SEAL{w}$ \\
75 \end{tabular}
77 I use $\SMTR{w}$ (or $\SDTR{w}$) to signify one of either $w, \GOR{w},
78 \RGOL{w}, \LGOR{w}, \GOL{w}$ or $\SEAL{w}$\footnote{This means that
79 $\SMTR{\LOC{w}}$ is the triplet of the actual POS-tag, its sentence
80 location as a token, and the ``level of seals''.}.
83 \section{Inside probabilities}
84 $P_{INSIDE}$ is defined in \citet[pp.~106-108]{klein-thesis}, the only
85 thing we need to add is that for right attachments,
86 $i \leq loc_l(w)<k \leq loc_l(\LOC{a})<j$ while for left attachments,
87 $i \leq loc_l(\LOC{a})<k \leq loc_l(w)<j$.
89 (For now, let
90 \[ \forall{w}[P_{ORDER}(right\text{-}first|w)=1.0] \] since the DMV implementation
91 is not yet generalized to both directions.)
97 \subsection{Sentence probability}
99 $P_s$ is the sentence probability, based on
100 \citet[p.~38]{lari-csl90}. Since the ROOT rules are different from the
101 rest, we sum them explicitly in this definition:
102 \begin{align*}
103 P_s = \sum_{\LOC{w} \in s} P_{ROOT}(\LOC{w}) P_{INSIDE}(\SEAL{\LOC{w}}, 0, len(s))
104 \end{align*}
106 \section{Outside probabilities}
108 \begin{align*}
109 P_{OUTSIDE_s}(ROOT, i, j) = \begin{cases}
110 1.0 & \text{ if $i = 0$ and $j = len(s)$,}\\
111 0.0 & \text{ otherwise}
112 \end{cases}
113 \end{align*}
115 For $P_{OUTSIDE}(\SEAL{w}, i, j)$, $w$ is attached to under something
116 else ($\SEAL{w}$ is what we elsewhere call $\SEAL{a}$). Adjacency is
117 thus calculated on the basis of $h$, the head of the rule. If we are
118 attached to from the left we have $i \leq loc_l(\LOC{w}) < j \leq loc_l(\LOC{h}) < k$, while
119 from the right we have $k \leq loc_l(\LOC{h}) < i \leq loc_l(\LOC{w}) < j$:
120 \begin{align*}
121 P_{OUTSIDE}&(\SEAL{\LOC{w}}, i, j) = \\
122 & P_{ROOT}(w) P_{OUTSIDE}(ROOT, i, j) + \\
123 & [ \sum_{k > j} ~ \sum_{\LOC{h}:j\leq loc_l(\LOC{h})<k} \sum_{\SMTR{\LOC{h}} \in \{\RGOL{\LOC{h}},\GOL{\LOC{h}}\}} P_{STOP}(\neg stop|h, left, adj(j, \LOC{h})) P_{ATTACH}(w|h, left) \\
124 & \qquad \qquad \qquad \qquad \qquad P_{OUTSIDE}(\SMTR{\LOC{h}}, i, k) P_{INSIDE}(\SMTR{\LOC{h}}, j, k) ] ~ + \\
125 & [ \sum_{k < i} ~ \sum_{\LOC{h}:k\leq loc_l(\LOC{h})<i} \sum_{\SMTR{\LOC{h}} \in \{\LGOR{\LOC{h}},\GOR{\LOC{h}}\}} P_{STOP}(\neg stop|h, right, adj(i, \LOC{h})) P_{ATTACH}(w|h, right) \\
126 & \qquad \qquad \qquad \qquad \qquad P_{INSIDE}(\SMTR{\LOC{h}}, k, i) P_{OUTSIDE}(\SMTR{\LOC{h}}, k, j) ]
127 \end{align*}
129 For $\RGOL{w}$ we know it is either under a left stop rule or it is
130 the right daughter of a left attachment rule ($k \leq loc_l(\LOC{a}) <
131 i \leq loc_l(\LOC{w}) < j$), and these are adjacent if the start point
132 ($i$) equals $loc_l(\LOC{w})$:
133 \begin{align*}
134 P_{OUTSIDE}(\RGOL{\LOC{w}}, i, j) = & P_{STOP}(stop|w, left, adj(i,
135 \LOC{w}))P_{OUTSIDE}(\SEAL{\LOC{w}}, i, j) ~ + \\
136 & [ \sum_{k < i} ~ \sum_{\LOC{a}:k\leq loc_l(\LOC{a})<i} P_{STOP}(\neg stop|w, left, adj(i, \LOC{w})) P_{ATTACH}(a|w, left) \\
137 & ~~~~~~~~~~~~~~~~~~~~~~~~~ P_{INSIDE}(\SEAL{\LOC{a}}, k, i) P_{OUTSIDE}(\RGOL{\LOC{w}}, k, j) ]
138 \end{align*}
140 For $\GOR{w}$ we are either under a right stop or the left daughter of
141 a right attachment rule ($i \leq loc_l(\LOC{w}) < j \leq
142 loc_l(\LOC{a}) < k$), adjacent iff the the end point ($j$) equals
143 $loc_r(\LOC{w})$:
144 \begin{align*}
145 P_{OUTSIDE}(\GOR{\LOC{w}}, i, j) = & P_{STOP}(stop|w, right, adj(j,
146 \LOC{w}))P_{OUTSIDE}(\RGOL{\LOC{w}}, i, j) ~ + \\
147 & [ \sum_{k > j} ~ \sum_{\LOC{a}:j\leq loc_l(\LOC{a})<k} P_{STOP}(\neg stop|w, right, adj(j, \LOC{w})) P_{ATTACH}(a|w, right) \\
148 & ~~~~~~~~~~~~~~~~~~~~~~~~~ P_{OUTSIDE}(\GOR{\LOC{w}}, i, k) P_{INSIDE}(\SEAL{\LOC{a}}, j, k) ]
149 \end{align*}
151 $\GOL{w}$ is just like $\RGOL{w}$, except for the outside probability
152 of having a stop above, where we use $\LGOR{w}$:
153 \begin{align*}
154 P_{OUTSIDE}(\GOL{\LOC{w}}, i, j) = & P_{STOP}(stop|w, left, adj(i,
155 \LOC{w}))P_{OUTSIDE}(\LGOR{\LOC{w}}, i, j) ~ + \\
156 & [ \sum_{k < i} ~ \sum_{\LOC{a}:k\leq loc_l(\LOC{a})<i} P_{STOP}(\neg stop|w, left, adj(i, \LOC{w})) P_{ATTACH}(a|w, left) \\
157 & ~~~~~~~~~~~~~~~~~~~~~~~~~ P_{INSIDE}(\SEAL{\LOC{a}}, k, i) P_{OUTSIDE}(\GOL{\LOC{w}}, k, j) ]
158 \end{align*}
160 $\LGOR{w}$ is just like $\GOR{w}$, except for the outside probability
161 of having a stop above, where we use $\SEAL{w}$:
162 \begin{align*}
163 P_{OUTSIDE}(\LGOR{\LOC{w}}, i, j) = & P_{STOP}(stop|w, right, adj(j,
164 \LOC{w}))P_{OUTSIDE}(\SEAL{\LOC{w}}, i, j) ~ + \\
165 & [ \sum_{k > j} ~ \sum_{\LOC{a}:j\leq loc_l(\LOC{a})<k} P_{STOP}(\neg stop|w, right, adj(j, \LOC{w})) P_{ATTACH}(a|w, right) \\
166 & ~~~~~~~~~~~~~~~~~~~~~~~~~ P_{OUTSIDE}(\LGOR{\LOC{w}}, i, k) P_{INSIDE}(\SEAL{\LOC{a}}, j, k) ]
167 \end{align*}
170 \section{Reestimating the rules} % todo
172 \subsection{$c$ and $w$ (helper formulas used below)}
173 % todo: left out rule-probability! wops (also, make P_{fooside}
174 % sentence-specific)
176 $c_s(\SMTR{\LOC{w}} : i, j)$ is ``the expected fraction of parses of
177 $s$ with a node labeled $\SMTR{w}$ extending from position $i$ to
178 position $j$'' \citep[p.~88]{klein-thesis}, here defined to equal
179 $v_{q}$ of \citet[p.~41]{lari-csl90}\footnote{In terms of regular EM,
180 this is the count of trees ($f_{T_q}(x)$ in
181 \citet[p.~46]{prescher-em}) in which the node extended from $i$ to
182 $j$.}:
183 \begin{align*}
184 c_s(\SMTR{\LOC{w}} : i, j) = P_{INSIDE_s}(\SMTR{\LOC{w}}, i, j) P_{OUTSIDE_s}(\SMTR{\LOC{w}}, i, j) / P_s
185 \end{align*}
187 $w_s$ is $w_{q}$ from \citet[p.~41]{lari-csl90}, generalized to $\SMTR{h}$ and $dir$:
188 \begin{align*}
189 w_s(\SEAL{a} & : \SMTR{\LOC{h}}, left, i, j) = \\
190 & 1/P_s \sum_{k:i<k<j} ~ \sum_{\LOC{a}:i\leq loc_l(\LOC{a})<k}
191 & P_{STOP}(\neg stop|h, left, adj(k, \LOC{h})) P_{CHOOSE}(a|h, left) \\
192 & & P_{INSIDE_s}(\SEAL{\LOC{a}}, i, k) P_{INSIDE_s}(\SMTR{\LOC{h}}, k, j) P_{OUTSIDE_s}(\SMTR{\LOC{h}}, i, j)
193 \end{align*}
194 \begin{align*}
195 w_s(\SEAL{a} & : \SMTR{\LOC{h}}, right, i, j) = \\
196 & 1/P_s \sum_{k:i<k<j} ~ \sum_{\LOC{a}:k\leq loc_l(\LOC{a})<j}
197 & P_{STOP}(\neg stop|h, right, adj(k, \LOC{h})) P_{CHOOSE}(a|h, right) \\
198 & & P_{INSIDE_s}(\SMTR{\LOC{h}}, i, k) P_{INSIDE_s}(\SEAL{\LOC{a}}, k, j) P_{OUTSIDE_s}(\SMTR{\LOC{h}}, i, j)
199 \end{align*}
201 Let $\hat{P}$ be the new STOP/CHOOSE-probabilities (since the old $P$
202 are used in $P_{INSIDE}$ and $P_{OUTSIDE}$).
204 \subsection{Attachment reestimation}
206 $\hat{a}$ is given in \citet[p.~41]{lari-csl90}. Here $i<loc_l(\LOC{h})$
207 since we want trees with at least one attachment:
208 \begin{align*}
209 \hat{a} (a | \SMTR{h}, left) = \frac
210 { \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i<loc_l(\LOC{h})} \sum_{j\geq loc_r(\LOC{h})} w_s(\SEAL{a} : \SMTR{\LOC{h}}, left, i, j) }
211 { \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i<loc_l(\LOC{h})} \sum_{j\geq loc_r(\LOC{h})} c_s(\SMTR{\LOC{h}} : i, j) }
212 \end{align*}
214 Here $j>loc_r(\SMTR{\LOC{h}})$ since we want at least one attachment:
215 \begin{align*}
216 \hat{a} (a | \SMTR{h}, right) = \frac
217 { \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i\leq loc_l(\LOC{h})} \sum_{j>loc_r(\LOC{h})} w_s(\SEAL{a} : \SMTR{\LOC{h}}, right, i, j) }
218 { \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i\leq loc_l(\LOC{h})} \sum_{j>loc_r(\LOC{h})} c_s(\SMTR{\LOC{h}} : i, j) }
219 \end{align*}
221 For the first/lowest attachments, $w_s$ and $c_s$ have zero probability
222 where $i<loc_l(\LOC{h})$ (for $\GOR{h}$) or $j>loc_r(\LOC{h})$ (for $\GOL{h}$),
223 this is implicit in $P_{INSIDE}$.
227 \begin{align*}
228 \hat{P}_{CHOOSE} (a | h, left) =
229 \hat{a} (a | \GOL{h}, left)
230 + \hat{a} (a | \RGOL{h}, left)
231 \end{align*}
232 \begin{align*}
233 \hat{P}_{CHOOSE} (a | h, right) =
234 \hat{a} (a | \GOR{h},right)
235 + \hat{a} (a | \LGOR{h},right)
236 \end{align*}
238 \subsection{Stop reestimation}
239 The following is based on \citet[p.~88]{klein-thesis}. For the
240 non-adjacent rules, $i<loc_l(\LOC{h})$ on the left and $j>loc_r(\LOC{h})$ on the
241 right, while for the adjacent rules these are equal (respectively).
243 To avoid some redundancy below, define a helper function $\hat{d}$ as follows:
244 \begin{align*}
245 \hat{d}(\SMTR{h},\SDTR{h},\XI,\XJ) = \frac
246 { \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i:i \XI loc_l(\LOC{h})} \sum_{j:j \XJ loc_r(\LOC{h})} c_s(\SMTR{\LOC{h}} : i, j) }
247 { \sum_{s \in S} \sum_{\SDTR{\LOC{h}}:\LOC{h} \in s} \sum_{i:i \XI loc_l(\LOC{h})} \sum_{j:j \XJ loc_r(\LOC{h})} c_s(\SDTR{\LOC{h}} : i, j) }
248 \end{align*}
250 Then these are our reestimated stop probabilities:
251 \begin{align*}
252 \hat{P}_{STOP} (STOP|h, left, non\text{-}adj) =
253 \hat{d}(\SEAL{h}, \RGOL{h},<,\geq) +
254 \hat{d}(\LGOR{h}, \GOL{h},<,=)
255 \end{align*}
257 \begin{align*}
258 \hat{P}_{STOP} (STOP|h, left, adj) =
259 \hat{d}(\SEAL{h}, \RGOL{h},=,\geq) +
260 \hat{d}(\LGOR{h}, \GOL{h},=,=)
261 \end{align*}
263 \begin{align*}
264 \hat{P}_{STOP} (STOP|h, right, non\text{-}adj) =
265 \hat{d}(\RGOL{h}, \GOR{h},=,>) +
266 \hat{d}(\SEAL{h}, \LGOR{h},\leq,>)
267 \end{align*}
269 \begin{align*}
270 \hat{P}_{STOP} (STOP|h, right, adj) =
271 \hat{d}(\RGOL{h}, \GOR{h},=,=) +
272 \hat{d}(\SEAL{h}, \LGOR{h},\leq,=)
273 \end{align*}
276 \subsection{Root reestimation}
277 Following \citet[p.~46]{prescher-em}, to find the reestimated
278 probability of a PCFG rule, we first find the new treebank frequencies
279 $f_{T_P}(tree)=P(tree)/P_s$, then for a rule $X' \rightarrow X$ we
280 divide the new frequencies of the trees which use this rule and by
281 those of the trees containing the node $X'$. $ROOT$ appears once per
282 tree, meaning we divide by $1$ per sentence\footnote{Assuming each
283 tree has frequency $1$.}, so $\hat{P}_{ROOT}(h)=\sum_{tree:ROOT
284 \rightarrow \SEAL{h} \text{ used in } tree} f_{T_P}(tree)=\sum_{tree:ROOT
285 \rightarrow \SEAL{h} \text{ used in } tree} P(tree)/P_s$, which turns into:
287 \begin{align*}
288 \hat{P}_{ROOT} (h) = \frac
289 {\sum_{s\in S} 1 / P_s \cdot \sum_{\LOC{h}\in s} P_{ROOT}(\LOC{h}) P_{INSIDE_s}(\SEAL{h}, 0, len(s))}
290 {\sum_{s\in S} 1}
291 \end{align*}
293 % todo: the following just didn't turn out right, but it ought to be
294 % possible to use this to check the CNF-like unary attachments...
296 % The root attachment is just a unary PCFG rule $ROOT \rightarrow
297 % \SEAL{h}$. Modifiying binary rule reestimation from
298 % \citet[p.~41]{lari-csl90} to unary rules gives:
300 % \begin{align*}
301 % \hat{P}_{RULE}(ROOT \rightarrow \SEAL{h}) = \frac
302 % { \sum_{s\in S} \sum_{i<len(s)} \sum_{j>i} 1/P_s \cdot P_{RULE}(ROOT \rightarrow \SEAL{h})P_{INSIDE_s}(\SEAL{h}, i, j)P_{OUTSIDE_s}(ROOT, i, j) }
303 % { \sum_{s\in S} \sum_{i<len(s)} \sum_{j>i} c_s(\SEAL{h}, i, j) }
304 % \end{align*}
306 % Since $P_{OUTSIDE}(ROOT, i, j)$ is $1$ if $i=0$ and $j=len(s)$, and $0$ otherwise, this reduces into:
308 % \begin{align*}
309 % \hat{P}_{RULE}(ROOT \rightarrow \SEAL{h}) = \frac
310 % { \sum_{s\in S} 1/P_s \cdot P_{RULE}(ROOT \rightarrow \SEAL{h})P_{INSIDE_s}(\SEAL{h}, 0, len(s)) }
311 % { \sum_{s\in S} 1/P_s \cdot P_{INSIDE_s}(\SEAL{h}, 0, len(s)) }
312 % \end{align*}
318 \section{Alternate CNF-like rules}
319 Since the IO algorithm as described in \citet{lari-csl90} is made for
320 rules in CNF, we have an alternate grammar (figure \ref{cnf-like}) for
321 running parallell tests where we don't have to sum over the different
322 $loc(h)$ in IO. This is not yet generalized to include left-first
323 attachment. It is also not quite CNF, since it includes some unary
324 rewrite rules.
326 \begin{figure}[htp]
327 \centering
328 \begin{tabular} % four left-aligned math tabs, one vertical line
329 { >{$}l<{$} >{$}l<{$} >{$}l<{$} | >{$}l<{$} }
330 \multicolumn{3}{c}{Rule} & \multicolumn{1}{c}{$P_{RULE}$ ($a[i,j,k]$ in \citet{lari-csl90})}\\
331 \hline{}
333 \RN{\GOR{h}} \rightarrow& \GOR{h} &\SEAL{a} &P_{STOP}(\neg stop|h, right, adj) \cdot P_{ATTACH}(a|h, right) \\
334 &&&\\
335 \RN{\GOR{h}} \rightarrow& \RN{\GOR{h}} &\SEAL{a} &P_{STOP}(\neg stop|h, right, non\text{-}adj) \cdot P_{ATTACH}(a|h, right) \\
336 &&&\\
337 \RGOL{h} \rightarrow& \GOR{h} &STOP &P_{STOP}(stop|h, right, adj) \\
338 &&&\\
339 \RGOL{h} \rightarrow& \RN{\GOR{h}} &STOP &P_{STOP}(stop|h, right, non\text{-}adj) \\
340 &&&\\
341 \LN{\RGOL{h}} \rightarrow& \SEAL{a} &\RGOL{h} &P_{STOP}(\neg stop|h, left, adj) \cdot P_{ATTACH}(a|h, left) \\
342 &&&\\
343 \LN{\RGOL{h}} \rightarrow& \SEAL{a} &\LN{\RGOL{h}} &P_{STOP}(\neg stop|h, left, non\text{-}adj) \cdot P_{ATTACH}(a|h, left) \\
344 &&&\\
345 \SEAL{h} \rightarrow& STOP &\RGOL{h} &P_{STOP}(stop|h, left, adj) \\
346 &&&\\
347 \SEAL{h} \rightarrow& STOP &\LN{\RGOL{h}} &P_{STOP}(stop|h, left, non\text{-}adj) \\
348 \end{tabular}
349 \caption{Alternate CFG rules (where a child node has an arrow below,
350 we use non-adjacent probabilities), defined for all words/POS-tags
351 $h$.}\label{cnf-like}
352 \end{figure}
354 The inside probabilities are the same as those given in
355 \citet{lari-csl90}, with the following exceptions:
357 When calculating $P_{INSIDE}(\SMTR{h}, i, j)$ and summing through possible
358 rules which rewrite $\SMTR{h}$, if a rule is of the form $\SMTR{h} \rightarrow
359 STOP ~ \SDTR{h}$ or $\SMTR{h} \rightarrow \SDTR{h} ~ STOP$, we add $P_{RULE}\cdot
360 P_{INSIDE}(\SDTR{h}, i, j)$ (that is, rewrite for the same sentence range);
361 and, as a consequence of these unary rules: for ``terminal rules''
362 ($P_{ORDER}$) to be applicable, not only must $i = j-1$, but also the
363 left-hand side symbol of the rule must be of the form $\GOR{h}$.
365 Similarly, the outside probabilities are the same as those for pure
366 CNF rules, with the exception that we add the unary rewrite
367 probabilities
368 \begin{align*}
369 \sum_{\SMTR{h}} [&P_{OUTSIDE}(\SMTR{h},i,j)\cdot P_{RULE}(\SMTR{h} \rightarrow \SDTR{h} ~ STOP) \\
370 + &P_{OUTSIDE}(\SMTR{h},i,j)\cdot P_{RULE}(\SMTR{h} \rightarrow STOP ~ \SDTR{h})]
371 \end{align*}
372 to $P_{OUTSIDE}(\SDTR{h},i,j)$ (eg. $f(s,t,i)$).
374 The reestimation just needs to be expanded to allow unary stop rules,
375 this is similar to the formula for binary rules in
376 \citet[p.~41]{lari-csl90}:
377 \begin{align*}
378 \hat{P}_{RULE}(\SMTR{h} \rightarrow \SDTR{h}) = \frac
379 { \sum_{s\in S} \sum_{i<len(s)} \sum_{j>i} 1/P_s \cdot P_{RULE}(\SMTR{h}\rightarrow \SDTR{h})P_{INSIDE}(\SDTR{h}, i, j)P_{OUTSIDE}(\SMTR{h}, i, j) }
380 { \sum_{s\in S} \sum_{i<len(s)} \sum_{j>i} c_s(\SMTR{h}, i, j) }
381 \end{align*}
382 (the denominator is unchanged, but the numerator sums $w_s$ defined
383 for unary rules; $\SMTR{h}\rightarrow \SDTR{h}$ is meant to include both left and
384 right stop rules).
386 % todo: add ROOT rule to CNF grammar?
389 \section{Pure CNF rules}
390 Alternatively, one may use the ``pure CNF'' grammar of figure
391 \ref{cnf-T} and figure \ref{cnf-NT}, below (not yet
392 implemented). Using the harmonic distribution to initialize this will
393 still yield a mass-deficient grammar, but at least this allows the
394 regular IO algorithm to be run.
396 \begin{figure}[htp]
397 \centering
398 \begin{tabular} % four left-aligned math tabs, one vertical line
399 { >{$}l<{$} >{$}l<{$} >{$}l<{$} | >{$}l<{$} }
400 \multicolumn{3}{c}{Rule} & \multicolumn{1}{c}{$P_{RULE}$ ($b[i,m]$ in \citet{lari-csl90})}\\
401 \hline{}
403 h \rightarrow& \text{``h''}& &1 \\
404 &&&\\
405 \SEAL{h} \rightarrow& \text{``h''}& &P_{STOP}(stop|h, left, adj) \cdot P_{STOP}(stop|h, right, adj) \\
406 &&&\\
407 ROOT \rightarrow& \text{``h''}& &P_{STOP}(stop|h, left, adj) \cdot P_{STOP}(stop|h, right, adj) \cdot P_{ROOT}(h) \\
408 \end{tabular}
409 \caption{Terminals of pure CNF rules, defined for all POS-tags
410 $h$.}\label{cnf-T}
411 \end{figure}
413 \begin{figure}[htp]
414 \centering
415 \begin{tabular} % four left-aligned math tabs, one vertical line
416 { >{$}l<{$} >{$}l<{$} >{$}l<{$} | >{$}l<{$} }
417 \multicolumn{3}{c}{Rule} & \multicolumn{1}{c}{$P_{RULE}$ ($a[i,j,k]$ in \citet{lari-csl90})}\\
418 \hline{}
420 \SEAL{h} \rightarrow& h &\SEAL{a} &P_{STOP}(stop|h, left, adj) \cdot P_{STOP}(stop|h, right, non\text{-}adj) \\
421 &&&\cdot P_{ATTACH}(a|h, right)\cdot P_{STOP}(\neg stop|h, right, adj) \\[3.5pt]
422 \SEAL{h} \rightarrow& h &\GOR{.h} &P_{STOP}(stop|h, left, adj) \cdot P_{STOP}(stop|h, right, non\text{-}adj) \\[3.5pt]
423 \GOR{.h} \rightarrow& \SEAL{a} &\SEAL{b} &P_{ATTACH}(a|h, right)\cdot P_{STOP}(\neg stop|h, right, adj) \\
424 &&&\cdot P_{ATTACH}(b|h, right)\cdot P_{STOP}(\neg stop|h, right, non\text{-}adj) \\[3.5pt]
425 \GOR{.h} \rightarrow& \SEAL{a} &\GOR{h} &P_{ATTACH}(a|h, right)\cdot P_{STOP}(\neg stop|h, right, adj) \\[3.5pt]
426 \GOR{h} \rightarrow& \SEAL{a} &\SEAL{b} &P_{ATTACH}(a|h, right)\cdot P_{STOP}(\neg stop|h, right, non\text{-}adj) \\
427 &&&\cdot P_{ATTACH}(b|h, right)\cdot P_{STOP}(\neg stop|h, right, non\text{-}adj) \\[3.5pt]
428 \GOR{h} \rightarrow& \SEAL{a} &\GOR{h} &P_{ATTACH}(a|h, right)\cdot P_{STOP}(\neg stop|h, right, non\text{-}adj) \\[3.5pt]
429 \SEAL{h} \rightarrow& \SEAL{a} &h &P_{STOP}(stop|h, left, non\text{-}adj) \cdot P_{STOP}(stop|h, right, adj) \\
430 &&&\cdot P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, adj) \\[3.5pt]
431 \SEAL{h} \rightarrow& \GOL{h.} &h &P_{STOP}(stop|h, left, non\text{-}adj) \cdot P_{STOP}(stop|h, right, adj) \\[3.5pt]
432 \GOL{h.} \rightarrow& \SEAL{b} &\SEAL{a} &P_{ATTACH}(b|h, left)\cdot P_{STOP}(\neg stop|h, left, non\text{-}adj) \\
433 &&&\cdot P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, adj) \\[3.5pt]
434 \GOL{h.} \rightarrow& \GOL{h} &\SEAL{a} &P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, adj) \\[3.5pt]
435 \GOL{h} \rightarrow& \SEAL{a} &\SEAL{b} &P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, non\text{-}adj) \\
436 &&&\cdot P_{ATTACH}(b|h, left)\cdot P_{STOP}(\neg stop|h, left, non\text{-}adj) \\[3.5pt]
437 \GOL{h} \rightarrow& \GOL{h} &\SEAL{a} &P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, non\text{-}adj) \\[3.5pt]
438 \SEAL{h} \rightarrow& \GOL{h.} &\SEAL{\GOR{h}} &P_{STOP}(stop|h, left, non\text{-}adj) \\[3.5pt]
439 \SEAL{h} \rightarrow& \SEAL{a} &\SEAL{\GOR{h}} &P_{STOP}(stop|h, left, non\text{-}adj) \\
440 &&&\cdot P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, adj) \\[3.5pt]
441 \SEAL{\GOR{h}} \rightarrow& h &\GOR{.h} &P_{STOP}(stop|h, right, non\text{-}adj) \\[3.5pt]
442 \SEAL{\GOR{h}} \rightarrow& h &\SEAL{a} &P_{STOP}(stop|h, right, non\text{-}adj) \\
443 &&&\cdot P_{ATTACH}(a|h, right)\cdot P_{STOP}(\neg stop|h, right, adj) \\[3.5pt]
444 ROOT \rightarrow& h &\SEAL{a} &P_{STOP}(stop|h, left, adj) \cdot P_{STOP}(stop|h, right, non\text{-}adj) \\
445 &&&\cdot P_{ATTACH}(a|h, right)\cdot P_{STOP}(\neg stop|h, right, adj) \cdot P_{ROOT}(h) \\[3.5pt]
446 ROOT \rightarrow& h &\GOR{.h} &P_{STOP}(stop|h, left, adj) \cdot P_{STOP}(stop|h, right, non\text{-}adj) \cdot P_{ROOT}(h) \\[3.5pt]
447 ROOT \rightarrow& \SEAL{a} &h &P_{STOP}(stop|h, left, non\text{-}adj) \cdot P_{STOP}(stop|h, right, adj) \\
448 &&&\cdot P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, adj) \cdot P_{ROOT}(h) \\[3.5pt]
449 ROOT \rightarrow& \GOL{h.} &h &P_{STOP}(stop|h, left, non\text{-}adj) \cdot P_{STOP}(stop|h, right, adj) \cdot P_{ROOT}(h) \\[3.5pt]
450 ROOT \rightarrow& \GOL{h.} &\SEAL{\GOR{h}} &P_{STOP}(stop|h, left, non\text{-}adj) \cdot P_{ROOT}(h) \\[3.5pt]
451 ROOT \rightarrow& \SEAL{a} &\SEAL{\GOR{h}} &P_{STOP}(stop|h, left, non\text{-}adj) \\
452 &&&\cdot P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, adj) \cdot P_{ROOT}(h) \\[3.5pt]
453 \end{tabular}
454 \caption{Non-terminals of pure CNF rules, defined for all POS-tags
455 $h$, $a$ and $b$.}\label{cnf-NT}
456 \end{figure}
461 \bibliography{./statistical.bib}
462 \bibliographystyle{plainnat}
464 \end{document}