tex/formulas.tex

   1 % Created 2008-06-13 Fri 17:05
   2
   3 % todo: fix stop and attachment formulas so they divide before summing
   4
   5 \documentclass[11pt,a4paper]{article}
   6 \usepackage[utf8]{inputenc}
   7 \usepackage[T1]{fontenc}
   8 \usepackage{hyperref}
   9 \usepackage{natbib}
  10
  11 \usepackage{pslatex}
  12 \usepackage{pdfsync}
  13 \pdfoutput=1
  14
  15 \usepackage{qtree}
  16 \usepackage{amsmath}
  17 \usepackage{amssymb}
  18
  19 \usepackage{avm}
  20 \avmfont{\sc}
  21 \avmoptions{sorted,active}
  22 \avmvalfont{\rm}
  23 \avmsortfont{\scriptsize\it}
  24
  25 \usepackage{array} % for a tabular with math in it
  26
  27 \title{DMV formulas}
  28 \author{Kevin Brubeck Unhammer}
  29 \date{21 August 2008}
  30
  31 \begin{document}
  32
  33 \maketitle
  34
  35 This is an attempt at fleshing out the details of the inside-outside
  36 algorithm \citep{lari-csl90} applied to the DMV model of
  37 \citet{klein-thesis}.
  38
  39 \tableofcontents
  40
  41 \newcommand{\LOC}[1]{\textbf{#1}}
  42 \newcommand{\GOR}[1]{\overrightarrow{#1}}
  43 \newcommand{\RGOL}[1]{\overleftarrow{\overrightarrow{#1}}}
  44 \newcommand{\SEAL}[1]{\overline{#1}}
  45 \newcommand{\LGOR}[1]{\overrightarrow{\overleftarrow{#1}}}
  46 \newcommand{\GOL}[1]{\overleftarrow{#1}}
  47 \newcommand{\LN}[1]{\underleftarrow{#1}}
  48 \newcommand{\RN}[1]{\underrightarrow{#1}}
  49 \newcommand{\XI}{\lessdot}
  50 \newcommand{\XJ}{\gtrdot}
  51 \newcommand{\SMTR}[1]{\dddot{#1}}
  52 \newcommand{\SDTR}[1]{\ddot{#1}}
  53
  54 \section{Note on notation}
  55 $i, j, k$ are sentence positions (between words), where $i$ and $j$
  56 are always the start and end, respectively, for what we're calculating
  57 ($k$ is between $i$ and $j$ for $P_{INSIDE}$, to their right or left
  58 for $P_{OUTSIDE}$). $s \in S$ are sentences in the corpus. $\LOC{w}$
  59 is a word token (actually POS-token) of type $w$ at a certain sentence
  60 location. If $\LOC{w}$ is between $i$ and $i+1$, $loc(\LOC{w})=i$
  61 following \citet{klein-thesis}, meaning $i$ is adjacent to $\LOC{w}$
  62 on the left, while $j=loc(\LOC{w})+1$ means that $j$ is adjacent to
  63 $\LOC{w}$ on the right. To simplify, $loc_l(\LOC{w}):=loc(\LOC{w})$ and
  64 $loc_r(\LOC{w}):=loc(\LOC{w})+1$. We write $\LOC{h}$ if this is a head
  65 in the rule being used, $\LOC{a}$ if it is an attached argument.
  66
  67 Notational differences between the thesis \citep{klein-thesis} and the
  68 paper \citep{km-dmv}:
  69
  70 \begin{tabular}{cc}
  71 Paper: & Thesis: \\
  72 $w$ & $\GOR{w}$ \\
  73 $w\urcorner$ & $\RGOL{w}$ \\
  74 $\ulcorner{w}\urcorner$ & $\SEAL{w}$ \\
  75 \end{tabular}
  76
  77 I use $\SMTR{w}$ (or $\SDTR{w}$) to signify one of either $w, \GOR{w},
  78 \RGOL{w}, \LGOR{w}, \GOL{w}$ or $\SEAL{w}$\footnote{This means that
  79   $\SMTR{\LOC{w}}$ is the triplet of the actual POS-tag, its sentence
  80   location as a token, and the ``level of seals''.}.
  81
  82
  83 \section{Inside probabilities}
  84 $P_{INSIDE}$ is defined in \citet[pp.~106-108]{klein-thesis}, the only
  85 thing we need to add is that for right attachments,
  86 $i \leq loc_l(w)<k \leq loc_l(\LOC{a})<j$ while for left attachments,
  87 $i \leq loc_l(\LOC{a})<k \leq loc_l(w)<j$.
  88
  89 (For now, let
  90 \[ \forall{w}[P_{ORDER}(right\text{-}first|w)=1.0] \] since the DMV implementation
  91 is not yet generalized to both directions.)
  92
  93
  94
  95
  96
  97 \subsection{Sentence probability}
  98
  99 $P_s$ is the sentence probability, based on
 100 \citet[p.~38]{lari-csl90}. Since the ROOT rules are different from the
 101 rest, we sum them explicitly in this definition:
 102 \begin{align*}
 103   P_s = \sum_{\LOC{w} \in s} P_{ROOT}(\LOC{w}) P_{INSIDE}(\SEAL{\LOC{w}}, 0, len(s))
 104 \end{align*}
 105
 106 \section{Outside probabilities}
 107
 108 \begin{align*}
 109   P_{OUTSIDE_s}(ROOT, i, j) = \begin{cases}
 110     1.0 & \text{ if $i = 0$ and $j = len(s)$,}\\
 111     0.0 & \text{ otherwise}
 112   \end{cases}
 113 \end{align*}
 114
 115 For $P_{OUTSIDE}(\SEAL{w}, i, j)$, $w$ is attached to under something
 116 else ($\SEAL{w}$ is what we elsewhere call $\SEAL{a}$). Adjacency is
 117 thus calculated on the basis of $h$, the head of the rule. If we are
 118 attached to from the left we have $i \leq  loc_l(\LOC{w}) < j \leq  loc_l(\LOC{h}) < k$, while
 119 from the right we have $k \leq  loc_l(\LOC{h}) < i \leq  loc_l(\LOC{w}) < j$:
 120 \begin{align*}
 121   P_{OUTSIDE}&(\SEAL{\LOC{w}}, i, j) = \\
 122   & P_{ROOT}(w) P_{OUTSIDE}(ROOT, i, j) + \\
 123   & [ \sum_{k > j} ~ \sum_{\LOC{h}:j\leq loc_l(\LOC{h})<k} \sum_{\SMTR{\LOC{h}} \in \{\RGOL{\LOC{h}},\GOL{\LOC{h}}\}} P_{STOP}(\neg stop|h, left,  adj(j, \LOC{h})) P_{ATTACH}(w|h, left) \\
 124   & \qquad \qquad \qquad \qquad \qquad P_{OUTSIDE}(\SMTR{\LOC{h}}, i, k) P_{INSIDE}(\SMTR{\LOC{h}}, j, k) ] ~ + \\
 125   & [ \sum_{k < i} ~ \sum_{\LOC{h}:k\leq loc_l(\LOC{h})<i} \sum_{\SMTR{\LOC{h}} \in \{\LGOR{\LOC{h}},\GOR{\LOC{h}}\}} P_{STOP}(\neg stop|h, right, adj(i, \LOC{h})) P_{ATTACH}(w|h, right) \\
 126   & \qquad \qquad \qquad \qquad \qquad P_{INSIDE}(\SMTR{\LOC{h}}, k, i) P_{OUTSIDE}(\SMTR{\LOC{h}}, k, j) ]
 127 \end{align*}
 128
 129 For $\RGOL{w}$ we know it is either under a left stop rule or it is
 130 the right daughter of a left attachment rule ($k \leq loc_l(\LOC{a}) <
 131 i \leq loc_l(\LOC{w}) < j$), and these are adjacent if the start point
 132 ($i$) equals $loc_l(\LOC{w})$:
 133 \begin{align*}
 134   P_{OUTSIDE}(\RGOL{\LOC{w}}, i, j) = & P_{STOP}(stop|w, left, adj(i,
 135   \LOC{w}))P_{OUTSIDE}(\SEAL{\LOC{w}}, i, j) ~ + \\
 136   & [ \sum_{k < i} ~ \sum_{\LOC{a}:k\leq loc_l(\LOC{a})<i} P_{STOP}(\neg stop|w, left, adj(i, \LOC{w})) P_{ATTACH}(a|w, left) \\
 137   & ~~~~~~~~~~~~~~~~~~~~~~~~~ P_{INSIDE}(\SEAL{\LOC{a}}, k, i) P_{OUTSIDE}(\RGOL{\LOC{w}}, k, j) ]
 138 \end{align*}
 139
 140 For $\GOR{w}$ we are either under a right stop or the left daughter of
 141 a right attachment rule ($i \leq loc_l(\LOC{w}) < j \leq
 142 loc_l(\LOC{a}) < k$), adjacent iff the the end point ($j$) equals
 143 $loc_r(\LOC{w})$:
 144 \begin{align*}
 145   P_{OUTSIDE}(\GOR{\LOC{w}}, i, j) = & P_{STOP}(stop|w, right, adj(j,
 146   \LOC{w}))P_{OUTSIDE}(\RGOL{\LOC{w}}, i, j) ~ + \\
 147   & [ \sum_{k > j} ~ \sum_{\LOC{a}:j\leq loc_l(\LOC{a})<k} P_{STOP}(\neg stop|w, right, adj(j, \LOC{w})) P_{ATTACH}(a|w, right) \\
 148   & ~~~~~~~~~~~~~~~~~~~~~~~~~ P_{OUTSIDE}(\GOR{\LOC{w}}, i, k) P_{INSIDE}(\SEAL{\LOC{a}}, j, k) ]
 149 \end{align*}
 150
 151 $\GOL{w}$ is just like $\RGOL{w}$, except for the outside probability
 152 of having a stop above, where we use $\LGOR{w}$:
 153 \begin{align*}
 154   P_{OUTSIDE}(\GOL{\LOC{w}}, i, j) = & P_{STOP}(stop|w, left, adj(i,
 155   \LOC{w}))P_{OUTSIDE}(\LGOR{\LOC{w}}, i, j) ~ + \\
 156   & [ \sum_{k < i} ~ \sum_{\LOC{a}:k\leq loc_l(\LOC{a})<i} P_{STOP}(\neg stop|w, left, adj(i, \LOC{w})) P_{ATTACH}(a|w, left) \\
 157   & ~~~~~~~~~~~~~~~~~~~~~~~~~ P_{INSIDE}(\SEAL{\LOC{a}}, k, i) P_{OUTSIDE}(\GOL{\LOC{w}}, k, j) ]
 158 \end{align*}
 159
 160 $\LGOR{w}$ is just like $\GOR{w}$, except for the outside probability
 161 of having a stop above, where we use $\SEAL{w}$:
 162 \begin{align*}
 163   P_{OUTSIDE}(\LGOR{\LOC{w}}, i, j) = & P_{STOP}(stop|w, right, adj(j,
 164   \LOC{w}))P_{OUTSIDE}(\SEAL{\LOC{w}}, i, j) ~ + \\
 165   & [ \sum_{k > j} ~ \sum_{\LOC{a}:j\leq loc_l(\LOC{a})<k} P_{STOP}(\neg stop|w, right, adj(j, \LOC{w})) P_{ATTACH}(a|w, right) \\
 166   & ~~~~~~~~~~~~~~~~~~~~~~~~~ P_{OUTSIDE}(\LGOR{\LOC{w}}, i, k) P_{INSIDE}(\SEAL{\LOC{a}}, j, k) ]
 167 \end{align*}
 168
 169
 170 \section{Reestimating the rules} % todo
 171
 172 \subsection{$c$ and $w$ (helper formulas used below)}
 173 % todo: left out rule-probability! wops (also, make P_{fooside}
 174 % sentence-specific)
 175
 176 $c_s(\SMTR{\LOC{w}} : i, j)$ is ``the expected fraction of parses of
 177 $s$ with a node labeled $\SMTR{w}$ extending from position $i$ to
 178 position $j$'' \citep[p.~88]{klein-thesis}, here defined to equal
 179 $v_{q}$ of \citet[p.~41]{lari-csl90}\footnote{In terms of regular EM,
 180   this is the count of trees ($f_{T_q}(x)$ in
 181   \citet[p.~46]{prescher-em}) in which the node extended from $i$ to
 182   $j$.}:
 183 \begin{align*}
 184   c_s(\SMTR{\LOC{w}} : i, j) = P_{INSIDE_s}(\SMTR{\LOC{w}}, i, j) P_{OUTSIDE_s}(\SMTR{\LOC{w}}, i, j) / P_s
 185 \end{align*}
 186
 187 $w_s$ is $w_{q}$ from \citet[p.~41]{lari-csl90}, generalized to $\SMTR{h}$ and $dir$:
 188 \begin{align*}
 189   w_s(\SEAL{a} & : \SMTR{\LOC{h}}, left, i, j) = \\
 190   & 1/P_s \sum_{k:i<k<j} ~ \sum_{\LOC{a}:i\leq loc_l(\LOC{a})<k}
 191           & P_{STOP}(\neg stop|h, left, adj(k, \LOC{h})) P_{CHOOSE}(a|h, left) \\
 192   &       & P_{INSIDE_s}(\SEAL{\LOC{a}}, i, k) P_{INSIDE_s}(\SMTR{\LOC{h}}, k, j) P_{OUTSIDE_s}(\SMTR{\LOC{h}}, i, j)
 193 \end{align*}
 194 \begin{align*}
 195   w_s(\SEAL{a} & : \SMTR{\LOC{h}}, right,  i, j) = \\
 196   & 1/P_s \sum_{k:i<k<j} ~ \sum_{\LOC{a}:k\leq loc_l(\LOC{a})<j}
 197           & P_{STOP}(\neg stop|h, right, adj(k, \LOC{h})) P_{CHOOSE}(a|h, right) \\
 198   &       & P_{INSIDE_s}(\SMTR{\LOC{h}}, i, k) P_{INSIDE_s}(\SEAL{\LOC{a}}, k, j) P_{OUTSIDE_s}(\SMTR{\LOC{h}}, i, j)
 199 \end{align*}
 200
 201 Let $\hat{P}$ be the new STOP/CHOOSE-probabilities (since the old $P$
 202 are used in $P_{INSIDE}$ and $P_{OUTSIDE}$).
 203
 204 \subsection{Attachment reestimation}
 205
 206 $\hat{a}$ is given in \citet[p.~41]{lari-csl90}. Here $i<loc_l(\LOC{h})$
 207 since we want trees with at least one attachment:
 208 \begin{align*}
 209   \hat{a} (a | \SMTR{h}, left) =  \frac
 210   { \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i<loc_l(\LOC{h})} \sum_{j\geq loc_r(\LOC{h})} w_s(\SEAL{a} : \SMTR{\LOC{h}}, left, i, j) }
 211   { \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i<loc_l(\LOC{h})} \sum_{j\geq loc_r(\LOC{h})} c_s(\SMTR{\LOC{h}} : i, j) }
 212 \end{align*}
 213
 214 Here $j>loc_r(\SMTR{\LOC{h}})$ since we want at least one attachment:
 215 \begin{align*}
 216   \hat{a} (a | \SMTR{h}, right) = \frac
 217   { \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i\leq loc_l(\LOC{h})} \sum_{j>loc_r(\LOC{h})} w_s(\SEAL{a} : \SMTR{\LOC{h}}, right, i, j) }
 218   { \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i\leq loc_l(\LOC{h})} \sum_{j>loc_r(\LOC{h})} c_s(\SMTR{\LOC{h}} : i, j) }
 219 \end{align*}
 220
 221 For the first/lowest attachments, $w_s$ and $c_s$ have zero probability
 222 where $i<loc_l(\LOC{h})$ (for $\GOR{h}$) or $j>loc_r(\LOC{h})$ (for $\GOL{h}$),
 223 this is implicit in $P_{INSIDE}$.
 224
 225
 226
 227 \begin{align*}
 228   \hat{P}_{CHOOSE} (a | h, left) =
 229   \hat{a} (a | \GOL{h}, left)
 230   + \hat{a} (a | \RGOL{h}, left)
 231 \end{align*}
 232 \begin{align*}
 233   \hat{P}_{CHOOSE} (a | h, right) =
 234   \hat{a} (a | \GOR{h},right)
 235   + \hat{a} (a | \LGOR{h},right)
 236 \end{align*}
 237
 238 \subsection{Stop reestimation}
 239 The following is based on \citet[p.~88]{klein-thesis}. For the
 240 non-adjacent rules, $i<loc_l(\LOC{h})$ on the left and $j>loc_r(\LOC{h})$ on the
 241 right, while for the adjacent rules these are equal (respectively).
 242
 243 To avoid some redundancy below, define a helper function $\hat{d}$ as follows:
 244 \begin{align*}
 245   \hat{d}(\SMTR{h},\SDTR{h},\XI,\XJ) = \frac
 246   { \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i:i \XI loc_l(\LOC{h})} \sum_{j:j \XJ loc_r(\LOC{h})} c_s(\SMTR{\LOC{h}} : i, j) }
 247   { \sum_{s \in S} \sum_{\SDTR{\LOC{h}}:\LOC{h} \in s} \sum_{i:i \XI loc_l(\LOC{h})} \sum_{j:j \XJ loc_r(\LOC{h})} c_s(\SDTR{\LOC{h}} : i, j) }
 248 \end{align*}
 249
 250 Then these are our reestimated stop probabilities:
 251 \begin{align*}
 252   \hat{P}_{STOP} (STOP|h, left, non\text{-}adj) =
 253   \hat{d}(\SEAL{h}, \RGOL{h},<,\geq)  +
 254   \hat{d}(\LGOR{h}, \GOL{h},<,=)
 255 \end{align*}
 256
 257 \begin{align*}
 258   \hat{P}_{STOP} (STOP|h, left, adj) =
 259   \hat{d}(\SEAL{h}, \RGOL{h},=,\geq)  +
 260   \hat{d}(\LGOR{h}, \GOL{h},=,=)
 261 \end{align*}
 262
 263 \begin{align*}
 264   \hat{P}_{STOP} (STOP|h, right, non\text{-}adj) =
 265   \hat{d}(\RGOL{h}, \GOR{h},=,>)  +
 266   \hat{d}(\SEAL{h}, \LGOR{h},\leq,>)
 267 \end{align*}
 268
 269 \begin{align*}
 270   \hat{P}_{STOP} (STOP|h, right, adj) =
 271   \hat{d}(\RGOL{h}, \GOR{h},=,=)  +
 272   \hat{d}(\SEAL{h}, \LGOR{h},\leq,=)
 273 \end{align*}
 274
 275
 276 \subsection{Root reestimation}
 277 Following \citet[p.~46]{prescher-em}, to find the reestimated
 278 probability of a PCFG rule, we first find the new treebank frequencies
 279 $f_{T_P}(tree)=P(tree)/P_s$, then for a rule $X' \rightarrow X$ we
 280 divide the new frequencies of the trees which use this rule and by
 281 those of the trees containing the node $X'$. $ROOT$ appears once per
 282 tree, meaning we divide by $1$ per sentence\footnote{Assuming each
 283   tree has frequency $1$.}, so $\hat{P}_{ROOT}(h)=\sum_{tree:ROOT
 284   \rightarrow \SEAL{h} \text{ used in } tree} f_{T_P}(tree)=\sum_{tree:ROOT
 285   \rightarrow \SEAL{h} \text{ used in } tree} P(tree)/P_s$, which turns into:
 286
 287 \begin{align*}
 288   \hat{P}_{ROOT} (h) = \frac
 289   {\sum_{s\in S} 1 / P_s \cdot \sum_{\LOC{h}\in s} P_{ROOT}(\LOC{h}) P_{INSIDE_s}(\SEAL{h}, 0, len(s))}
 290   {\sum_{s\in S} 1}
 291 \end{align*}
 292
 293 % todo: the following just didn't turn out right, but it ought to be
 294 % possible to use this to check the CNF-like unary attachments...
 295 %
 296 % The root attachment is just a unary PCFG rule $ROOT \rightarrow
 297 % \SEAL{h}$. Modifiying binary rule reestimation from
 298 % \citet[p.~41]{lari-csl90} to unary rules gives:
 299
 300 % \begin{align*}
 301 %   \hat{P}_{RULE}(ROOT \rightarrow \SEAL{h}) = \frac
 302 %   { \sum_{s\in S} \sum_{i<len(s)} \sum_{j>i} 1/P_s \cdot P_{RULE}(ROOT \rightarrow \SEAL{h})P_{INSIDE_s}(\SEAL{h}, i, j)P_{OUTSIDE_s}(ROOT, i, j) }
 303 %   { \sum_{s\in S} \sum_{i<len(s)} \sum_{j>i} c_s(\SEAL{h}, i, j) }
 304 % \end{align*}
 305
 306 % Since $P_{OUTSIDE}(ROOT, i, j)$ is $1$ if $i=0$ and $j=len(s)$, and $0$ otherwise, this reduces into:
 307
 308 % \begin{align*}
 309 %   \hat{P}_{RULE}(ROOT \rightarrow \SEAL{h}) = \frac
 310 %   { \sum_{s\in S} 1/P_s \cdot P_{RULE}(ROOT \rightarrow \SEAL{h})P_{INSIDE_s}(\SEAL{h}, 0, len(s)) }
 311 %   { \sum_{s\in S} 1/P_s \cdot P_{INSIDE_s}(\SEAL{h}, 0, len(s)) }
 312 % \end{align*}
 313
 314
 315
 316
 317
 318 \section{Alternate CNF-like rules}
 319 Since the IO algorithm as described in \citet{lari-csl90} is made for
 320 rules in CNF, we have an alternate grammar (figure \ref{cnf-like}) for
 321 running parallell tests where we don't have to sum over the different
 322 $loc(h)$ in IO. This is not yet generalized to include left-first
 323 attachment. It is also not quite CNF, since it includes some unary
 324 rewrite rules.
 325
 326 \begin{figure}[htp]
 327   \centering
 328   \begin{tabular} % four left-aligned math tabs, one vertical line
 329     { >{$}l<{$} >{$}l<{$} >{$}l<{$} | >{$}l<{$} }
 330     \multicolumn{3}{c}{Rule} & \multicolumn{1}{c}{$P_{RULE}$ ($a[i,j,k]$ in \citet{lari-csl90})}\\
 331     \hline{}
 332
 333     \RN{\GOR{h}} \rightarrow& \GOR{h} &\SEAL{a}        &P_{STOP}(\neg stop|h, right, adj) \cdot P_{ATTACH}(a|h, right) \\
 334     &&&\\
 335     \RN{\GOR{h}} \rightarrow& \RN{\GOR{h}} &\SEAL{a}   &P_{STOP}(\neg stop|h, right, non\text{-}adj) \cdot P_{ATTACH}(a|h, right) \\
 336     &&&\\
 337     \RGOL{h} \rightarrow& \GOR{h} &STOP       &P_{STOP}(stop|h, right, adj) \\
 338     &&&\\
 339     \RGOL{h} \rightarrow& \RN{\GOR{h}} &STOP  &P_{STOP}(stop|h, right, non\text{-}adj) \\
 340     &&&\\
 341     \LN{\RGOL{h}} \rightarrow& \SEAL{a} &\RGOL{h}      &P_{STOP}(\neg stop|h, left, adj) \cdot P_{ATTACH}(a|h, left) \\
 342     &&&\\
 343     \LN{\RGOL{h}} \rightarrow& \SEAL{a} &\LN{\RGOL{h}} &P_{STOP}(\neg stop|h, left, non\text{-}adj) \cdot P_{ATTACH}(a|h, left) \\
 344     &&&\\
 345     \SEAL{h} \rightarrow& STOP &\RGOL{h}      &P_{STOP}(stop|h, left, adj) \\
 346     &&&\\
 347     \SEAL{h} \rightarrow& STOP &\LN{\RGOL{h}} &P_{STOP}(stop|h, left, non\text{-}adj) \\
 348   \end{tabular}
 349   \caption{Alternate CFG rules (where a child node has an arrow below,
 350     we use non-adjacent probabilities), defined for all words/POS-tags
 351     $h$.}\label{cnf-like}
 352 \end{figure}
 353
 354 The inside probabilities are the same as those given in
 355 \citet{lari-csl90}, with the following exceptions:
 356
 357 When calculating $P_{INSIDE}(\SMTR{h}, i, j)$ and summing through possible
 358 rules which rewrite $\SMTR{h}$, if a rule is of the form $\SMTR{h} \rightarrow
 359 STOP ~ \SDTR{h}$ or $\SMTR{h} \rightarrow \SDTR{h} ~ STOP$, we add $P_{RULE}\cdot
 360 P_{INSIDE}(\SDTR{h}, i, j)$ (that is, rewrite for the same sentence range);
 361 and, as a consequence of these unary rules: for ``terminal rules''
 362 ($P_{ORDER}$) to be applicable, not only must $i = j-1$, but also the
 363 left-hand side symbol of the rule must be of the form $\GOR{h}$.
 364
 365 Similarly, the outside probabilities are the same as those for pure
 366 CNF rules, with the exception that we add the unary rewrite
 367 probabilities
 368 \begin{align*}
 369   \sum_{\SMTR{h}} [&P_{OUTSIDE}(\SMTR{h},i,j)\cdot P_{RULE}(\SMTR{h} \rightarrow \SDTR{h} ~ STOP) \\
 370           + &P_{OUTSIDE}(\SMTR{h},i,j)\cdot P_{RULE}(\SMTR{h} \rightarrow STOP ~ \SDTR{h})]
 371 \end{align*}
 372 to $P_{OUTSIDE}(\SDTR{h},i,j)$ (eg. $f(s,t,i)$).
 373
 374 The reestimation just needs to be expanded to allow unary stop rules,
 375 this is similar to the formula for binary rules in
 376 \citet[p.~41]{lari-csl90}:
 377 \begin{align*}
 378   \hat{P}_{RULE}(\SMTR{h} \rightarrow \SDTR{h}) = \frac
 379   { \sum_{s\in S} \sum_{i<len(s)} \sum_{j>i} 1/P_s \cdot P_{RULE}(\SMTR{h}\rightarrow \SDTR{h})P_{INSIDE}(\SDTR{h}, i, j)P_{OUTSIDE}(\SMTR{h}, i, j) }
 380   { \sum_{s\in S} \sum_{i<len(s)} \sum_{j>i} c_s(\SMTR{h}, i, j) }
 381 \end{align*}
 382 (the denominator is unchanged, but the numerator sums $w_s$ defined
 383 for unary rules; $\SMTR{h}\rightarrow \SDTR{h}$ is meant to include both left and
 384 right stop rules).
 385
 386 % todo: add ROOT rule to CNF grammar?
 387
 388
 389 \section{Pure CNF rules}
 390 Alternatively, one may use the ``pure CNF'' grammar of figure
 391 \ref{cnf-T} and figure \ref{cnf-NT}, below (not yet
 392 implemented). Using the harmonic distribution to initialize this will
 393 still yield a mass-deficient grammar, but at least this allows the
 394 regular IO algorithm to be run.
 395
 396 \begin{figure}[htp]
 397   \centering
 398   \begin{tabular} % four left-aligned math tabs, one vertical line
 399     { >{$}l<{$} >{$}l<{$} >{$}l<{$} | >{$}l<{$} }
 400     \multicolumn{3}{c}{Rule} & \multicolumn{1}{c}{$P_{RULE}$ ($b[i,m]$ in \citet{lari-csl90})}\\
 401     \hline{}
 402
 403     h    \rightarrow&   \text{``h''}&         &1 \\
 404     &&&\\
 405     \SEAL{h}   \rightarrow&   \text{``h''}&         &P_{STOP}(stop|h, left, adj) \cdot  P_{STOP}(stop|h, right, adj) \\
 406     &&&\\
 407     ROOT  \rightarrow&   \text{``h''}&         &P_{STOP}(stop|h, left, adj) \cdot  P_{STOP}(stop|h, right, adj) \cdot  P_{ROOT}(h) \\
 408   \end{tabular}
 409   \caption{Terminals of pure CNF rules, defined for all POS-tags
 410     $h$.}\label{cnf-T}
 411 \end{figure}
 412
 413 \begin{figure}[htp]
 414   \centering
 415   \begin{tabular} % four left-aligned math tabs, one vertical line
 416     { >{$}l<{$} >{$}l<{$} >{$}l<{$} | >{$}l<{$} }
 417     \multicolumn{3}{c}{Rule} & \multicolumn{1}{c}{$P_{RULE}$ ($a[i,j,k]$ in \citet{lari-csl90})}\\
 418     \hline{}
 419
 420     \SEAL{h}   \rightarrow&    h &\SEAL{a}   &P_{STOP}(stop|h, left, adj) \cdot  P_{STOP}(stop|h, right, non\text{-}adj) \\
 421     &&&\cdot  P_{ATTACH}(a|h, right)\cdot P_{STOP}(\neg stop|h, right, adj) \\[3.5pt]
 422     \SEAL{h}   \rightarrow&    h &\GOR{.h}   &P_{STOP}(stop|h, left, adj) \cdot  P_{STOP}(stop|h, right, non\text{-}adj) \\[3.5pt]
 423     \GOR{.h}   \rightarrow& \SEAL{a} &\SEAL{b} &P_{ATTACH}(a|h, right)\cdot P_{STOP}(\neg stop|h, right, adj) \\
 424     &&&\cdot  P_{ATTACH}(b|h, right)\cdot P_{STOP}(\neg stop|h, right, non\text{-}adj) \\[3.5pt]
 425     \GOR{.h}   \rightarrow& \SEAL{a} &\GOR{h} &P_{ATTACH}(a|h, right)\cdot P_{STOP}(\neg stop|h, right, adj) \\[3.5pt]
 426     \GOR{h}   \rightarrow& \SEAL{a} &\SEAL{b} &P_{ATTACH}(a|h, right)\cdot P_{STOP}(\neg stop|h, right, non\text{-}adj) \\
 427     &&&\cdot  P_{ATTACH}(b|h, right)\cdot P_{STOP}(\neg stop|h, right, non\text{-}adj) \\[3.5pt]
 428     \GOR{h}   \rightarrow& \SEAL{a} &\GOR{h} &P_{ATTACH}(a|h, right)\cdot P_{STOP}(\neg stop|h, right, non\text{-}adj) \\[3.5pt]
 429     \SEAL{h}   \rightarrow& \SEAL{a} &h &P_{STOP}(stop|h, left, non\text{-}adj) \cdot  P_{STOP}(stop|h, right, adj) \\
 430     &&&\cdot  P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, adj) \\[3.5pt]
 431     \SEAL{h}   \rightarrow& \GOL{h.} &h &P_{STOP}(stop|h, left, non\text{-}adj) \cdot  P_{STOP}(stop|h, right, adj) \\[3.5pt]
 432     \GOL{h.}   \rightarrow& \SEAL{b} &\SEAL{a} &P_{ATTACH}(b|h, left)\cdot P_{STOP}(\neg stop|h, left, non\text{-}adj) \\
 433     &&&\cdot  P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, adj) \\[3.5pt]
 434     \GOL{h.}   \rightarrow& \GOL{h} &\SEAL{a} &P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, adj) \\[3.5pt]
 435     \GOL{h}   \rightarrow& \SEAL{a} &\SEAL{b} &P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, non\text{-}adj) \\
 436     &&&\cdot  P_{ATTACH}(b|h, left)\cdot P_{STOP}(\neg stop|h, left, non\text{-}adj) \\[3.5pt]
 437     \GOL{h}   \rightarrow& \GOL{h} &\SEAL{a} &P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, non\text{-}adj) \\[3.5pt]
 438     \SEAL{h}   \rightarrow& \GOL{h.} &\SEAL{\GOR{h}} &P_{STOP}(stop|h, left, non\text{-}adj) \\[3.5pt]
 439     \SEAL{h}   \rightarrow& \SEAL{a} &\SEAL{\GOR{h}} &P_{STOP}(stop|h, left, non\text{-}adj) \\
 440     &&&\cdot  P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, adj) \\[3.5pt]
 441     \SEAL{\GOR{h}}  \rightarrow& h &\GOR{.h} &P_{STOP}(stop|h, right, non\text{-}adj) \\[3.5pt]
 442     \SEAL{\GOR{h}}  \rightarrow& h &\SEAL{a} &P_{STOP}(stop|h, right, non\text{-}adj) \\
 443     &&&\cdot  P_{ATTACH}(a|h, right)\cdot P_{STOP}(\neg stop|h, right, adj) \\[3.5pt]
 444     ROOT  \rightarrow& h &\SEAL{a} &P_{STOP}(stop|h, left, adj) \cdot  P_{STOP}(stop|h, right, non\text{-}adj) \\
 445     &&&\cdot  P_{ATTACH}(a|h, right)\cdot P_{STOP}(\neg stop|h, right, adj) \cdot  P_{ROOT}(h) \\[3.5pt]
 446     ROOT  \rightarrow& h &\GOR{.h} &P_{STOP}(stop|h, left, adj) \cdot  P_{STOP}(stop|h, right, non\text{-}adj) \cdot  P_{ROOT}(h) \\[3.5pt]
 447     ROOT  \rightarrow& \SEAL{a} &h &P_{STOP}(stop|h, left, non\text{-}adj) \cdot  P_{STOP}(stop|h, right, adj) \\
 448     &&&\cdot  P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, adj) \cdot  P_{ROOT}(h) \\[3.5pt]
 449     ROOT  \rightarrow& \GOL{h.} &h &P_{STOP}(stop|h, left, non\text{-}adj) \cdot  P_{STOP}(stop|h, right, adj) \cdot  P_{ROOT}(h) \\[3.5pt]
 450     ROOT  \rightarrow& \GOL{h.} &\SEAL{\GOR{h}} &P_{STOP}(stop|h, left, non\text{-}adj) \cdot  P_{ROOT}(h) \\[3.5pt]
 451     ROOT  \rightarrow& \SEAL{a} &\SEAL{\GOR{h}} &P_{STOP}(stop|h, left, non\text{-}adj) \\
 452     &&&\cdot  P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, adj) \cdot  P_{ROOT}(h) \\[3.5pt]
 453   \end{tabular}
 454   \caption{Non-terminals of pure CNF rules, defined for all POS-tags
 455     $h$, $a$ and $b$.}\label{cnf-NT}
 456 \end{figure}
 457
 458
 459
 460
 461 \bibliography{./statistical.bib}
 462 \bibliographystyle{plainnat}
 463
 464 \end{document}