tex/formulas.tex

   1 % Created 2008-06-13 Fri 17:05
   2 \documentclass[11pt,a4paper]{article}
   3 \usepackage[utf8]{inputenc}
   4 \usepackage[T1]{fontenc}
   5 \usepackage{hyperref}
   6 \usepackage{natbib}
   7
   8 \usepackage{pslatex}
   9 \usepackage{pdfsync}
  10 \pdfoutput=1
  11
  12 \usepackage{qtree}
  13 \usepackage{amsmath}
  14 \usepackage{amssymb}
  15
  16 \usepackage{avm}
  17 \avmfont{\sc}
  18 \avmoptions{sorted,active}
  19 \avmvalfont{\rm}
  20 \avmsortfont{\scriptsize\it}
  21
  22 \usepackage{array} % for a tabular with math in it
  23
  24 \title{DMV formulas}
  25 \author{Kevin Brubeck Unhammer}
  26 \date{21 August 2008}
  27
  28 \begin{document}
  29
  30 \maketitle
  31
  32 This is an attempt at fleshing out the details of the inside-outside
  33 algorithm \citep{lari-csl90} applied to the DMV model of
  34 \citet{klein-thesis}.
  35
  36 \tableofcontents
  37
  38 \newcommand{\LOC}[1]{\textbf{#1}}
  39 \newcommand{\GOR}[1]{\overrightarrow{#1}}
  40 \newcommand{\RGOL}[1]{\overleftarrow{\overrightarrow{#1}}}
  41 \newcommand{\SEAL}[1]{\overline{#1}}
  42 \newcommand{\LGOR}[1]{\overrightarrow{\overleftarrow{#1}}}
  43 \newcommand{\GOL}[1]{\overleftarrow{#1}}
  44 \newcommand{\LN}[1]{\underleftarrow{#1}}
  45 \newcommand{\RN}[1]{\underrightarrow{#1}}
  46 \newcommand{\XI}{\lessdot}
  47 \newcommand{\XJ}{\gtrdot}
  48 \newcommand{\SMTR}[1]{\dddot{#1}}
  49 \newcommand{\SDTR}[1]{\ddot{#1}}
  50
  51 \section{Note on notation}
  52 $i, j, k$ are sentence positions (between words), where $i$ and $j$
  53 are always the start and end, respectively, for what we're calculating
  54 ($k$ is between $i$ and $j$ for $P_{INSIDE}$, to their right or left
  55 for $P_{OUTSIDE}$). $s \in S$ are sentences in the corpus. $\LOC{w}$
  56 is a word token (actually POS-token) of type $w$ at a certain sentence
  57 location. If $\LOC{w}$ is between $i$ and $i+1$, $loc(\LOC{w})=i$
  58 following \citet{klein-thesis}, meaning $i$ is adjacent to $\LOC{w}$
  59 on the left, while $j=loc(\LOC{w})+1$ means that $j$ is adjacent to
  60 $\LOC{w}$ on the right. To simplify, $loc_l(\LOC{w}):=loc(\LOC{w})$ and
  61 $loc_r(\LOC{w}):=loc(\LOC{w})+1$. We write $\LOC{h}$ if this is a head
  62 in the rule being used, $\LOC{a}$ if it is an attached argument.
  63
  64 Notational differences between the thesis \citep{klein-thesis} and the
  65 paper \citep{km-dmv}:
  66
  67 \begin{tabular}{cc}
  68 Paper: & Thesis: \\
  69 $w$ & $\GOR{w}$ \\
  70 $w\urcorner$ & $\RGOL{w}$ \\
  71 $\ulcorner{w}\urcorner$ & $\SEAL{w}$ \\
  72 \end{tabular}
  73
  74 I use $\SMTR{w}$ (or $\SDTR{w}$) to signify one of either $w, \GOR{w},
  75 \RGOL{w}, \LGOR{w}, \GOL{w}$ or $\SEAL{w}$\footnote{This means that
  76   $\SMTR{\LOC{w}}$ is the triplet of the actual POS-tag, its sentence
  77   location as a token, and the ``level of seals''.}.
  78
  79
  80 \section{Inside probabilities}
  81 $P_{INSIDE}$ is defined in \citet[pp.~106-108]{klein-thesis}, the only
  82 thing we need to add is that for right attachments,
  83 $i \leq loc_l(w)<k \leq loc_l(\LOC{a})<j$ while for left attachments,
  84 $i \leq loc_l(\LOC{a})<k \leq loc_l(w)<j$.
  85
  86 (For now, let
  87 \[ \forall{w}[P_{ORDER}(right\text{-}first|w)=1.0] \] since the DMV implementation
  88 is not yet generalized to both directions.)
  89
  90
  91
  92
  93
  94 \subsection{Sentence probability}
  95
  96 $P_s$ is the sentence probability, based on
  97 \citet[p.~38]{lari-csl90}. Since the ROOT rules are different from the
  98 rest, we sum them explicitly in this definition:
  99 \begin{align*}
 100   P_s = \sum_{\LOC{w} \in s} P_{ROOT}(\LOC{w}) P_{INSIDE}(\SEAL{\LOC{w}}, 0, len(s))
 101 \end{align*}
 102
 103 \section{Outside probabilities}
 104
 105 \begin{align*}
 106   P_{OUTSIDE_s}(ROOT, i, j) = \begin{cases}
 107     1.0 & \text{ if $i = 0$ and $j = len(s)$,}\\
 108     0.0 & \text{ otherwise}
 109   \end{cases}
 110 \end{align*}
 111
 112 For $P_{OUTSIDE}(\SEAL{w}, i, j)$, $w$ is attached to under something
 113 else ($\SEAL{w}$ is what we elsewhere call $\SEAL{a}$). Adjacency is
 114 thus calculated on the basis of $h$, the head of the rule. If we are
 115 attached to from the left we have $i \leq  loc_l(\LOC{w}) < j \leq  loc_l(\LOC{h}) < k$, while
 116 from the right we have $k \leq  loc_l(\LOC{h}) < i \leq  loc_l(\LOC{w}) < j$:
 117 \begin{align*}
 118   P_{OUTSIDE}&(\SEAL{\LOC{w}}, i, j) = \\
 119   & P_{ROOT}(w) P_{OUTSIDE}(ROOT, i, j) + \\
 120   & [ \sum_{k > j} ~ \sum_{\LOC{h}:j\leq loc_l(\LOC{h})<k} \sum_{\SMTR{\LOC{h}} \in \{\RGOL{\LOC{h}},\GOL{\LOC{h}}\}} P_{STOP}(\neg stop|h, left,  adj(j, \LOC{h})) P_{ATTACH}(w|h, left) \\
 121   & \qquad \qquad \qquad \qquad \qquad P_{OUTSIDE}(\SMTR{\LOC{h}}, i, k) P_{INSIDE}(\SMTR{\LOC{h}}, j, k) ] ~ + \\
 122   & [ \sum_{k < i} ~ \sum_{\LOC{h}:k\leq loc_l(\LOC{h})<i} \sum_{\SMTR{\LOC{h}} \in \{\LGOR{\LOC{h}},\GOR{\LOC{h}}\}} P_{STOP}(\neg stop|h, right, adj(i, \LOC{h})) P_{ATTACH}(w|h, right) \\
 123   & \qquad \qquad \qquad \qquad \qquad P_{INSIDE}(\SMTR{\LOC{h}}, k, i) P_{OUTSIDE}(\SMTR{\LOC{h}}, k, j) ]
 124 \end{align*}
 125
 126 For $\RGOL{w}$ we know it is either under a left stop rule or it is
 127 the right daughter of a left attachment rule ($k \leq loc_l(\LOC{a}) <
 128 i \leq loc_l(\LOC{w}) < j$), and these are adjacent if the start point
 129 ($i$) equals $loc_l(\LOC{w})$:
 130 \begin{align*}
 131   P_{OUTSIDE}(\RGOL{\LOC{w}}, i, j) = & P_{STOP}(stop|w, left, adj(i,
 132   \LOC{w}))P_{OUTSIDE}(\SEAL{\LOC{w}}, i, j) ~ + \\
 133   & [ \sum_{k < i} ~ \sum_{\LOC{a}:k\leq loc_l(\LOC{a})<i} P_{STOP}(\neg stop|w, left, adj(i, \LOC{w})) P_{ATTACH}(a|w, left) \\
 134   & ~~~~~~~~~~~~~~~~~~~~~~~~~ P_{INSIDE}(\SEAL{\LOC{a}}, k, i) P_{OUTSIDE}(\RGOL{\LOC{w}}, k, j) ]
 135 \end{align*}
 136
 137 For $\GOR{w}$ we are either under a right stop or the left daughter of
 138 a right attachment rule ($i \leq loc_l(\LOC{w}) < j \leq
 139 loc_l(\LOC{a}) < k$), adjacent iff the the end point ($j$) equals
 140 $loc_r(\LOC{w})$:
 141 \begin{align*}
 142   P_{OUTSIDE}(\GOR{\LOC{w}}, i, j) = & P_{STOP}(stop|w, right, adj(j,
 143   \LOC{w}))P_{OUTSIDE}(\RGOL{\LOC{w}}, i, j) ~ + \\
 144   & [ \sum_{k > j} ~ \sum_{\LOC{a}:j\leq loc_l(\LOC{a})<k} P_{STOP}(\neg stop|w, right, adj(j, \LOC{w})) P_{ATTACH}(a|w, right) \\
 145   & ~~~~~~~~~~~~~~~~~~~~~~~~~ P_{OUTSIDE}(\GOR{\LOC{w}}, i, k) P_{INSIDE}(\SEAL{\LOC{a}}, j, k) ]
 146 \end{align*}
 147
 148 $\GOL{w}$ is just like $\RGOL{w}$, except for the outside probability
 149 of having a stop above, where we use $\LGOR{w}$:
 150 \begin{align*}
 151   P_{OUTSIDE}(\GOL{\LOC{w}}, i, j) = & P_{STOP}(stop|w, left, adj(i,
 152   \LOC{w}))P_{OUTSIDE}(\LGOR{\LOC{w}}, i, j) ~ + \\
 153   & [ \sum_{k < i} ~ \sum_{\LOC{a}:k\leq loc_l(\LOC{a})<i} P_{STOP}(\neg stop|w, left, adj(i, \LOC{w})) P_{ATTACH}(a|w, left) \\
 154   & ~~~~~~~~~~~~~~~~~~~~~~~~~ P_{INSIDE}(\SEAL{\LOC{a}}, k, i) P_{OUTSIDE}(\GOL{\LOC{w}}, k, j) ]
 155 \end{align*}
 156
 157 $\LGOR{w}$ is just like $\GOR{w}$, except for the outside probability
 158 of having a stop above, where we use $\SEAL{w}$:
 159 \begin{align*}
 160   P_{OUTSIDE}(\LGOR{\LOC{w}}, i, j) = & P_{STOP}(stop|w, right, adj(j,
 161   \LOC{w}))P_{OUTSIDE}(\SEAL{\LOC{w}}, i, j) ~ + \\
 162   & [ \sum_{k > j} ~ \sum_{\LOC{a}:j\leq loc_l(\LOC{a})<k} P_{STOP}(\neg stop|w, right, adj(j, \LOC{w})) P_{ATTACH}(a|w, right) \\
 163   & ~~~~~~~~~~~~~~~~~~~~~~~~~ P_{OUTSIDE}(\LGOR{\LOC{w}}, i, k) P_{INSIDE}(\SEAL{\LOC{a}}, j, k) ]
 164 \end{align*}
 165
 166
 167 \section{Reestimating the rules} % todo
 168
 169 \subsection{$c$ and $w$ (helper formulas used below)}
 170 % todo: left out rule-probability! wops (also, make P_{fooside}
 171 % sentence-specific)
 172
 173 $c_s(\SMTR{\LOC{w}} : i, j)$ is ``the expected fraction of parses of $s$ with a
 174 node labeled $\SMTR{w}$ extending from position $i$ to position $j$''
 175 \citep[p.~88]{klein-thesis}, here defined to equal $v_{q}$ of
 176 \citet[p.~41]{lari-csl90}:
 177 \begin{align*}
 178   c_s(\SMTR{\LOC{w}} : i, j) = P_{INSIDE_s}(\SMTR{\LOC{w}}, i, j) P_{OUTSIDE_s}(\SMTR{\LOC{w}}, i, j) / P_s
 179 \end{align*}
 180
 181 $w_s$ is $w_{q}$ from \citet[p.~41]{lari-csl90}, generalized to $\SMTR{h}$ and $dir$:
 182 \begin{align*}
 183   w_s(\SEAL{a} & : \SMTR{\LOC{h}}, left, i, j) = \\
 184   & 1/P_s \sum_{k:i<k<j} ~ \sum_{\LOC{a}:i\leq loc_l(\LOC{a})<k}
 185           & P_{STOP}(\neg stop|h, left, adj(k, \SMTR{\LOC{h}})) P_{CHOOSE}(a|h, left) \\
 186   &       & P_{INSIDE_s}(\SEAL{\LOC{a}}, i, k) P_{INSIDE_s}(\SMTR{\LOC{h}}, k, j) P_{OUTSIDE_s}(\SMTR{\LOC{h}}, i, j)
 187 \end{align*}
 188 \begin{align*}
 189   w_s(\SEAL{a} & : \SMTR{\LOC{h}}, right,  i, j) = \\
 190   & 1/P_s \sum_{k:i<k<j} ~ \sum_{\LOC{a}:k\leq loc_l(\LOC{a})<j}
 191           & P_{STOP}(\neg stop|h, right, adj(k, \SMTR{\LOC{h}})) P_{CHOOSE}(a|h, right) \\
 192   &       & P_{INSIDE_s}(\SMTR{\LOC{h}}, i, k) P_{INSIDE_s}(\SEAL{\LOC{a}}, k, j) P_{OUTSIDE_s}(\SMTR{\LOC{h}}, i, j)
 193 \end{align*}
 194
 195 Let $\hat{P}$ be the new STOP/CHOOSE-probabilities (since the old $P$
 196 are used in $P_{INSIDE}$ and $P_{OUTSIDE}$).
 197
 198 \subsection{Attachment reestimation}
 199
 200 $\hat{a}$ is given in \citet[p.~41]{lari-csl90}. Here $i<loc_l(\LOC{h})$
 201 since we want trees with at least one attachment:
 202 \begin{align*}
 203   \hat{a} (a | \SMTR{h}, left) =  \frac
 204   { \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i<loc_l(\LOC{h})} \sum_{j\geq loc_r(\LOC{h})} w_s(\SEAL{a} : \SMTR{\LOC{h}}, left, i, j) }
 205   { \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i<loc_l(\LOC{h})} \sum_{j\geq loc_r(\LOC{h})} c_s(\SMTR{\LOC{h}} : i, j) }
 206 \end{align*}
 207
 208 Here $j>loc_r(\SMTR{\LOC{h}})$ since we want at least one attachment:
 209 \begin{align*}
 210   \hat{a} (a | \SMTR{h}, right) = \frac
 211   { \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i\leq loc_l(\LOC{h})} \sum_{j>loc_r(\LOC{h})} w_s(\SEAL{a} : \SMTR{\LOC{h}}, right, i, j) }
 212   { \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i\leq loc_l(\LOC{h})} \sum_{j>loc_r(\LOC{h})} c_s(\SMTR{\LOC{h}} : i, j) }
 213 \end{align*}
 214
 215 For the first/lowest attachments, $w_s$ and $c_s$ have zero probability
 216 where $i<loc_l(\LOC{h})$ (for $\GOR{h}$) or $j>loc_r(\LOC{h})$ (for $\GOL{h}$),
 217 this is implicit in $P_{INSIDE}$.
 218
 219
 220
 221 \begin{align*}
 222   \hat{P}_{CHOOSE} (a | h, left) = \frac
 223   { \hat{a} (a | \GOL{h}, left) }
 224   { \sum_{w} \hat{a} (w | \GOL{h}, left) }
 225   +
 226   \frac
 227   { \hat{a} (a | \RGOL{h}, left) }
 228   { \sum_{w} \hat{a} (w | \RGOL{h}, left) }
 229 \end{align*}
 230 \begin{align*}
 231   \hat{P}_{CHOOSE} (a | h, right) = \frac
 232   { \hat{a} (a | \GOR{h},right) }
 233   { \sum_{w} \hat{a} (w | \GOR{h},right) }
 234   +
 235   \frac
 236   { \hat{a} (a | \LGOR{h},right) }
 237   { \sum_{w} \hat{a} (w | \LGOR{h},right) }
 238 \end{align*}
 239
 240 \subsection{Stop reestimation}
 241 The following is based on \citet[p.~88]{klein-thesis}. For the
 242 non-adjacent rules, $i<loc_l(\LOC{h})$ on the left and $j>loc_r(\LOC{h})$ on the
 243 right, while for the adjacent rules these are equal (respectively).
 244
 245 To avoid some redundancy below, define a helper function $\hat{d}$ as follows:
 246 \begin{align*}
 247   \hat{d}(\SMTR{h},\SDTR{h},\XI,\XJ) = \frac
 248   { \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i:i \XI loc_l(\LOC{h})} \sum_{j:j \XJ loc_r(\LOC{h})} c_s(\SMTR{\LOC{h}} : i, j) }
 249   { \sum_{s \in S} \sum_{\SDTR{\LOC{h}}:\LOC{h} \in s} \sum_{i:i \XI loc_l(\LOC{h})} \sum_{j:j \XJ loc_r(\LOC{h})} c_s(\SDTR{\LOC{h}} : i, j) }
 250 \end{align*}
 251
 252 Then these are our reestimated stop probabilities:
 253 \begin{align*}
 254   \hat{P}_{STOP} (STOP|h, left, non\text{-}adj) =
 255   \hat{d}(\SEAL{h}, \RGOL{h},<,\geq)  +
 256   \hat{d}(\LGOR{h}, \GOL{h},<,=)
 257 \end{align*}
 258
 259 \begin{align*}
 260   \hat{P}_{STOP} (STOP|h, left, adj) =
 261   \hat{d}(\SEAL{h}, \RGOL{h},=,\geq)  +
 262   \hat{d}(\LGOR{h}, \GOL{h},=,=)
 263 \end{align*}
 264
 265 \begin{align*}
 266   \hat{P}_{STOP} (STOP|h, right, non\text{-}adj) =
 267   \hat{d}(\RGOL{h}, \GOR{h},=,>)  +
 268   \hat{d}(\SEAL{h}, \LGOR{h},\leq,>)
 269 \end{align*}
 270
 271 \begin{align*}
 272   \hat{P}_{STOP} (STOP|h, right, adj) =
 273   \hat{d}(\RGOL{h}, \GOR{h},=,=)  +
 274   \hat{d}(\SEAL{h}, \LGOR{h},\leq,=)
 275 \end{align*}
 276
 277
 278 \subsection{Root reestimation} % todo grok: no \sum_{i} \sum_{j} ?
 279 \begin{align*}
 280   \hat{P}_{ROOT} (h) = \frac
 281   {\sum_{s\in S} P_{ROOT}(h) P_{INSIDE_s}(\SEAL{h}, 0, len(s)) }
 282   {\sum_{s\in S} P_s}
 283 \end{align*}
 284
 285
 286
 287
 288 \section{Alternate CNF-like rules}
 289 Since the IO algorithm as described in \citet{lari-csl90} is made for
 290 rules in CNF, we have an alternate grammar (figure \ref{cnf-like}) for
 291 running parallell tests where we don't have to sum over the different
 292 $loc(h)$ in IO. This is not yet generalized to include left-first
 293 attachment. It is also not quite CNF, since it includes some unary
 294 rewrite rules.
 295
 296 \begin{figure}[htp]
 297   \centering
 298   \begin{tabular} % four left-aligned math tabs, one vertical line
 299     { >{$}l<{$} >{$}l<{$} >{$}l<{$} | >{$}l<{$} }
 300     \multicolumn{3}{c}{Rule} & \multicolumn{1}{c}{$P_{RULE}$ ($a[i,j,k]$ in \citet{lari-csl90})}\\
 301     \hline{}
 302
 303     \RN{\GOR{h}} \rightarrow& \GOR{h} &\SEAL{a}        &P_{STOP}(\neg stop|h, right, adj) \cdot P_{ATTACH}(a|h, right) \\
 304     &&&\\
 305     \RN{\GOR{h}} \rightarrow& \RN{\GOR{h}} &\SEAL{a}   &P_{STOP}(\neg stop|h, right, non\text{-}adj) \cdot P_{ATTACH}(a|h, right) \\
 306     &&&\\
 307     \RGOL{h} \rightarrow& \GOR{h} &STOP       &P_{STOP}(stop|h, right, adj) \\
 308     &&&\\
 309     \RGOL{h} \rightarrow& \RN{\GOR{h}} &STOP  &P_{STOP}(stop|h, right, non\text{-}adj) \\
 310     &&&\\
 311     \LN{\RGOL{h}} \rightarrow& \SEAL{a} &\RGOL{h}      &P_{STOP}(\neg stop|h, left, adj) \cdot P_{ATTACH}(a|h, left) \\
 312     &&&\\
 313     \LN{\RGOL{h}} \rightarrow& \SEAL{a} &\LN{\RGOL{h}} &P_{STOP}(\neg stop|h, left, non\text{-}adj) \cdot P_{ATTACH}(a|h, left) \\
 314     &&&\\
 315     \SEAL{h} \rightarrow& STOP &\RGOL{h}      &P_{STOP}(stop|h, left, adj) \\
 316     &&&\\
 317     \SEAL{h} \rightarrow& STOP &\LN{\RGOL{h}} &P_{STOP}(stop|h, left, non\text{-}adj) \\
 318   \end{tabular}
 319   \caption{Alternate CFG rules (where a child node has an arrow below,
 320     we use non-adjacent probabilities), defined for all words/POS-tags
 321     $h$.}\label{cnf-like}
 322 \end{figure}
 323
 324 The inside probabilities are the same as those given in
 325 \citet{lari-csl90}, with the following exceptions:
 326
 327 When calculating $P_{INSIDE}(\SMTR{h}, i, j)$ and summing through possible
 328 rules which rewrite $\SMTR{h}$, if a rule is of the form $\SMTR{h} \rightarrow
 329 STOP ~ \SDTR{h}$ or $\SMTR{h} \rightarrow \SDTR{h} ~ STOP$, we add $P_{RULE}\cdot
 330 P_{INSIDE}(\SDTR{h}, i, j)$ (that is, rewrite for the same sentence range);
 331 and, as a consequence of these unary rules: for ``terminal rules''
 332 ($P_{ORDER}$) to be applicable, not only must $i = j-1$, but also the
 333 left-hand side symbol of the rule must be of the form $\GOR{h}$.
 334
 335 Similarly, the outside probabilities are the same as those for pure
 336 CNF rules, with the exception that we add the unary rewrite
 337 probabilities
 338 \begin{align*}
 339   \sum_{\SMTR{h}} [&P_{OUTSIDE}(\SMTR{h},i,j)\cdot P_{RULE}(\SMTR{h} \rightarrow \SDTR{h} ~ STOP) \\
 340           + &P_{OUTSIDE}(\SMTR{h},i,j)\cdot P_{RULE}(\SMTR{h} \rightarrow STOP ~ \SDTR{h})]
 341 \end{align*}
 342 to $P_{OUTSIDE}(\SDTR{h},i,j)$ (eg. $f(s,t,i)$).
 343
 344 The reestimation just needs to be expanded to allow unary stop rules,
 345 this is similar to the formula for binary rules in
 346 \citet[p.~41]{lari-csl90}:
 347 \begin{align*}
 348   \hat{P}_{RULE}(\SMTR{h} \rightarrow \SDTR{h}) = \frac
 349   { \sum_{s\in S} \sum_{i<len(s)} \sum_{j>i} 1/P_s \cdot P_{RULE}(\SMTR{h}\rightarrow \SDTR{h})P_{INSIDE}(\SDTR{h}, i, j)P_{OUTSIDE}(\SMTR{h}, i, j) }
 350   { \sum_{s\in S} \sum_{i<len(s)} \sum_{j>i} c_s(\SMTR{h}, i, j) }
 351 \end{align*}
 352 (the denominator is unchanged, but the numerator sums $w_s$ defined
 353 for unary rules; $\SMTR{h}\rightarrow \SDTR{h}$ is meant to include both left and
 354 right stop rules).
 355
 356 % todo: add ROOT rule to CNF grammar?
 357
 358
 359 \section{Pure CNF rules}
 360 Alternatively, one may use the ``pure CNF'' grammar of figure
 361 \ref{cnf-T} and figure \ref{cnf-NT}, below (not yet
 362 implemented). Using the harmonic distribution to initialize this will
 363 still yield a mass-deficient grammar, but at least this allows the
 364 regular IO algorithm to be run.
 365
 366 \begin{figure}[htp]
 367   \centering
 368   \begin{tabular} % four left-aligned math tabs, one vertical line
 369     { >{$}l<{$} >{$}l<{$} >{$}l<{$} | >{$}l<{$} }
 370     \multicolumn{3}{c}{Rule} & \multicolumn{1}{c}{$P_{RULE}$ ($b[i,m]$ in \citet{lari-csl90})}\\
 371     \hline{}
 372
 373     h    \rightarrow&   \text{``h''}&         &1 \\
 374     &&&\\
 375     \SEAL{h}   \rightarrow&   \text{``h''}&         &P_{STOP}(stop|h, left, adj) \cdot  P_{STOP}(stop|h, right, adj) \\
 376     &&&\\
 377     ROOT  \rightarrow&   \text{``h''}&         &P_{STOP}(stop|h, left, adj) \cdot  P_{STOP}(stop|h, right, adj) \cdot  P_{ROOT}(h) \\
 378   \end{tabular}
 379   \caption{Terminals of pure CNF rules, defined for all POS-tags
 380     $h$.}\label{cnf-T}
 381 \end{figure}
 382
 383 \begin{figure}[htp]
 384   \centering
 385   \begin{tabular} % four left-aligned math tabs, one vertical line
 386     { >{$}l<{$} >{$}l<{$} >{$}l<{$} | >{$}l<{$} }
 387     \multicolumn{3}{c}{Rule} & \multicolumn{1}{c}{$P_{RULE}$ ($a[i,j,k]$ in \citet{lari-csl90})}\\
 388     \hline{}
 389
 390     \SEAL{h}   \rightarrow&    h &\SEAL{a}   &P_{STOP}(stop|h, left, adj) \cdot  P_{STOP}(stop|h, right, non\text{-}adj) \\
 391     &&&\cdot  P_{ATTACH}(a|h, right)\cdot P_{STOP}(\neg stop|h, right, adj) \\[3.5pt]
 392     \SEAL{h}   \rightarrow&    h &\GOR{.h}   &P_{STOP}(stop|h, left, adj) \cdot  P_{STOP}(stop|h, right, non\text{-}adj) \\[3.5pt]
 393     \GOR{.h}   \rightarrow& \SEAL{a} &\SEAL{b} &P_{ATTACH}(a|h, right)\cdot P_{STOP}(\neg stop|h, right, adj) \\
 394     &&&\cdot  P_{ATTACH}(b|h, right)\cdot P_{STOP}(\neg stop|h, right, non\text{-}adj) \\[3.5pt]
 395     \GOR{.h}   \rightarrow& \SEAL{a} &\GOR{h} &P_{ATTACH}(a|h, right)\cdot P_{STOP}(\neg stop|h, right, adj) \\[3.5pt]
 396     \GOR{h}   \rightarrow& \SEAL{a} &\SEAL{b} &P_{ATTACH}(a|h, right)\cdot P_{STOP}(\neg stop|h, right, non\text{-}adj) \\
 397     &&&\cdot  P_{ATTACH}(b|h, right)\cdot P_{STOP}(\neg stop|h, right, non\text{-}adj) \\[3.5pt]
 398     \GOR{h}   \rightarrow& \SEAL{a} &\GOR{h} &P_{ATTACH}(a|h, right)\cdot P_{STOP}(\neg stop|h, right, non\text{-}adj) \\[3.5pt]
 399     \SEAL{h}   \rightarrow& \SEAL{a} &h &P_{STOP}(stop|h, left, non\text{-}adj) \cdot  P_{STOP}(stop|h, right, adj) \\
 400     &&&\cdot  P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, adj) \\[3.5pt]
 401     \SEAL{h}   \rightarrow& \GOL{h.} &h &P_{STOP}(stop|h, left, non\text{-}adj) \cdot  P_{STOP}(stop|h, right, adj) \\[3.5pt]
 402     \GOL{h.}   \rightarrow& \SEAL{b} &\SEAL{a} &P_{ATTACH}(b|h, left)\cdot P_{STOP}(\neg stop|h, left, non\text{-}adj) \\
 403     &&&\cdot  P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, adj) \\[3.5pt]
 404     \GOL{h.}   \rightarrow& \GOL{h} &\SEAL{a} &P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, adj) \\[3.5pt]
 405     \GOL{h}   \rightarrow& \SEAL{a} &\SEAL{b} &P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, non\text{-}adj) \\
 406     &&&\cdot  P_{ATTACH}(b|h, left)\cdot P_{STOP}(\neg stop|h, left, non\text{-}adj) \\[3.5pt]
 407     \GOL{h}   \rightarrow& \GOL{h} &\SEAL{a} &P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, non\text{-}adj) \\[3.5pt]
 408     \SEAL{h}   \rightarrow& \GOL{h.} &\SEAL{\GOR{h}} &P_{STOP}(stop|h, left, non\text{-}adj) \\[3.5pt]
 409     \SEAL{h}   \rightarrow& \SEAL{a} &\SEAL{\GOR{h}} &P_{STOP}(stop|h, left, non\text{-}adj) \\
 410     &&&\cdot  P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, adj) \\[3.5pt]
 411     \SEAL{\GOR{h}}  \rightarrow& h &\GOR{.h} &P_{STOP}(stop|h, right, non\text{-}adj) \\[3.5pt]
 412     \SEAL{\GOR{h}}  \rightarrow& h &\SEAL{a} &P_{STOP}(stop|h, right, non\text{-}adj) \\
 413     &&&\cdot  P_{ATTACH}(a|h, right)\cdot P_{STOP}(\neg stop|h, right, adj) \\[3.5pt]
 414     ROOT  \rightarrow& h &\SEAL{a} &P_{STOP}(stop|h, left, adj) \cdot  P_{STOP}(stop|h, right, non\text{-}adj) \\
 415     &&&\cdot  P_{ATTACH}(a|h, right)\cdot P_{STOP}(\neg stop|h, right, adj) \cdot  P_{ROOT}(h) \\[3.5pt]
 416     ROOT  \rightarrow& h &\GOR{.h} &P_{STOP}(stop|h, left, adj) \cdot  P_{STOP}(stop|h, right, non\text{-}adj) \cdot  P_{ROOT}(h) \\[3.5pt]
 417     ROOT  \rightarrow& \SEAL{a} &h &P_{STOP}(stop|h, left, non\text{-}adj) \cdot  P_{STOP}(stop|h, right, adj) \\
 418     &&&\cdot  P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, adj) \cdot  P_{ROOT}(h) \\[3.5pt]
 419     ROOT  \rightarrow& \GOL{h.} &h &P_{STOP}(stop|h, left, non\text{-}adj) \cdot  P_{STOP}(stop|h, right, adj) \cdot  P_{ROOT}(h) \\[3.5pt]
 420     ROOT  \rightarrow& \GOL{h.} &\SEAL{\GOR{h}} &P_{STOP}(stop|h, left, non\text{-}adj) \cdot  P_{ROOT}(h) \\[3.5pt]
 421     ROOT  \rightarrow& \SEAL{a} &\SEAL{\GOR{h}} &P_{STOP}(stop|h, left, non\text{-}adj) \\
 422     &&&\cdot  P_{ATTACH}(a|h, left)\cdot P_{STOP}(\neg stop|h, left, adj) \cdot  P_{ROOT}(h) \\[3.5pt]
 423   \end{tabular}
 424   \caption{Non-terminals of pure CNF rules, defined for all POS-tags
 425     $h$, $a$ and $b$.}\label{cnf-NT}
 426 \end{figure}
 427
 428
 429
 430
 431 \bibliography{./statistical.bib}
 432 \bibliographystyle{plainnat}
 433
 434 \end{document}