% Created 2008-06-13 Fri 17:05

\documentclass[11pt,a4paper]{article}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{hyperref}
\usepackage{natbib}

\usepackage{pslatex}
\usepackage{pdfsync}
\pdfoutput=1

\usepackage{qtree}
\usepackage{amsmath}
\usepackage{amssymb}

\usepackage{avm}
\avmfont{\sc}
\avmoptions{sorted,active}
\avmvalfont{\rm}
\avmsortfont{\scriptsize\it}

\usepackage{array} % for a tabular with math in it

\title{The DMV and CCM models}
\author{Emily Morgan \& Kevin Brubeck Unhammer}
\date{15 September 2008}

\begin{document}

\maketitle

\tableofcontents

\section{Introduction}
\section{The Constituent Context Model}
\subsection{Results}
\section{A Dependency Model with Valence}
This is an attempt at fleshing out the details of the inside-outside
algorithm \citep{lari-csl90} applied to the DMV model of
\citet{klein-thesis}.

\newcommand{\LOC}[1]{\textbf{#1}}
\newcommand{\GOR}[1]{\overrightarrow{#1}}
\newcommand{\RGOL}[1]{\overleftarrow{\overrightarrow{#1}}}
\newcommand{\SEAL}[1]{\overline{#1}}
\newcommand{\LGOR}[1]{\overrightarrow{\overleftarrow{#1}}}
\newcommand{\GOL}[1]{\overleftarrow{#1}}
\newcommand{\LN}[1]{\underleftarrow{#1}}
\newcommand{\RN}[1]{\underrightarrow{#1}}
\newcommand{\XI}{\lessdot}
\newcommand{\XJ}{\gtrdot}
\newcommand{\SMTR}[1]{\dddot{#1}}
\newcommand{\SDTR}[1]{\ddot{#1}}

\subsection{Note on notation}
$i, j, k$ are sentence positions (between words), where $i$ and $j$
are always the start and end, respectively, of what we are calculating
($k$ lies between $i$ and $j$ for $P_{INSIDE}$, to their right or left
for $P_{OUTSIDE}$). $s \in S$ are sentences in the corpus. $\LOC{w}$
is a word token (actually a POS token) of type $w$ at a certain
sentence location. If $\LOC{w}$ is between $i$ and $i+1$, then
$loc(\LOC{w})=i$ following \citet{klein-thesis}, meaning $i$ is
adjacent to $\LOC{w}$ on the left, while $j=loc(\LOC{w})+1$ means that
$j$ is adjacent to $\LOC{w}$ on the right. To simplify,
$loc_l(\LOC{w}):=loc(\LOC{w})$ and $loc_r(\LOC{w}):=loc(\LOC{w})+1$.
We write $\LOC{h}$ if this is a head in the rule being used, and
$\LOC{a}$ if it is an attached argument.

There are some notational differences between \citet{klein-thesis} and
\citet{km-dmv}:

\begin{tabular}{cc}
Paper: & Thesis: \\
$w$ & $\GOR{w}$ \\
$w\urcorner$ & $\RGOL{w}$ \\
$\ulcorner{w}\urcorner$ & $\SEAL{w}$ \\
\end{tabular}

We use $\SMTR{w}$ (or $\SDTR{w}$) to signify one of either $w, \GOR{w},
\RGOL{w}, \LGOR{w}, \GOL{w}$ or $\SEAL{w}$\footnote{This means that
$\SMTR{\LOC{w}}$ is the triplet of the actual POS-tag, its sentence
location as a token, and the ``level of seals''.}.

\subsection{Inside probabilities}
$P_{INSIDE}$ is defined in \citet[pp.~106-108]{klein-thesis}; the only
thing we need to add is that for right attachments,
$i \leq loc_l(\LOC{w})<k \leq loc_l(\LOC{a})<j$, while for left attachments,
$i \leq loc_l(\LOC{a})<k \leq loc_l(\LOC{w})<j$.

(For now, let
\[ \forall{w}[P_{ORDER}(right\text{-}first|w)=1.0] \]
since the DMV implementation is not yet generalized to both
directions.)

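As a minimal illustration of these index constraints, here is a Python
sketch of the right-attachment contribution to $P_{INSIDE}$. The
dict-based charts and parameter tables (\texttt{inside},
\texttt{p\_nostop} for $P_{STOP}(\neg stop|\ldots)$,
\texttt{p\_attach}) are hypothetical names for this report, not our
actual implementation; assume missing chart entries default to $0.0$
(e.g.\ \texttt{collections.defaultdict(float)}).

\begin{verbatim}
def inside_right_attach(inside, p_nostop, p_attach, sent, h, loc_h, i, j):
    """Right-attachment part of P_INSIDE for head h at position loc_h:
    sum over splits k and arguments a with i <= loc_h < k <= loc_a < j."""
    total = 0.0
    for k in range(loc_h + 1, j):          # split point, right of the head
        adj = (k == loc_h + 1)             # adjacent iff k == loc_r(h)
        for loc_a in range(k, j):          # argument token within (k, j)
            a = sent[loc_a]
            total += (p_nostop[h, 'right', adj]            # P_STOP(~stop|h,right,adj)
                      * p_attach[a, h, 'right']            # P_ATTACH(a|h,right)
                      * inside[('gor', h, loc_h), i, k]    # right-collecting head
                      * inside[('seal', a, loc_a), k, j])  # sealed argument
    return total
\end{verbatim}
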
\subsubsection{Sentence probability}

$P_s$ is the sentence probability, based on
\citet[p.~38]{lari-csl90}. Since the ROOT rules are different from the
rest, we sum them explicitly in this definition:
\begin{align*}
P_s = \sum_{\LOC{w} \in s} P_{ROOT}(\LOC{w}) P_{INSIDE}(\SEAL{\LOC{w}}, 0, len(s))
\end{align*}

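A sketch of this sum, with the same hypothetical chart layout as
above:

\begin{verbatim}
def sentence_prob(inside, p_root, sent):
    """P_s: sum over tokens w of P_ROOT(w) * P_INSIDE(sealed w, 0, len(s))."""
    n = len(sent)
    return sum(p_root[sent[loc]] * inside[('seal', sent[loc], loc), 0, n]
               for loc in range(n))
\end{verbatim}
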
\subsection{Outside probabilities}

\begin{align*}
P_{OUTSIDE_s}(ROOT, i, j) = \begin{cases}
1.0 & \text{ if $i = 0$ and $j = len(s)$,}\\
0.0 & \text{ otherwise}
\end{cases}
\end{align*}

For $P_{OUTSIDE}(\SEAL{w}, i, j)$, $w$ has been attached under
something else ($\SEAL{w}$ is what we elsewhere call $\SEAL{a}$).
Adjacency is thus calculated on the basis of $h$, the head of the
rule. If $w$ is attached as a left argument (the head is to its
right), we have $i \leq loc_l(\LOC{w}) < j \leq loc_l(\LOC{h}) < k$, while as a right
argument we have $k \leq loc_l(\LOC{h}) < i \leq loc_l(\LOC{w}) < j$:
\begin{align*}
P_{OUTSIDE}&(\SEAL{\LOC{w}}, i, j) = \\
& P_{ROOT}(w) P_{OUTSIDE}(ROOT, i, j) + \\
& [ \sum_{k > j} ~ \sum_{\LOC{h}:j\leq loc_l(\LOC{h})<k} \sum_{\SMTR{\LOC{h}} \in \{\RGOL{\LOC{h}},\GOL{\LOC{h}}\}} P_{STOP}(\neg stop|h, left, adj(j, \LOC{h})) P_{ATTACH}(w|h, left) \\
& \qquad \qquad \qquad \qquad \qquad P_{OUTSIDE}(\SMTR{\LOC{h}}, i, k) P_{INSIDE}(\SMTR{\LOC{h}}, j, k) ] ~ + \\
& [ \sum_{k < i} ~ \sum_{\LOC{h}:k\leq loc_l(\LOC{h})<i} \sum_{\SMTR{\LOC{h}} \in \{\LGOR{\LOC{h}},\GOR{\LOC{h}}\}} P_{STOP}(\neg stop|h, right, adj(i, \LOC{h})) P_{ATTACH}(w|h, right) \\
& \qquad \qquad \qquad \qquad \qquad P_{INSIDE}(\SMTR{\LOC{h}}, k, i) P_{OUTSIDE}(\SMTR{\LOC{h}}, k, j) ]
\end{align*}

For $\RGOL{w}$ we know it is either under a left stop rule or it is
the right daughter of a left attachment rule ($k \leq loc_l(\LOC{a}) <
i \leq loc_l(\LOC{w}) < j$), and these are adjacent if the start point
($i$) equals $loc_l(\LOC{w})$:
\begin{align*}
P_{OUTSIDE}(\RGOL{\LOC{w}}, i, j) = & P_{STOP}(stop|w, left, adj(i,
\LOC{w}))P_{OUTSIDE}(\SEAL{\LOC{w}}, i, j) ~ + \\
& [ \sum_{k < i} ~ \sum_{\LOC{a}:k\leq loc_l(\LOC{a})<i} P_{STOP}(\neg stop|w, left, adj(i, \LOC{w})) P_{ATTACH}(a|w, left) \\
& \qquad\qquad\qquad P_{INSIDE}(\SEAL{\LOC{a}}, k, i) P_{OUTSIDE}(\RGOL{\LOC{w}}, k, j) ]
\end{align*}

For $\GOR{w}$ we are either under a right stop or the left daughter of
a right attachment rule ($i \leq loc_l(\LOC{w}) < j \leq
loc_l(\LOC{a}) < k$), adjacent iff the end point ($j$) equals
$loc_r(\LOC{w})$:
\begin{align*}
P_{OUTSIDE}(\GOR{\LOC{w}}, i, j) = & P_{STOP}(stop|w, right, adj(j,
\LOC{w}))P_{OUTSIDE}(\RGOL{\LOC{w}}, i, j) ~ + \\
& [ \sum_{k > j} ~ \sum_{\LOC{a}:j\leq loc_l(\LOC{a})<k} P_{STOP}(\neg stop|w, right, adj(j, \LOC{w})) P_{ATTACH}(a|w, right) \\
& \qquad\qquad\qquad P_{OUTSIDE}(\GOR{\LOC{w}}, i, k) P_{INSIDE}(\SEAL{\LOC{a}}, j, k) ]
\end{align*}

$\GOL{w}$ is just like $\RGOL{w}$, except for the outside probability
of having a stop above, where we use $\LGOR{w}$:
\begin{align*}
P_{OUTSIDE}(\GOL{\LOC{w}}, i, j) = & P_{STOP}(stop|w, left, adj(i,
\LOC{w}))P_{OUTSIDE}(\LGOR{\LOC{w}}, i, j) ~ + \\
& [ \sum_{k < i} ~ \sum_{\LOC{a}:k\leq loc_l(\LOC{a})<i} P_{STOP}(\neg stop|w, left, adj(i, \LOC{w})) P_{ATTACH}(a|w, left) \\
& \qquad\qquad\qquad P_{INSIDE}(\SEAL{\LOC{a}}, k, i) P_{OUTSIDE}(\GOL{\LOC{w}}, k, j) ]
\end{align*}

$\LGOR{w}$ is just like $\GOR{w}$, except for the outside probability
of having a stop above, where we use $\SEAL{w}$:
\begin{align*}
P_{OUTSIDE}(\LGOR{\LOC{w}}, i, j) = & P_{STOP}(stop|w, right, adj(j,
\LOC{w}))P_{OUTSIDE}(\SEAL{\LOC{w}}, i, j) ~ + \\
& [ \sum_{k > j} ~ \sum_{\LOC{a}:j\leq loc_l(\LOC{a})<k} P_{STOP}(\neg stop|w, right, adj(j, \LOC{w})) P_{ATTACH}(a|w, right) \\
& \qquad\qquad\qquad P_{OUTSIDE}(\LGOR{\LOC{w}}, i, k) P_{INSIDE}(\SEAL{\LOC{a}}, j, k) ]
\end{align*}

\subsection{Reestimating the rules}
% TODO: fix stop and attachment formulas so they divide before summing

\subsubsection{$c$ and $w$ (helper formulas used below)}
$c_s(\SMTR{\LOC{w}} : i, j)$ is ``the expected fraction of parses of
$s$ with a node labeled $\SMTR{w}$ extending from position $i$ to
position $j$'' \citep[p.~88]{klein-thesis}, here defined to equal
$v_{q}$ of \citet[p.~41]{lari-csl90}\footnote{In terms of regular EM,
this is the count of trees ($f_{T_q}(x)$ in
\citet[p.~46]{prescher-em}) in which the node extended from $i$ to
$j$.}:
\begin{align*}
c_s(\SMTR{\LOC{w}} : i, j) = P_{INSIDE_s}(\SMTR{\LOC{w}}, i, j) P_{OUTSIDE_s}(\SMTR{\LOC{w}}, i, j) / P_s
\end{align*}

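In code this is a one-liner; a sketch with the hypothetical
per-sentence chart dicts used above (\texttt{.get} so that missing
spans count as zero):

\begin{verbatim}
def c(inside, outside, p_s, node, i, j):
    """Expected fraction of parses of s with `node` spanning (i, j);
    node is a (seal-level, word, location) triple."""
    return (inside.get((node, i, j), 0.0)
            * outside.get((node, i, j), 0.0) / p_s)
\end{verbatim}
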
$w_s$ is $w_{q}$ from \citet[p.~41]{lari-csl90}, generalized to $\SMTR{h}$ and $dir$:
\begin{align*}
w_s(\SEAL{a} & : \SMTR{\LOC{h}}, left, i, j) = \\
& 1/P_s \sum_{k:i<k<j} ~ \sum_{\LOC{a}:i\leq loc_l(\LOC{a})<k}
& P_{STOP}(\neg stop|h, left, adj(k, \LOC{h})) P_{CHOOSE}(a|h, left) \\
& & P_{INSIDE_s}(\SEAL{\LOC{a}}, i, k) P_{INSIDE_s}(\SMTR{\LOC{h}}, k, j) P_{OUTSIDE_s}(\SMTR{\LOC{h}}, i, j)
\end{align*}
\begin{align*}
w_s(\SEAL{a} & : \SMTR{\LOC{h}}, right, i, j) = \\
& 1/P_s \sum_{k:i<k<j} ~ \sum_{\LOC{a}:k\leq loc_l(\LOC{a})<j}
& P_{STOP}(\neg stop|h, right, adj(k, \LOC{h})) P_{CHOOSE}(a|h, right) \\
& & P_{INSIDE_s}(\SMTR{\LOC{h}}, i, k) P_{INSIDE_s}(\SEAL{\LOC{a}}, k, j) P_{OUTSIDE_s}(\SMTR{\LOC{h}}, i, j)
\end{align*}

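A sketch of the right-direction case, reusing the hypothetical tables
from the sketches above (\texttt{p\_choose} here stands for
$P_{CHOOSE}$):

\begin{verbatim}
def w_right(inside, outside, p_nostop, p_choose, p_s, sent, a, h_node, i, j):
    """w_s(sealed a : h_node, right, i, j); h_node = (seal-level, h, loc_h)."""
    _, h, loc_h = h_node
    total = 0.0
    for k in range(i + 1, j):                    # i < k < j
        adj = (k == loc_h + 1)                   # adj(k, h)
        for loc_a in range(k, j):                # k <= loc_l(a) < j
            if sent[loc_a] != a:                 # only tokens of type a
                continue
            total += (p_nostop[h, 'right', adj]
                      * p_choose[a, h, 'right']
                      * inside.get((h_node, i, k), 0.0)
                      * inside.get((('seal', a, loc_a), k, j), 0.0)
                      * outside.get((h_node, i, j), 0.0))
    return total / p_s
\end{verbatim}
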
Let $\hat{P}$ denote the reestimated STOP/CHOOSE probabilities (the
old $P$ are still used in $P_{INSIDE}$ and $P_{OUTSIDE}$).

\subsubsection{Attachment reestimation}

$\hat{a}$ is given in \citet[p.~41]{lari-csl90}. Here $i<loc_l(\LOC{h})$
since we want trees with at least one attachment:
\begin{align*}
\hat{a} (a | \SMTR{h}, left) = \frac
{ \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i<loc_l(\LOC{h})} \sum_{j\geq loc_r(\LOC{h})} w_s(\SEAL{a} : \SMTR{\LOC{h}}, left, i, j) }
{ \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i<loc_l(\LOC{h})} \sum_{j\geq loc_r(\LOC{h})} c_s(\SMTR{\LOC{h}} : i, j) }
\end{align*}

Here $j>loc_r(\LOC{h})$ since we want at least one attachment:
\begin{align*}
\hat{a} (a | \SMTR{h}, right) = \frac
{ \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i\leq loc_l(\LOC{h})} \sum_{j>loc_r(\LOC{h})} w_s(\SEAL{a} : \SMTR{\LOC{h}}, right, i, j) }
{ \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i\leq loc_l(\LOC{h})} \sum_{j>loc_r(\LOC{h})} c_s(\SMTR{\LOC{h}} : i, j) }
\end{align*}

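Under the same assumptions, the right-direction reestimation can be
sketched as follows, where \texttt{charts} is a hypothetical iterable
of per-sentence $(s, P_{INSIDE_s}, P_{OUTSIDE_s}, P_s)$ tuples and
\texttt{w\_right}/\texttt{c} are the sketches above:

\begin{verbatim}
def a_hat_right(charts, p_nostop, p_choose, a, seal):
    """hat-a(a | h at the given seal level, right); j > loc_r(h)
    leaves room for at least one right attachment."""
    num = den = 0.0
    for sent, inside, outside, p_s in charts:
        for loc_h, h in enumerate(sent):
            node = (seal, h, loc_h)
            for i in range(loc_h + 1):                     # i <= loc_l(h)
                for j in range(loc_h + 2, len(sent) + 1):  # j > loc_r(h)
                    num += w_right(inside, outside, p_nostop, p_choose,
                                   p_s, sent, a, node, i, j)
                    den += c(inside, outside, p_s, node, i, j)
    return num / den
\end{verbatim}
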
For the first/lowest attachments, $w_s$ and $c_s$ have zero probability
where $i<loc_l(\LOC{h})$ (for $\GOR{h}$) or $j>loc_r(\LOC{h})$ (for
$\GOL{h}$); this is implicit in $P_{INSIDE}$.

\begin{align*}
\hat{P}_{CHOOSE} (a | h, left) =
\hat{a} (a | \GOL{h}, left)
+ \hat{a} (a | \RGOL{h}, left)
\end{align*}
\begin{align*}
\hat{P}_{CHOOSE} (a | h, right) =
\hat{a} (a | \GOR{h}, right)
+ \hat{a} (a | \LGOR{h}, right)
\end{align*}

\subsubsection{Stop reestimation}
The following is based on \citet[p.~88]{klein-thesis}. For the
non-adjacent rules, $i<loc_l(\LOC{h})$ on the left and $j>loc_r(\LOC{h})$ on the
right, while for the adjacent rules we have $i=loc_l(\LOC{h})$ and
$j=loc_r(\LOC{h})$, respectively.

To avoid some redundancy below, define a helper function $\hat{d}$ as follows:
\begin{align*}
\hat{d}(\SMTR{h},\SDTR{h},\XI,\XJ) = \frac
{ \sum_{s \in S} \sum_{\SMTR{\LOC{h}}:\LOC{h} \in s} \sum_{i:i \XI loc_l(\LOC{h})} \sum_{j:j \XJ loc_r(\LOC{h})} c_s(\SMTR{\LOC{h}} : i, j) }
{ \sum_{s \in S} \sum_{\SDTR{\LOC{h}}:\LOC{h} \in s} \sum_{i:i \XI loc_l(\LOC{h})} \sum_{j:j \XJ loc_r(\LOC{h})} c_s(\SDTR{\LOC{h}} : i, j) }
\end{align*}

Then these are our reestimated stop probabilities:
\begin{align*}
\hat{P}_{STOP} (STOP|h, left, non\text{-}adj) =
\hat{d}(\SEAL{h}, \RGOL{h},<,\geq) +
\hat{d}(\LGOR{h}, \GOL{h},<,=)
\end{align*}

\begin{align*}
\hat{P}_{STOP} (STOP|h, left, adj) =
\hat{d}(\SEAL{h}, \RGOL{h},=,\geq) +
\hat{d}(\LGOR{h}, \GOL{h},=,=)
\end{align*}

\begin{align*}
\hat{P}_{STOP} (STOP|h, right, non\text{-}adj) =
\hat{d}(\RGOL{h}, \GOR{h},=,>) +
\hat{d}(\SEAL{h}, \LGOR{h},\leq,>)
\end{align*}

\begin{align*}
\hat{P}_{STOP} (STOP|h, right, adj) =
\hat{d}(\RGOL{h}, \GOR{h},=,=) +
\hat{d}(\SEAL{h}, \LGOR{h},\leq,=)
\end{align*}

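Mapping $\XI$ and $\XJ$ to comparison operators gives a direct sketch
of $\hat{d}$ (hypothetical \texttt{charts} and \texttt{c} as above),
with the left-adjacent case shown as usage:

\begin{verbatim}
import operator

CMP = {'<': operator.lt, '=': operator.eq, '<=': operator.le,
       '>': operator.gt, '>=': operator.ge}

def d_hat(charts, num_seal, den_seal, cmp_i, cmp_j):
    """Ratio of c_s counts for two seal levels of the same head, with
    i and j restricted by comparisons against loc_l(h) and loc_r(h)."""
    num = den = 0.0
    for sent, inside, outside, p_s in charts:
        for loc_h, h in enumerate(sent):
            for i in range(len(sent)):
                for j in range(i + 1, len(sent) + 1):
                    if CMP[cmp_i](i, loc_h) and CMP[cmp_j](j, loc_h + 1):
                        num += c(inside, outside, p_s, (num_seal, h, loc_h), i, j)
                        den += c(inside, outside, p_s, (den_seal, h, loc_h), i, j)
    return num / den

# e.g. the left-adjacent case from above:
# p_stop_left_adj = (d_hat(charts, 'seal', 'rgol', '=', '>=')
#                    + d_hat(charts, 'lgor', 'gol', '=', '='))
\end{verbatim}
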
\subsubsection{Root reestimation}
Following \citet[p.~46]{prescher-em}, to find the reestimated
probability of a PCFG rule, we first find the new treebank frequencies
$f_{T_P}(tree)=P(tree)/P_s$, then for a rule $X' \rightarrow X$ we
divide the new frequencies of the trees which use this rule by
those of the trees containing the node $X'$. $ROOT$ appears once per
tree, meaning we divide by $1$ per sentence\footnote{Assuming each
tree has frequency $1$.}, so $\hat{P}_{ROOT}(h)=\sum_{tree:ROOT
\rightarrow \SEAL{h} \text{ used in } tree} f_{T_P}(tree)=\sum_{tree:ROOT
\rightarrow \SEAL{h} \text{ used in } tree} P(tree)/P_s$, which turns into:

\begin{align*}
\hat{P}_{ROOT} (h) = \frac
{\sum_{s\in S} 1 / P_s \cdot \sum_{\LOC{h}\in s} P_{ROOT}(\LOC{h}) P_{INSIDE_s}(\SEAL{h}, 0, len(s))}
{\sum_{s\in S} 1}
\end{align*}

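A sketch of this reestimation (hypothetical per-sentence charts as
above):

\begin{verbatim}
from collections import defaultdict

def p_root_hat(charts, p_root):
    """hat-P_ROOT(h); the denominator is just the number of sentences."""
    num = defaultdict(float)
    n_sents = 0
    for sent, inside, outside, p_s in charts:
        n_sents += 1
        n = len(sent)
        for loc_h, h in enumerate(sent):
            num[h] += (p_root[h]
                       * inside.get((('seal', h, loc_h), 0, n), 0.0) / p_s)
    return {h: v / n_sents for h, v in num.items()}
\end{verbatim}
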
\subsection{Alternate CNF-like rules}
Since the IO algorithm as described in \citet{lari-csl90} is made for
rules in Chomsky Normal Form (CNF), we have an alternate grammar
(figure \ref{cnf-like}) for testing purposes, where we don't
have to sum over the different $loc(h)$ in IO. This is not yet
generalized to include left-first attachment. It is also not quite
CNF, since it includes some unary rewrite rules.

\begin{figure}[htp]
\centering
\begin{tabular} % four left-aligned math tabs, one vertical line
{ >{$}l<{$} >{$}l<{$} >{$}l<{$} | >{$}l<{$} }
\multicolumn{3}{c}{Rule} & \multicolumn{1}{c}{$P_{RULE}$ ($a[i,j,k]$ in \citet{lari-csl90})}\\
\hline{}
\RN{\GOR{h}} \rightarrow& \GOR{h} &\SEAL{a} &P_{STOP}(\neg stop|h, right, adj) \cdot P_{ATTACH}(a|h, right) \\
&&&\\
\RN{\GOR{h}} \rightarrow& \RN{\GOR{h}} &\SEAL{a} &P_{STOP}(\neg stop|h, right, non\text{-}adj) \cdot P_{ATTACH}(a|h, right) \\
&&&\\
\RGOL{h} \rightarrow& \GOR{h} &STOP &P_{STOP}(stop|h, right, adj) \\
&&&\\
\RGOL{h} \rightarrow& \RN{\GOR{h}} &STOP &P_{STOP}(stop|h, right, non\text{-}adj) \\
&&&\\
\LN{\RGOL{h}} \rightarrow& \SEAL{a} &\RGOL{h} &P_{STOP}(\neg stop|h, left, adj) \cdot P_{ATTACH}(a|h, left) \\
&&&\\
\LN{\RGOL{h}} \rightarrow& \SEAL{a} &\LN{\RGOL{h}} &P_{STOP}(\neg stop|h, left, non\text{-}adj) \cdot P_{ATTACH}(a|h, left) \\
&&&\\
\SEAL{h} \rightarrow& STOP &\RGOL{h} &P_{STOP}(stop|h, left, adj) \\
&&&\\
\SEAL{h} \rightarrow& STOP &\LN{\RGOL{h}} &P_{STOP}(stop|h, left, non\text{-}adj) \\
\end{tabular}
\caption{Alternate CFG rules (where a child node has an arrow below,
we use non-adjacent probabilities), defined for all words/POS-tags
$h$.}\label{cnf-like}
\end{figure}

The inside probabilities are the same as those given in
\citet{lari-csl90}, with the following exceptions:

When calculating $P_{INSIDE}(\SMTR{h}, i, j)$ and summing through
possible rules which rewrite $\SMTR{h}$, if a rule is of the form
$\SMTR{h} \rightarrow STOP ~ \SDTR{h}$ or $\SMTR{h} \rightarrow
\SDTR{h} ~ STOP$, we add $P_{RULE}\cdot P_{INSIDE}(\SDTR{h}, i, j)$
(that is, rewrite for the same sentence range); and, as a consequence
of these unary rules: for ``terminal rules'' ($P_{ORDER}$) to be
applicable, not only must $i = j-1$, but also the left-hand side
symbol of the rule must be of the form $\GOR{h}$.

Similarly, the outside probabilities are the same as those for pure
CNF rules, with the exception that we add the unary rewrite
probabilities
\begin{align*}
\sum_{\SMTR{h}} [&P_{OUTSIDE}(\SMTR{h},i,j)\cdot P_{RULE}(\SMTR{h} \rightarrow \SDTR{h} ~ STOP) \\
+ &P_{OUTSIDE}(\SMTR{h},i,j)\cdot P_{RULE}(\SMTR{h} \rightarrow STOP ~ \SDTR{h})]
\end{align*}
to $P_{OUTSIDE}(\SDTR{h},i,j)$ (i.e.\ $f(s,t,i)$ in \citet{lari-csl90}).

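A sketch of adding that unary mass when filling a span $(i, j)$; the
\texttt{unary\_pairs} list of (parent, child) node pairs and the
\texttt{p\_rule} table are hypothetical:

\begin{verbatim}
def add_unary_outside(outside, p_rule, unary_pairs, i, j):
    """For each unary pair (parent -> child STOP or parent -> STOP child),
    the child's outside mass over (i, j) inherits from the parent's."""
    for parent, child in unary_pairs:
        outside[child, i, j] = (outside.get((child, i, j), 0.0)
                                + outside.get((parent, i, j), 0.0)
                                * p_rule[parent, child])
\end{verbatim}
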
This grammar gave the same inside and outside probabilities as the
DMV rules above when run over our corpus.

\subsection{TODO: Initialization}
\citet{klein-thesis} describes DMV initialization using a ``harmonic
distribution'' for the initial probabilities, where the probability of
one word heading another is higher if they appear closer to one
another.

There are several ways this could be implemented. We initialized
attachment probabilities with the following formula:

\begin{align*}
P_{ATTACH}(a|h,right) = \frac
{\sum_{s \in S}\sum_{\LOC{h} \in s} \sum_{\LOC{a} \in s:loc(\LOC{a})>loc(\LOC{h})} 1/(loc(\LOC{a})-loc(\LOC{h})) + C_A}
{\sum_{s \in S}\sum_{\LOC{h} \in s} \sum_{\LOC{w} \in s:loc(\LOC{w})>loc(\LOC{h})} 1/(loc(\LOC{w})-loc(\LOC{h})) + C_A}
\end{align*}

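A sketch of this initializer (the function and table names are ours,
for illustration only):

\begin{verbatim}
from collections import defaultdict

def harmonic_attach_init(sents, c_a):
    """Initial P_ATTACH(a|h, right) from inverse token distance,
    smoothed with the constant C_A as in the formula above."""
    num = defaultdict(float)   # keyed by (a, h)
    den = defaultdict(float)   # keyed by h
    for sent in sents:
        for loc_h, h in enumerate(sent):
            for loc_a in range(loc_h + 1, len(sent)):
                num[sent[loc_a], h] += 1.0 / (loc_a - loc_h)
                den[h] += 1.0 / (loc_a - loc_h)
    return {(a, h): (v + c_a) / (den[h] + c_a) for (a, h), v in num.items()}
\end{verbatim}
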
The probability of stopping adjacently (left or right) was increased
whenever a word occurred at a (left or right) sentence
border\footnote{For non-adjacent stopping we checked for occurrence at
the second(-to-last) position.}:

\begin{align*}
f(stop:\LOC{h},left,adj)=\begin{cases}
C_S & \text{if } loc(\LOC{h}) = 0,\\
0 & \text{otherwise}
\end{cases}
\end{align*}

\begin{align*}
P_{STOP}(stop|h,left,adj) = \frac
{C_{M} + \sum_{s \in S}\sum_{\LOC{h} \in s} f(stop:\LOC{h},left,adj)}
{C_{M} + \sum_{s \in S}\sum_{\LOC{h} \in s} (C_S+C_N)}
\end{align*}

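A corresponding sketch for the adjacent left-stop initialization,
reading the denominator as contributing $C_S+C_N$ once per token of
$h$:

\begin{verbatim}
from collections import defaultdict

def stop_left_adj_init(sents, c_m, c_s, c_n):
    """Initial P_STOP(stop|h, left, adj): count C_S per sentence-initial
    token of h, normalized as in the formula above."""
    f = defaultdict(float)
    n_tokens = defaultdict(int)
    for sent in sents:
        for loc_h, h in enumerate(sent):
            n_tokens[h] += 1
            if loc_h == 0:     # h at the left sentence border
                f[h] += c_s
    return {h: (c_m + f[h]) / (c_m + n_tokens[h] * (c_s + c_n))
            for h in n_tokens}
\end{verbatim}
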
\subsection{TODO: Results}
We tried various values for the initialization constants $C_A, C_M, C_S$
and $C_N$, but it was hard to find any clear pattern for what worked
best.

% todo: check ~/dmv__zero_harmonic_c.txt and paste here
We compared with a dependency-parsed version of the WSJ-10
corpus. Since single-word sentences were not POS-tagged there, these
were skipped. Also, the dependency-parsed WSJ-10 did not have ROOT
nodes; we therefore checked precision and recall both without our ROOT
dependency and with a ROOT link added to the parses (where possible:
221 parses had several heads that were not dependents, and these
parses we skipped).

Table \ref{tab:dmv-wsj} shows the results of 40 iterations on the full
WSJ-10 corpus, compared with the dependency-parsed version.
\begin{table*}
\centering
\begin{tabular}{cccccc}
\multicolumn{3}{c}{Rooted} & \multicolumn{3}{c}{Unrooted} \\
P & R & F1 & P & R & F1 \\
\end{tabular}
\caption{DMV results on the WSJ-10}
\label{tab:dmv-wsj}
\end{table*}

\section{The combined model (?)}
\subsection{Results (?)}
\section{Conclusion}

\nocite{lari-csl90}
\nocite{klein-thesis}
\nocite{km-dmv}
\bibliography{./statistical.bib}
\bibliographystyle{plainnat}

\end{document}