fixed a little bug in root reestimation (forgot to divide by len(corpus)=f_T_q(ROOT))
[dmvccm.git] / report / statistical.bib
blobee7813d2b5620779181a8117f726c0787d2ef492
% Dan Klein's PhD thesis (Stanford University, 2005).
% The title is no longer wrapped in a second pair of braces, so the
% bibliography style is free to apply its own casing.
@phdthesis{klein-thesis,
  author   = {Klein, Dan},
  title    = {The Unsupervised Learning of Natural Language Structure},
  school   = {Stanford University},
  year     = {2005},
  biburl   = {http://www.bibsonomy.org/bibtex/2f6777dff0e2773f224f8989b4c189e17/unhammer},
  keywords = {DG statistical unsupervised parsing NLP},
}
% Klein and Manning's constituent-context model paper (ACL 2002).
% The DBLP record date lives in the ignored timestamp field so that it
% cannot override the publication year under biblatex.
% NOTE(review): booktitle was expanded from the bare acronym ACL; confirm
% the meeting ordinal against the ACL Anthology record linked in ee.
@inproceedings{km-ccm,
  author    = {Klein, Dan and Manning, Christopher D.},
  title     = {A Generative Constituent-Context Model for Improved Grammar Induction},
  booktitle = {Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics ({ACL})},
  pages     = {128--135},
  year      = {2002},
  url       = {http://dblp.uni-trier.de/db/conf/acl/acl2002.html#KleinM02},
  ee        = {http://www.aclweb.org/anthology/P02-1017.pdf},
  timestamp = {2003-08-27},
  biburl    = {http://www.bibsonomy.org/bibtex/2880755e3cc5b4c0e2b0cf1c599d93565/unhammer},
  keywords  = {statistical haveread NLP syllabus parsing grammar_induction},
}
% Klein and Manning's dependency/constituency induction paper (ACL 2004).
% The DBLP record date lives in the ignored timestamp field so that it
% cannot override the publication year under biblatex.
% NOTE(review): booktitle was expanded from the bare acronym ACL; confirm
% the meeting ordinal against the proceedings front matter.
@inproceedings{km-dmv,
  author    = {Klein, Dan and Manning, Christopher D.},
  title     = {Corpus-Based Induction of Syntactic Structure: Models of Dependency and Constituency},
  booktitle = {Proceedings of the 42nd Annual Meeting of the Association for Computational Linguistics ({ACL})},
  pages     = {478--485},
  year      = {2004},
  url       = {http://dblp.uni-trier.de/db/conf/acl/acl2004.html#KleinM04},
  ee        = {http://acl.ldc.upenn.edu/acl2004/main/pdf/341_pdf_2-col.pdf},
  timestamp = {2005-01-14},
  biburl    = {http://www.bibsonomy.org/bibtex/250c1e45dc65ad24b16a624e74231d951/unhammer},
  keywords  = {syllabus statistical dependency haveread NLP grammar_induction},
}
% Borensztajn, Zuidema and Bod on progressive abstraction in child grammars.
% The entry had no journal, which an article entry requires.
% NOTE(review): the venue is inferred from the PDF file name (it contains
% cogsci) and the year; confirm the proceedings title before relying on it.
@inproceedings{zuidema-acquisition,
  author      = {Borensztajn, Gideon and Zuidema, Willem and Bod, Rens},
  title       = {Children's grammars grow more abstract with age --- Evidence from an automatic procedure for identifying the productive units of language},
  booktitle   = {Proceedings of the 30th Annual Conference of the Cognitive Science Society},
  year        = {2008},
  url         = {http://staff.science.uva.nl/~gideon/borbodzui08cogsci-def.pdf},
  description = {«...confirms the progressive abstraction hypothesis: abstraction, defined as the relative number of non-terminal leaves in multi-word constructions, increases with age. We show that it does so independently of sentence length. Complex constructions lose their lexical parts to specialized lexical rewrite rules, and in the process the constructicon becomes more abstract
...
our version of the progressive abstraction hypothesis now becomes _falsifiable_»},
  biburl      = {http://www.bibsonomy.org/bibtex/2e99bc92dec88420adfc4520f4a1f7a34/unhammer},
  keywords    = {CxG statistical corpus language_acquisition},
}
% Conference paper on the compositionality of Hindi noun+verb expressions.
% It is typed as inproceedings because the venue is a conference
% proceedings, not a journal. The second author's given name is corrected
% from Aaravind to Aravind.
@inproceedings{venkatapathy2005rcn,
  author    = {Venkatapathy, Sriram and Joshi, Aravind K. and Agarwala, Preeti},
  title     = {Relative compositionality of noun+verb multiword expressions in {Hindi}},
  booktitle = {Proceedings of International Conference on Natural Language Processing ({ICON}'05), Kanpur},
  year      = {2005},
  url       = {http://www.iiit.net/techreports/2007_81.pdf},
  abstract  = {Measuring the relative compositionality of Multi-word expressions (MWEs) is crucial to Natural Language Processing. Hindi contains a rich set of Noun+Verb MWEs and hence, it is very important to handle them. Very limited work was done previously towards characterizing the MWEs in Hindi of Noun+Verb type. Also, various statistical measures which are used to measure the compositionality of different kinds of collocations in English cannot be applied straight-away to Hindi due to insufficient corpus and resources. In this paper, we analyze in detail the types of Noun+Verb expressions in Hindi. We then propose an approach to measure their relative compositionality automatically using maximum entropy model (MaxEnt). MaxEnt integrates various measures representing the properties of the Noun+Verb expressions in Hindi. Some of the measures used by the MaxEnt are computed by mapping them to Verb-Noun expressions in English.},
  biburl    = {http://www.bibsonomy.org/bibtex/274d4d47eda67a81ff4246103ff973200/unhammer},
  keywords  = {NLP corpus compositionality statistical complex_predicates syntax},
}
% Baayen's statistics-with-R textbook.
% It is typed as a book because the entry names a publisher and has no
% note, which an unpublished entry requires.
% NOTE(review): the title looks like a pre-publication working title and
% the address was changed from Cambridge, MA to Cambridge, the home of
% Cambridge University Press; confirm both against the printed book.
@book{baayen_R,
  author    = {Baayen, R. Harald},
  title     = {Practical Data Analysis for the Language Sciences with {R}},
  publisher = {Cambridge University Press},
  address   = {Cambridge},
  year      = {2008},
  biburl    = {http://www.bibsonomy.org/bibtex/24763ed773860ebad4db237c39867b9f0/unhammer},
  keywords  = {statistical R analysis corpus},
}
% ACL 2003 paper on identifying English phrasal verbs.
% It is typed as inproceedings because the venue is a conference
% proceedings. The page range is written out in full (was 513--20).
@inproceedings{li2003ela,
  author    = {Li, W. and Zhang, X. and Niu, C. and Jiang, Y. and Srihari, R.},
  title     = {An Expert Lexicon Approach to Identifying {English} Phrasal Verbs},
  booktitle = {Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics ({ACL})},
  pages     = {513--520},
  year      = {2003},
  url       = {http://acl.ldc.upenn.edu/acl2003/main/pdfs/Li.pdf},
  biburl    = {http://www.bibsonomy.org/bibtex/2cb838b337a5802053bc31f3f4a9d85bd/unhammer},
  keywords  = {parser morphology NLP syntax lexicon phrasal_verbs statistical particle_verbs},
}
% Rens Bod's PhD thesis on performance models of natural language.
% NOTE(review): school was missing (a required field for a thesis) and is
% filled in from general knowledge of the thesis; confirm against the
% title page.
@phdthesis{bod1995els,
  author   = {Bod, Rens},
  title    = {Enriching linguistics with statistics: performance models of natural language},
  school   = {University of Amsterdam},
  year     = {1995},
  biburl   = {http://www.bibsonomy.org/bibtex/27ae06b227a849412bdebccec068d7a38/unhammer},
  keywords = {STSG performance statistical syllabus syntax haveread DOP},
}
% Bresnan and Hay on animacy effects in the syntax of give (Lingua 118).
% Only the proper nouns in the title are brace-protected, so the style can
% still recase the rest.
@article{bresnan2008gge,
  author    = {Bresnan, J. and Hay, J.},
  title     = {Gradient grammar: An effect of animacy on the syntax of give in {New Zealand} and {American English}},
  journal   = {Lingua},
  volume    = {118},
  number    = {2},
  pages     = {245--259},
  publisher = {Elsevier},
  year      = {2008},
  biburl    = {http://www.bibsonomy.org/bibtex/243e3d7fd8482a45ffe570a807879d558/unhammer},
  keywords  = {semantics grammar statistical toread syntax animacy},
}
% Bod's original Data Oriented Parsing paper.
% It is typed as inproceedings because the venue is a conference
% proceedings. The volume number and the publisher's address, previously
% fused into other fields, now have fields of their own.
@inproceedings{bod1992cml,
  author      = {Bod, Rens},
  title       = {A computational model of language performance: {Data Oriented Parsing}},
  booktitle   = {Proceedings of the 14th Conference on Computational Linguistics},
  volume      = {3},
  pages       = {855--859},
  publisher   = {Association for Computational Linguistics},
  address     = {Morristown, NJ, USA},
  year        = {1992},
  description = {The original...},
  biburl      = {http://www.bibsonomy.org/bibtex/2f4c5696f56e671cd983a8a39c5e5eb2d/unhammer},
  keywords    = {toread parsing statistical DOP},
}
% Andy Way's LFG-DOP architecture for machine translation.
% The ampersand in the journal name is escaped; a bare one is a LaTeX
% alignment character and breaks typesetting of the bibliography.
@article{way-lfg-dop,
  author    = {Way, Andy},
  title     = {A hybrid architecture for robust {MT} using {LFG-DOP}},
  journal   = {Journal of Experimental \& Theoretical Artificial Intelligence},
  volume    = {11},
  number    = {3},
  pages     = {441--471},
  publisher = {Taylor and Francis Ltd},
  year      = {1999},
  url       = {http://www.nclt.dcu.ie/lfg-dop/pubs/Way_99.ps},
  abstract  = {We develop a model for machine translation (MT) based on data-oriented parsing (DOP) allied to the syntactic representations of lexical functional grammar (LFG). We begin by showing that in themselves, none of the main paradigmatic approaches to MT currently suffice to the standard required. Nevertheless, each of these approaches contains elements which if properly harnessed should lead to an overall improvement in translation performance. It is in this new hybrid spirit that our search for a better solution to the problems of MT can be seen. We summarize the original DOP model of Bod, as well as the DOT model of translation of Poutsma on which it is based. We demonstrate that DOT is not guaranteed to produce the correct translation, despite provably deriving the most probable translation. We go on to critically evaluate previous attempts at LFG-MT, commenting briefly on particular problem cases for such systems. We then show how the LFG-DOP model of Bod and Kaplan can be extended to serve as a novel hybrid model for MT which promises to improve upon DOT, as well as the pure LFG-based translation model.},
  biburl    = {http://www.bibsonomy.org/bibtex/290fdf61c16719f525c67d9781f370996/unhammer},
  keywords  = {toread statistical DOP MT LFG},
}
% Goodman's chapter on parsing DOP through PCFG reductions.
% The former journal value (Bod et al. 2002b) was a pasted cross-reference
% to an edited volume, not a journal name.
% NOTE(review): booktitle, editor, publisher and address are filled in
% from general knowledge of that volume; confirm them, and check whether
% the year should be that of the printed book.
@incollection{goodman2002epd,
  author    = {Goodman, J.},
  title     = {Efficient Parsing of {DOP} with {PCFG}-Reductions},
  booktitle = {Data-Oriented Parsing},
  editor    = {Bod, Rens and Scha, Remko and Sima'an, Khalil},
  publisher = {CSLI Publications},
  address   = {Stanford, CA},
  year      = {2002},
  url       = {http://research.microsoft.com/~joshuago/dop-csli.ps},
  biburl    = {http://www.bibsonomy.org/bibtex/2f76839ea98f36073ef2f989cd7795d99/unhammer},
  keywords  = {PCFG toread syllabus NLP statistical DOP parsing},
}
% Collins's head-driven parsing models (Computational Linguistics 29(4)).
% The DBLP record date lives in the ignored timestamp field so that it
% cannot override the publication year under biblatex.
@article{collins-hdsnlp,
  author    = {Collins, Michael},
  title     = {Head-Driven Statistical Models for Natural Language Parsing},
  journal   = {Computational Linguistics},
  volume    = {29},
  number    = {4},
  pages     = {589--637},
  year      = {2003},
  url       = {http://dblp.uni-trier.de/db/journals/coling/coling29.html#Collins03},
  timestamp = {2004-03-26},
  biburl    = {http://www.bibsonomy.org/bibtex/2b61315e8ab92f576ae3de7cb87c5460b/unhammer},
  keywords  = {head-driven NLP statistical toread},
}
% Prescher's EM tutorial, an arXiv preprint indexed by DBLP under CoRR.
% The arXiv identifier is also given in the eprint fields. The DBLP record
% date lives in the ignored timestamp field so that it cannot override the
% publication year under biblatex.
% NOTE(review): the former pages value of 49 is presumably a page count,
% so it is stored as pagetotal instead of being printed as a page
% reference; confirm.
@article{prescher-em,
  author        = {Prescher, Detlef},
  title         = {A Tutorial on the Expectation-Maximization Algorithm Including Maximum-Likelihood Estimation and {EM} Training of Probabilistic Context-Free Grammars},
  journal       = {CoRR},
  volume        = {abs/cs/0412015},
  year          = {2004},
  note          = {informal publication},
  pagetotal     = {49},
  eprint        = {cs/0412015},
  archiveprefix = {arXiv},
  url           = {http://dblp.uni-trier.de/db/journals/corr/corr0412.html#abs-cs-0412015},
  ee            = {http://arxiv.org/abs/cs/0412015},
  timestamp     = {2008-01-02},
  description   = {dblp},
  biburl        = {http://www.bibsonomy.org/bibtex/27684ca4c422922d880e247ce5ebceb2b/unhammer},
  keywords      = {syllabus statistical EM ML},
}
% Carroll and Charniak's experiments on learning dependency grammars.
% It is typed as inproceedings because the venue is a set of workshop
% notes, not a journal.
% NOTE(review): the word "on" was added to the workshop name; confirm the
% exact proceedings title.
@inproceedings{carroll1992tel,
  author    = {Carroll, G. and Charniak, E.},
  title     = {Two experiments on learning probabilistic dependency grammars from corpora},
  booktitle = {Working Notes of the Workshop on Statistically-Based {NLP} Techniques},
  pages     = {1--13},
  year      = {1992},
  url       = {http://citeseer.ist.psu.edu/cache/papers/cs/483/ftp:zSzzSzftp.cs.brown.eduzSzpubzSztechreportszSz92zSzcs92-16.pdf/carroll92two.pdf},
  biburl    = {http://www.bibsonomy.org/bibtex/2ab653bd555bdafc6dc761ce6e32bba44/unhammer},
  keywords  = {statistical toread corpus syllabus ML grammar},
}
% Lari and Young on estimating stochastic context-free grammars.
% The capitalised algorithm name is brace-protected so that styles which
% lowercase titles keep it as entered.
@article{lari-csl90,
  author   = {Lari, K. and Young, S. J.},
  title    = {The estimation of stochastic context-free grammars using the {Inside-Outside} algorithm},
  journal  = {Computer Speech and Language},
  volume   = {4},
  pages    = {35--56},
  year     = {1990},
  biburl   = {http://www.bibsonomy.org/bibtex/2b9f6798bb092697da7042ca3f5dee795/unhammer},
  keywords = {NLP EM toread algorithm statistical},
}
% Eslick and Liu's Langutils toolkit paper.
% It is typed as inproceedings because the venue is a conference
% proceedings, not a journal.
@inproceedings{eslick2005lnl,
  author    = {Eslick, Ian and Liu, Hugo},
  title     = {Langutils--A natural language toolkit for {Common Lisp}},
  booktitle = {Proceedings of the International Conference on Lisp},
  year      = {2005},
  url       = {http://www.media.mit.edu/~hugo/publications/papers/ILC2005-langutils.pdf},
  biburl    = {http://www.bibsonomy.org/bibtex/2136f2687f142217d6bafa05ae549c17f/unhammer},
  keywords  = {toread lisp statistical corpus NLP},
}
% Manning and Schuetze's statistical NLP textbook.
% month uses the predefined jun macro, and the umlaut is written as a
% BibTeX special character so that sorting and labels work under classic
% BibTeX as well as Biber.
@book{manning99foundations,
  author       = {Manning, Christopher D. and Sch{\"u}tze, Hinrich},
  title        = {Foundations of Statistical Natural Language Processing},
  publisher    = {MIT Press},
  address      = {Cambridge, MA},
  month        = jun,
  year         = {1999},
  isbn         = {0262133601},
  howpublished = {Hardcover},
  biburl       = {http://www.bibsonomy.org/bibtex/29b2ddcf0d31e9f0d787b2bce8803fa96/unhammer},
  keywords     = {syllabus statistical NLP},
}
% Charniak's statistical language learning textbook.
% The title is no longer wrapped in a second pair of braces, and the
% address now has the comma between city and state.
@book{Cha93,
  author    = {Charniak, E.},
  title     = {Statistical Language Learning},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {1993},
  biburl    = {http://www.bibsonomy.org/bibtex/26dcaa41af94bd8a97936cf1f17e8284a/unhammer},
  keywords  = {NLP syllabus statistical toread},
}
% Rayson, Leech and Hodges on social variation in English vocabulary use.
% Author surnames are no longer in all capitals, and only the proper nouns
% in the title are brace-protected.
@article{rayson1997sdu,
  author      = {Rayson, P. and Leech, G. and Hodges, M.},
  title       = {Social differentiation in the use of {English} vocabulary: Some analyses of the conversational component of the {British National Corpus}},
  journal     = {International Journal of Corpus Linguistics},
  volume      = {2},
  number      = {1},
  pages       = {133--152},
  publisher   = {Benjamins Publishing},
  year        = {1997},
  description = {Gender, age, social class etc. and their differences in vocabulary use.},
  biburl      = {http://www.bibsonomy.org/bibtex/227d5ef4b1e88811468424646245685b8/unhammer},
  keywords    = {linguistics corpus statistical imported sociolinguistics gender},
}