report/statistical.bib

   1 @phdthesis{klein-thesis,
   2         title = {The Unsupervised Learning of Natural Language Structure},
   3         author = {Klein, Dan},
   4         school = {Stanford University},
   5         year = 2005,
   6         biburl = {http://www.bibsonomy.org/bibtex/2f6777dff0e2773f224f8989b4c189e17/unhammer},
   7         keywords = {DG statistical unsupervised parsing NLP}
   8 }
   9
  10 @inproceedings{km-ccm,
  11         title = {A Generative Constituent-Context Model for Improved Grammar Induction},
  12         author = {Klein, Dan and Manning, Christopher D.},
  13         booktitle = {Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics},
  14         pages = {128--135},
  15         year = 2002,
  16         url = {http://dblp.uni-trier.de/db/conf/acl/acl2002.html#KleinM02},
  17         ee = {http://www.aclweb.org/anthology/P02-1017.pdf},
  18         timestamp = {2003-08-27},
  19         biburl = {http://www.bibsonomy.org/bibtex/2880755e3cc5b4c0e2b0cf1c599d93565/unhammer},
  20         keywords = {statistical haveread NLP syllabus parsing grammar_induction}
  21 }
  22
  23 @inproceedings{km-dmv,
  24         title = {Corpus-Based Induction of Syntactic Structure: Models of Dependency and Constituency},
  25         author = {Klein, Dan and Manning, Christopher D.},
  26         booktitle = {Proceedings of the 42nd Annual Meeting of the Association for Computational Linguistics},
  27         pages = {478--485},
  28         year = 2004,
  29         url = {http://dblp.uni-trier.de/db/conf/acl/acl2004.html#KleinM04},
  30         ee = {http://acl.ldc.upenn.edu/acl2004/main/pdf/341_pdf_2-col.pdf},
  31         timestamp = {2005-01-14},
  32         biburl = {http://www.bibsonomy.org/bibtex/250c1e45dc65ad24b16a624e74231d951/unhammer},
  33         keywords = {syllabus statistical dependency haveread NLP grammar_induction}
  34 }
  35
  36 @misc{zuidema-acquisition,
  37         title = {Children's Grammars Grow More Abstract with Age---Evidence from an Automatic Procedure for Identifying the Productive Units of Language},
  38         author = {Borensztajn, Gideon and Zuidema, Willem and Bod, Rens},
  39         year = 2008,
  40         url = {http://staff.science.uva.nl/~gideon/borbodzui08cogsci-def.pdf},
  41         description = {«...confirms the progressive abstraction hypothesis: abstraction, defined as the relative number of non-terminal leaves in multi-word constructions, increases with age. We show that it does so independently of sentence length. Complex constructions lose their lexical parts to specialized lexical rewrite rules, and in the process the constructicon becomes more abstract
  42 ...
  43 our version of the progressive abstraction hypothesis now becomes _falsifiable_»},
  44         biburl = {http://www.bibsonomy.org/bibtex/2e99bc92dec88420adfc4520f4a1f7a34/unhammer},
  45         keywords = {CxG statistical corpus language_acquisition}
  46 }
  47
  48 @inproceedings{venkatapathy2005rcn,
  49         title = {Relative Compositionality of Noun+Verb Multiword Expressions in {Hindi}},
  50         author = {Venkatapathy, Sriram and Joshi, Aravind K. and Agarwala, Preeti},
  51         booktitle = {Proceedings of the International Conference on Natural Language Processing ({ICON}'05), Kanpur},
  52         year = 2005,
  53         url = {http://www.iiit.net/techreports/2007_81.pdf},
  54         abstract = {Measuring the relative compositionality of Multi-word expressions (MWEs) is crucial to Natural Language Processing. Hindi contains a rich set of Noun+Verb MWEs and hence, it is very important to handle them. Very limited work was done previously towards characterizing the MWEs in Hindi of Noun+Verb type. Also, various statistical measures which are used to measure the compositionality of different kinds of collocations in English cannot be applied straight-away to Hindi due to insufficient corpus and resources. In this paper, we analyze in detail the types of Noun+Verb expressions in Hindi. We then propose an approach to measure their relative compositionality automatically using maximum entropy model (MaxEnt). MaxEnt integrates various measures representing the properties of the Noun+Verb expressions in Hindi. Some of the measures used by the MaxEnt are computed by mapping them to Verb-Noun expressions in English.},
  55         biburl = {http://www.bibsonomy.org/bibtex/274d4d47eda67a81ff4246103ff973200/unhammer},
  56         keywords = {NLP corpus compositionality statistical complex_predicates syntax}
  57 }
  58
  59 @book{baayen_R,
  60         title = {Practical Data Analysis for the Language Sciences with {R}},
  61         address = {Cambridge, UK},
  62         author = {Baayen, R. Harald},
  63         publisher = {Cambridge University Press},
  64         year = 2008,
  65         biburl = {http://www.bibsonomy.org/bibtex/24763ed773860ebad4db237c39867b9f0/unhammer},
  66         keywords = {statistical R analysis corpus}
  67 }
  68
  69 @inproceedings{li2003ela,
  70         title = {An Expert Lexicon Approach to Identifying {English} Phrasal Verbs},
  71         author = {Li, W. and Zhang, X. and Niu, C. and Jiang, Y. and Srihari, R.},
  72         booktitle = {Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics},
  73         pages = {513--520},
  74         year = 2003,
  75         url = {http://acl.ldc.upenn.edu/acl2003/main/pdfs/Li.pdf},
  76         biburl = {http://www.bibsonomy.org/bibtex/2cb838b337a5802053bc31f3f4a9d85bd/unhammer},
  77         keywords = {parser morphology NLP syntax lexicon phrasal_verbs statistical particle_verbs}
  78 }
  79
  80 @phdthesis{bod1995els,
  81         title = {Enriching Linguistics with Statistics: Performance Models of Natural Language},
  82         author = {Bod, Rens},
  83         year = 1995,
  84         biburl = {http://www.bibsonomy.org/bibtex/27ae06b227a849412bdebccec068d7a38/unhammer},
  85         keywords = {STSG performance statistical syllabus syntax haveread DOP}
  86 }
  87
  88 @article{bresnan2008gge,
  89         title = {Gradient Grammar: An Effect of Animacy on the Syntax of {give} in {New Zealand} and {American English}},
  90         author = {Bresnan, J. and Hay, J.},
  91         journal = {Lingua},
  92         number = 2,
  93         pages = {245--259},
  94         publisher = {Elsevier},
  95         volume = 118,
  96         year = 2008,
  97         biburl = {http://www.bibsonomy.org/bibtex/243e3d7fd8482a45ffe570a807879d558/unhammer},
  98         keywords = {semantics grammar statistical toread syntax animacy}
  99 }
 100
 101 @inproceedings{bod1992cml,
 102         title = {A Computational Model of Language Performance: {Data Oriented Parsing}},
 103         author = {Bod, Rens},
 104         booktitle = {Proceedings of the 14th Conference on Computational Linguistics, Volume 3},
 105         pages = {855--859},
 106         publisher = {Association for Computational Linguistics, Morristown, NJ, USA},
 107         year = 1992,
 108         description = {The original...},
 109         biburl = {http://www.bibsonomy.org/bibtex/2f4c5696f56e671cd983a8a39c5e5eb2d/unhammer},
 110         keywords = {toread parsing statistical DOP}
 111 }
 112
 113 @article{way-lfg-dop,
 114         title = {A Hybrid Architecture for Robust {MT} Using {LFG-DOP}},
 115         author = {Way, Andy},
 116         journal = {Journal of Experimental \& Theoretical Artificial Intelligence},
 117         number = 3,
 118         pages = {441--471},
 119         publisher = {Taylor and Francis Ltd},
 120         volume = 11,
 121         year = 1999,
 122         url = {http://www.nclt.dcu.ie/lfg-dop/pubs/Way_99.ps},
 123         abstract = {We develop a model for machine translation (MT) based on data-oriented parsing (DOP) allied to the syntactic representations of lexical functional grammar (LFG). We begin by showing that in themselves, none of the main paradigmatic approaches to MT currently suffice to the standard required. Nevertheless, each of these approaches contains elements which if properly harnessed should lead to an overall improvement in translation performance. It is in this new hybrid spirit that our search for a better solution to the problems of MT can be seen. We summarize the original DOP model of Bod, as well as the DOT model of translation of Poutsma on which it is based. We demonstrate that DOT is not guaranteed to produce the correct translation, despite provably deriving the most probable translation. We go on to critically evaluate previous attempts at LFG-MT, commenting briefly on particular problem cases for such systems. We then show how the LFG-DOP model of Bod and Kaplan can be extended to serve as a novel hybrid model for MT which promises to improve upon DOT, as well as the pure LFG-based translation model.},
 124         biburl = {http://www.bibsonomy.org/bibtex/290fdf61c16719f525c67d9781f370996/unhammer},
 125         keywords = {toread statistical DOP MT LFG}
 126 }
 127
 128 @incollection{goodman2002epd,
 129         title = {Efficient Parsing of {DOP} with {PCFG}-Reductions},
 130         author = {Goodman, J.},
 131         booktitle = {Bod et al. 2002b},
 132         year = 2002,
 133         url = {http://research.microsoft.com/~joshuago/dop-csli.ps},
 134         biburl = {http://www.bibsonomy.org/bibtex/2f76839ea98f36073ef2f989cd7795d99/unhammer},
 135         keywords = {PCFG toread syllabus NLP statistical DOP parsing}
 136 }
 137
 138 @article{collins-hdsnlp,
 139         title = {Head-Driven Statistical Models for Natural Language Parsing},
 140         author = {Collins, Michael},
 141         journal = {Computational Linguistics},
 142         number = 4,
 143         pages = {589--637},
 144         volume = 29,
 145         year = 2003,
 146         url = {http://dblp.uni-trier.de/db/journals/coling/coling29.html#Collins03},
 147         timestamp = {2004-03-26},
 148         biburl = {http://www.bibsonomy.org/bibtex/2b61315e8ab92f576ae3de7cb87c5460b/unhammer},
 149         keywords = {head-driven NLP statistical toread}
 150 }
 151
 152 @article{prescher-em,
 153         title = {A Tutorial on the Expectation-Maximization Algorithm Including Maximum-Likelihood Estimation and {EM} Training of Probabilistic Context-Free Grammars},
 154         author = {Prescher, Detlef},
 155         journal = {CoRR},
 156         note = {informal publication},
 157         pages = 49,
 158         volume = {abs/cs/0412015},
 159         year = 2004,
 160         url = {http://dblp.uni-trier.de/db/journals/corr/corr0412.html#abs-cs-0412015},
 161         ee = {http://arxiv.org/abs/cs/0412015},
 162         timestamp = {2008-01-02},
 163         description = {dblp},
 164         biburl = {http://www.bibsonomy.org/bibtex/27684ca4c422922d880e247ce5ebceb2b/unhammer},
 165         keywords = {syllabus statistical EM ML}
 166 }
 167
 168 @inproceedings{carroll1992tel,
 169         title = {Two Experiments on Learning Probabilistic Dependency Grammars from Corpora},
 170         author = {Carroll, G. and Charniak, E.},
 171         booktitle = {Working Notes of the Workshop Statistically-Based {NLP} Techniques},
 172         pages = {1--13},
 173         year = 1992,
 174         url = {http://citeseer.ist.psu.edu/cache/papers/cs/483/ftp:zSzzSzftp.cs.brown.eduzSzpubzSztechreportszSz92zSzcs92-16.pdf/carroll92two.pdf},
 175         biburl = {http://www.bibsonomy.org/bibtex/2ab653bd555bdafc6dc761ce6e32bba44/unhammer},
 176         keywords = {statistical toread corpus syllabus ML grammar}
 177 }
 178
 179 @article{lari-csl90,
 180         title = {The Estimation of Stochastic Context-Free Grammars Using the {Inside-Outside} Algorithm},
 181         author = {Lari, K. and Young, S. J.},
 182         journal = {Computer Speech and Language},
 183         pages = {35--56},
 184         volume = 4,
 185         year = 1990,
 186         biburl = {http://www.bibsonomy.org/bibtex/2b9f6798bb092697da7042ca3f5dee795/unhammer},
 187         keywords = {NLP EM toread algorithm statistical}
 188 }
 189
 190 @inproceedings{eslick2005lnl,
 191         title = {{Langutils}---A Natural Language Toolkit for {Common Lisp}},
 192         author = {Eslick, Ian and Liu, Hugo},
 193         booktitle = {Proceedings of the International Conference on Lisp},
 194         year = 2005,
 195         url = {http://www.media.mit.edu/~hugo/publications/papers/ILC2005-langutils.pdf},
 196         biburl = {http://www.bibsonomy.org/bibtex/2136f2687f142217d6bafa05ae549c17f/unhammer},
 197         keywords = {toread lisp statistical corpus NLP}
 198 }
 199
 200 @book{manning99foundations,
 201         title = {Foundations of Statistical Natural Language Processing},
 202         author = {Manning, Christopher D. and Sch{\"u}tze, Hinrich},
 203         howpublished = {Hardcover},
 204         month = jun,
 205         publisher = {MIT Press},
 206         year = 1999,
 207         isbn = {0262133601},
 208         biburl = {http://www.bibsonomy.org/bibtex/29b2ddcf0d31e9f0d787b2bce8803fa96/unhammer},
 209         keywords = {syllabus statistical NLP}
 210 }
 212 @book{Cha93,
 213         title = {Statistical Language Learning},
 214         address = {Cambridge, MA},
 215         author = {Charniak, Eugene},
 216         publisher = {MIT Press},
 217         year = 1993,
 218         biburl = {http://www.bibsonomy.org/bibtex/26dcaa41af94bd8a97936cf1f17e8284a/unhammer},
 219         keywords = {NLP syllabus statistical toread}
 220 }
 221
 222 @article{rayson1997sdu,
 223         title = {Social Differentiation in the Use of {English} Vocabulary: Some Analyses of the Conversational Component of the {British National Corpus}},
 224         author = {Rayson, P. and Leech, G. and Hodges, M.},
 225         journal = {International Journal of Corpus Linguistics},
 226         number = 1,
 227         pages = {133--152},
 228         publisher = {Benjamins Publishing},
 229         volume = 2,
 230         year = 1997,
 231         description = {Gender, age, social class etc. and their differences in vocabulary use.},
 232         biburl = {http://www.bibsonomy.org/bibtex/227d5ef4b1e88811468424646245685b8/unhammer},
 233         keywords = {linguistics corpus statistical imported sociolinguistics gender}
 234 }
 235