From a80630656d5b8fb8fc640908259ab38d0ef9a34c Mon Sep 17 00:00:00 2001 From: Kevin Brubeck Unhammer Date: Mon, 9 Jun 2008 14:09:24 +0200 Subject: [PATCH] todo: test PSTOP(h|ln) --- src/dmv.py | 38 +++++++++----------------------------- src/dmv.pyc | Bin 16490 -> 16448 bytes src/io.pyc | Bin 6735 -> 6846 bytes 3 files changed, 9 insertions(+), 29 deletions(-) diff --git a/src/dmv.py b/src/dmv.py index 05ee6e8..ae39a98 100755 --- a/src/dmv.py +++ b/src/dmv.py @@ -404,14 +404,17 @@ def reestimate_zeros(h_nums): return f def reestimate(g, corpus): - '''P_STOP(-STOP|...) = 1 - P_STOP(STOP|...) ''' + '''current todo. + P_STOP(-STOP|...) = 1 - P_STOP(STOP|...) ''' f = reestimate_zeros(g.head_nums) for sent in corpus: chart = {} sent_nums = [g.tagnum(w) for i,w in enumerate(sent)] for loc_h,h in zip(range(len(sent)), sent_nums): inner_sent(loc_h, g, sent, chart) + io.debug( debug_chart (g,sent,chart) ,'reest_chart') prune_sent(loc_h, g, sent_nums, chart) + io.debug( debug_chart( g,sent,chart) ,'reest_chart') for s in range(loc_h): # s 0.0: @@ -460,24 +464,7 @@ def testreestimation(): 'det nn vbd', 'det nn vbd c vbd pp', 'det nn vbd pp', 'det nn vbd det', ]] g = harmonic.initialize(corpus) - - print "takes some time" - for r in g.h_rules(h): - if r.L()==STOP: - print r -# print "off-set the rule, see what happens:" -# r.probN = 0.7 -# print r - for i in range(3): - pstophln = reestimate(g, corpus) - print "p(STOP|%s,L,N):%s"%(h_tag,pstophln) - - for r in g.h_rules(h): - if r.L()==STOP: - print r - r.probN = pstophln - print r - return None + reestimate(g, corpus) def testgrammar_h(): # Non, Adj _h_ = DMV_Rule((LRBAR,0), STOP, ( RBAR,0), 1.0, 1.0) # LSTOP @@ -504,16 +491,9 @@ def testprune(): print debug_chart(g,sent,chart) def testreestimation_h(): + io.DEBUG=['reest'] g = testgrammar_h() - for r in g.h_rules(0): - if r.L()==STOP: - print r - for i in range(9): - reestimate(g, ['h h h'.split()]) - for r in g.h_rules(0): - if r.L()==STOP: - print r - print "doing something obviously wrong here..." + reestimate(g,['h h h'.split()]) if __name__ == "__main__": io.DEBUG = [] diff --git a/src/dmv.pyc b/src/dmv.pyc index 5c0b077101fcbf2325aff1c92b953decd88caa53..0864141ee5e1e752e89b4c986dbda98a4b24c09b 100644 GIT binary patch delta 1170 zcwU85OHUI~6h3EWI%P^*il!t8NH>ORB99h?K(R!PiHX5j7mW`@!nCwRJG8x+E@o_# zCQ97io&E`xKS1I_<63_J3ti~KL|wT+&zb3fCMccWFL%!Q&UYSn?&lWnv{2vv773+? zUh1#Lhc;9I0bm1^5!wij0*#VlBV1a#r^pZE3(WyVXdnV0bg)&X(BJNYwnCIWyrY0) zfD-}{1lwR8BH0?WH9*k?WdlU$5333$awv*!pWgLK<+s+Cc7IaHxm~lzaGH#9&+L}= zeikzX>l#??ea}DZiIduLZ_}T6H`|YjIm#cW>FhUIQvKnC57YMMan|ELQF!Ie2;4|U z0iZrdu)0~R6)GYjEYlhdCFm#LrBBD`3k0YY3U#r(T3V?I`8|C(R*#UeviV^4R6Su9 zDzkEWauJWo$CG2R6XeN_Gu&n<$eqdO=?k<4wFiTsqmJir5W;vI2i+ARTOX|<9L6ym z?O STgHSjY($Na5s+6GPW}e*bk|A% delta 1237 zcwU85OKTHR6h7xBnPieo+gK7vAG8#-X`yNQlw$e-i)}%>h>K8sft|FS(sq(&su5>M zB=`rsAc$*ME{gUK2yWcD(S^$@xNxB&uC$`(Op*#wk(v8(?|I(yopYa6@IwWagD+Yz z8+)QWO?7)c02Y7^U?5Zx908>xv=Qd7d`0;>HFtdsfT|#qdw5xZ zqkjBvGf>}M*GzyK|MS`hLp@rJRC~X7DOxhK|uv4u?xc3PM=2S7_DyX!C_3gIC(HO`BUJI9GmLLZFy;G z37;Q4npzY1R)*8Bl-nHEl<(8a<r&`aGU0Fo3;h~LKYTFI?&r-0@yUekB6rlh@>(Y3VYCCFgvBO zrnr57i>ub-7V^Yro@QvCR-E>6$P(!bi#iaweDh_4>o8O{xLg2~FPFoYNu zWF;Dx@1|pY-T+$sdn_$x>i5MrGMrynrE*G2JUobnqg1=n7JBtb6CgMuo)bg ipG}}CAIw(zhtwXmMGdN9&21qi5vovekQ%9p-}nnU3*Wo| diff --git a/src/io.pyc b/src/io.pyc index 10b2c64a8853e9c931829e61eb43a33c3f276ad5..920f57e60d4c2485ec8aa752725b73785f159e8b 100644 GIT binary patch delta 794 zcwS{vO=}ZT6rGpJOeUF+*oJ(})OJd1&DdyT8WXJxD{7%dO66G)6$Hf^W?@pBDs&?h z1rY=ncV(!!aIH|rjT^zeB7%QH=t8n^)JO$=GxOS5 z-A{d&*(I7tFD3a^FBRCyN`i`3nGVz}Jyuin#v0FTCnZU0KfK?q(t_GlxFJG%s@B-N zh?nS(dYUz8*{IXK)CYEm#ICn!7%7^N%Pk=z-v?P>xfInD5SX?mtL^X8BT$P4f! z`el@Is5yySmH<^#Sy~Q$YkkI6=x1gp?|?oAdICEYa9ETo(MvtUE>ll`pyHT2G-5Q_ zs_^&Rb-`B#d@a`Cp_+@j3M{OVX?B@QyJnTG)4o}!o;9t2evJ&P&Zg-41bb`H-Z z{Gq;#wd^=rJMeM)?ZO5luEYU^uBi*gA6>JbMN_h&$;t;=6lr$JBheK3n#{z z&hFA%>t0MO7I-)oEB4VTFVZ$o({Em;FMN)cj2fG!_r_0_q(gJkPHCDZT;M7lbDw^g z&cKXxw@e95w-~;HNq@}QD^VCQC{fU4f|n98Y3GpPNN|(0uwiu=tI-J`q0_iWM|N!} zgRB5qeXuw%tQsC0F{55@HR|{TcFKLN0`8tYD+w5Re8uU<9}Z zjCQ9L(#2sJ$ak-M zwUoSOtJT