add_root fn written
[dmvccm.git] / src / main.py
blob b6136713a23493e7005af5d458d37c5977fe28a6
1 from common_dmv import ROOTNUM
2 from wsjdep import WSJDepCorpusReader
3 from loc_h_dmv import DMV_Grammar, reestimate, DEBUG, mpp
4 from loc_h_harmonic import initialize
6 # try:
7 # return add_root(parse)
8 # except:
9 # print "No single possible root, todo what?"
10 # return parse
11 #todo
13 # todo:
def evaluate(g, tagged_and_parsed_sents):
    '''Score the most probable parses under grammar g against given parses.

    tagged_and_parsed_sents is a list of pairs:
    (tagonly_sent, parsed_sent)

    For every sentence, mpp(g, tagonly_sent) is compared with parsed_sent:

    R_num += 1 if pair from parsed is in mpp
    R_den += 1 per pair from parsed

    P_num += 1 if pair from mpp is in parsed
    P_den += 1 per pair from mpp (pairs headed by ROOTNUM are skipped)

    F1 = (2 * P * R)/(P + R), the harmonic mean of P and R

    Returns a tab-separated report string with recall, precision and F1,
    each formatted to four decimals.  A ratio whose denominator is zero
    (e.g. no sentences at all) is reported as 0.0 instead of raising
    ZeroDivisionError.
    '''
    def ratio(num, den):
        # float() forces true division; with the plain int counters Python 2
        # would otherwise truncate every score to 0 or 1.
        if den == 0:
            return 0.0
        return float(num) / den

    recall_num = 0
    recall_den = 0
    precision_num = 0
    precision_den = 0

    for sent, parse in tagged_and_parsed_sents:
        mpp_sent = mpp(g, sent)
        for pair in parse:
            recall_den += 1
            if pair in mpp_sent:
                recall_num += 1
        for pair in mpp_sent:
            # The given parses carry no ROOT attachment, so counting the one
            # from mpp would only lower precision.
            if pair[0][0] == ROOTNUM:
                continue  # todo: add ROOT to parses?
            precision_den += 1
            if pair in parse:
                precision_num += 1

    recall = ratio(recall_num, recall_den)
    precision = ratio(precision_num, precision_den)
    F1 = ratio(2 * recall * precision, precision + recall)

    return "Recall: %.4f\tPrecision: %.4f\tF1: %.4f"%(recall, precision, F1)
49 if __name__ == "__main__":
50 print "main.py:"
51 reader = WSJDepCorpusReader(None)
53 tagonlys = reader.tagonly_sents()
54 g = initialize(tagonlys[0:500])
55 print g
57 # DEBUG.add('REEST')
58 # f = reestimate(g, reader.tagonly_sents())
59 # print g
61 print evaluate(g, tags_and_parses[0:500])
63 print "main.py: done"