some late comments :)
[gostyle.git] / utils / sgf_load.py
blob1ad1435a2fc7dc79e6c4f7d849aa32a46c2b9b77
1 import codecs
2 import logging
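# XXX: path hack -- '../' is appended to sys.path before sgflib is imported
# (presumably so that the import resolves from the parent directory).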
5 import sys
6 sys.path.append('../')
8 from sgflib import SGFParser
def my_err(exc):
    """Codec error handler that salvages UTF-8 text from a failed decode.

    The bytes that the active codec rejected are re-read, together with up
    to three of the bytes that follow them, as UTF-8.  If that works, the
    decoded text is used as the replacement; otherwise every rejected byte
    is replaced by a question mark.

    Args:
        exc: The decoding error passed in by the codec machinery; its
            ``object``, ``start`` and ``end`` attributes are used.

    Returns:
        A ``(replacement, position)`` tuple: the unicode replacement text
        and the index in ``exc.object`` at which decoding should resume.
    """
    # A UTF-8 character is at most four bytes long, so extend the rejected
    # range one byte at a time.  The shortest slice is tried first; a valid
    # sequence has exactly one length, so at most one attempt can succeed.
    for extra in (1, 2, 3):
        candidate = exc.object[exc.start:exc.end + extra]
        try:
            recovered = candidate.decode('utf-8')
        except UnicodeError:
            continue
        # Resume right after the bytes actually consumed: the slice may have
        # been clamped at the end of the input, so exc.end + extra could
        # point past it.
        return (recovered, exc.start + len(candidate))

    diff = exc.end - exc.start
    if diff > 4:
        logging.warning(
            "sgf_load.py : Long chain of chars (%d) badly encoded.", diff)
    return (u'?' * diff, exc.end)


# Make the handler available as errors='my_err' in decode() calls.
codecs.register_error('my_err', my_err)
class ParseError(Exception):
    """Raised by load_sgf_file_headers() when the SGF data cannot be parsed."""
def load_sgf_file_headers(filename):
    """Return the header fields of every game tree in an SGF file.

    Args:
        filename: Path of the SGF file to read.

    Returns:
        A list of dictionaries, one per game tree in the parsed collection.
        Each dictionary is the result of process_gametree() for the
        corresponding game tree.

    Raises:
        ParseError: If SGFParser fails on the file contents.  Errors from
            opening or reading the file are not wrapped and propagate as-is.
    """
    with open(filename, 'r') as f:
        sgfdata = f.read()

    try:
        collection = SGFParser(sgfdata).parse()
    except Exception as exc:
        # Only ordinary errors are converted (KeyboardInterrupt/SystemExit
        # pass through).  The file name and the parser's own error are kept
        # in the message so that a failure can be traced; %r avoids any
        # encoding trouble while formatting the original error.
        raise ParseError("%s: %r" % (filename, exc))

    return [process_gametree(gametree) for gametree in collection]
def list_attributes(node):
    """Return the keys of the node's ``data`` mapping."""
    properties = node.data
    return properties.keys()
def get_attribute(node, atr):
    """Return the first value of property ``atr`` on ``node``, decoded.

    The value is decoded with the 'my_err' error handler; no encoding is
    passed to decode(), so the interpreter's default one applies.

    Returns None when the lookup raises KeyError (the node has no such
    property).
    """
    try:
        # Only the first of the property's values is used.
        raw_value = node.data[atr].data[0]
        return raw_value.decode(errors='my_err')
    except KeyError:
        return None
def process_gametree(gametree):
    """Collect the header attributes of a game tree into a dictionary.

    The node the tree's cursor initially points at is treated as the header.
    Each of its attributes is fetched with get_attribute(); attributes whose
    value is None or otherwise falsy (e.g. an empty string) are left out.
    """
    # A fresh cursor starts on the first node, which is the header.
    header = gametree.cursor().node

    decoded = (
        (key, get_attribute(header, key))
        for key in list_attributes(header)
    )
    return dict((key, value) for key, value in decoded if value)
if __name__ == '__main__':
    # Manual smoke test: print the headers of one bundled game record.
    headers = load_sgf_file_headers('../TEST_FILES/games/1930-01-00a.sgf')
    print(headers)