update and description of the db building process
[aoi.git] / tools / parse_kradfile.py
blob5a40ee7e40208378549e4c6496590410816f6929
1 #!/usr/bin/env python3
# Script for converting kradfile-u to an SQL script.
# Uses a custom list of stroke counts (FILE_STROKES).

# Check for components without strokes (aoi will treat them as having strokes=0):
# select * from components where ifnull(strokes ,0)=0
9 import time
# --- Input / output file names -------------------------------------------
FILE_KRAD = 'kradfile-u'              # kanji -> components source file
FILE_STROKES = 'components.strokes'   # component:strokes list
SCRIPT_NAME = 'script.components.sql'  # generated SQL script

# Default version label offered at the prompt: "build: YYYY-MM-DD" (today).
VERSION = 'build: %s' % time.strftime("%Y-%m-%d")

# --- Containers filled by the loader functions ---------------------------
data = dict()        # kanji:components
set_comps = set()    # all used components
components = dict()  # component:strokes
def load_kradfile():
    """Parse FILE_KRAD into the module-level ``data`` and ``set_comps``.

    Every line that is neither blank nor a ``#`` comment is stripped of
    spaces and split at ``:``.  The left part is used as the key in
    ``data`` and the right part (a string of component characters) as its
    value; each character of the right part is also added to ``set_comps``.
    A summary line with both sizes is printed when done.

    Raises:
        ValueError: if a data line does not split into exactly two parts
            around ``:``.
    """
    # Decode explicitly so parsing does not depend on the platform's default
    # locale encoding (assumes the file is UTF-8 encoded -- TODO confirm).
    with open(FILE_KRAD, encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            # Blank lines must be skipped before looking at the first
            # character: indexing line[0] on '' raises IndexError.
            if not line or line.startswith('#'):
                continue
            kanji, comps = line.replace(' ', '').split(':')
            set_comps.update(comps)
            data[kanji] = comps
    print("%s kanji, %s radicals" % (len(data), len(set_comps)))
def load_strokes_file():
    """Load FILE_STROKES into the module-level ``components`` mapping.

    Each non-blank line is split at ``:`` into a component and its stroke
    count; the count is stored exactly as read (a string).  The number of
    loaded components is printed when done.

    Raises:
        ValueError: if a non-blank line does not split into exactly two
            parts around ``:``.
    """
    # Decode explicitly so parsing does not depend on the platform's default
    # locale encoding (assumes the file is UTF-8 encoded -- TODO confirm).
    with open(FILE_STROKES, encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            # A blank (e.g. trailing) line would otherwise fail the
            # two-value unpack below with ValueError.
            if not line:
                continue
            component, strokes = line.split(':')
            components[component] = strokes
    print("%s components" % len(components))
def _sql_quote(text):
    """Return *text* with single quotes doubled for use in an SQL '...' literal."""
    return text.replace("'", "''")


if __name__ == '__main__':
    # Generate SCRIPT_NAME: an SQL script that rebuilds the `components`
    # table, records the kradfile version in `aoi`, and stores each kanji's
    # component string in `k_kanji`.

    # Stroke counts that the generated script sets explicitly after the
    # generic k_kanji lookup (manual fix for kradfile-u).  Order is kept as
    # written so the emitted statements stay in the same sequence.
    # NOTE(review): confirm '|' is the same code point kradfile-u uses for
    # the vertical-stroke component.
    MANUAL_STROKES = (
        ('⺹', 4),
        ('⺾', 3),
        ('⻖', 3),
        ('マ', 2),
        ('ノ', 1),
        ('ユ', 2),
        ('⺅', 2),
        ('|', 1),
        ('⺌', 3),
        ('⻏', 3),
        ('ヨ', 3),
        ('ハ', 2),
    )

    # An empty answer keeps the default "build: <date>" label.
    res = input("Kradfile-u version [%s]: " % VERSION)
    if res:
        VERSION = res

    load_kradfile()
    # load_strokes_file()  # disabled: stroke counts are taken from k_kanji below

    # The script contains non-ASCII component characters, so the output
    # encoding is fixed instead of depending on the platform default.
    with open(SCRIPT_NAME, 'w', encoding='utf-8') as f:
        f.write("begin transaction;\n")
        f.write("drop table if exists components;\n")
        f.write("create table components ( component TEXT, strokes INT );\n")
        # VERSION may be free text typed by the user: escape quotes so the
        # statement stays well-formed.
        f.write("REPLACE INTO aoi (key,val) VALUES('kradfile_version','%s');\n"
                % _sql_quote(VERSION))
        # Sorted so that repeated runs on the same input give the same file
        # (plain set iteration order of strings varies between runs).
        for c in sorted(set_comps):
            f.write("insert into components (component,strokes) values ('%s',NULL);\n"
                    % _sql_quote(c))
        for kanji, comps in data.items():
            f.write("update k_kanji set components='%s' where kanji='%s';\n"
                    % (_sql_quote(comps), _sql_quote(kanji)))
        f.write("end transaction;\n")

        # add stroke count
        f.write("begin transaction;\n")
        f.write("update components set strokes=(select strokes from k_kanji where kanji=component);\n")
        # manual fix for kradfile-u
        for component, strokes in MANUAL_STROKES:
            f.write("update components set strokes=%d where component='%s';\n"
                    % (strokes, component))
        f.write("end transaction;\n")