test do_stdref, too
[osm-ro-tools.git] / OsmRoConsistency.py
blobab94efe915acf5ec1a937572a4d443a365031176
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
4 # vim:noet:ts=4:sw=4:
6 import re
7 import unicodedata
10 import os
11 if os.path.exists('OsmApi.py'):
12 import sys
13 sys.path.insert(0, '.')
14 #from OsmApi import OsmApi
16 from OsmUtils import OsmApiExt
18 from getopt import getopt
19 from copy import deepcopy
22 import logging
# Everything logged by this module goes to the file 'area.log', from INFO up.
logging.basicConfig(level=logging.INFO, filename='area.log')

# Comment text used by the reference-standardising helpers below.
__stdref_comm__ = (
    u'make refs conform to format En+/D[NJC]n+x?, where n = 0-9 and x = A-Z'
)

# Comment text for the "European refs belong in int_ref" clean-up.
__euref2int_ref_comm__ = (
    u'move EU refs (En+) to int_ref;'
    u'Romanian mappers agreed ref should contain only national refs'
)

# Command line synopsis; the single %s receives the program name.
__usage__ = "\nUsage:\n\t%s --lat=nn.nnnn --lon=nn.nnnn [--bbox=nn]\n"
# Table of "meta-regexes", keyed by pattern name.
#   're'  - regular expression source (the code in this file applies it
#           case-insensitively)
#   'rpl' - replacement recipe: text items are copied literally, integer items
#           are group numbers that get_shiftedpatrep() turns into back
#           references
patterns = {
    # --- road references ---------------------------------------------------
    'natref': {
        're': u'(d[njc])\s*(\d+)\s*([a-z]?)\s*',
        'rpl': [1, 2, 3],
    },
    'intref': {
        're': u'(e)\s*(\d+)\s*',
        'rpl': [1, 2],
    },

    # --- name prefixes: abbreviation or misspelling -> canonical prefix ----
    'strada': {
        're': u'^\s*(str?a?|[ds]trada)[.\t ]+(.*)$',
        'rpl': [u'Strada ', 2],
    },
    'stradela': {
        're': u'^\s*(str-la|sdla|[ds]tradela)[.\t ]+(.*)$',
        'rpl': [u'Stradela ', 2],
    },
    'alee': {
        're': u'^\s*(ale?|alee?a)[.\t ]+(.*)$',
        'rpl': [u'Aleea ', 2],
    },

    'intrare': {
        're': u'^\s*(intr?|intrarea)[.\t ]+(.*)$',
        'rpl': [u'Intrarea ', 2],
    },
    'fundatura': {
        're': u'^\s*(fndt?|fund[aă]tura)[.\t ]+(.*)$',
        'rpl': [u'Fundătura ', 2],
    },
    'fundac': {
        're': u'^\s*(fdc|fundacul)[.\t ]+(.*)$',
        'rpl': [u'Fundacul ', 2],
    },

    # maybe 'sp' should be disabled 'cause it might be 'Spitalul'?
    'splai': {
        're': u'^\s*(spl?|splaiu?l?)[.\t ]+(.*)$',
        'rpl': [u'Splaiul ', 2],
    },

    'bulevard': {
        're': u'^\s*(b[dl]|b[-.]?d?ul|blvd?|bulev|bulevardu?l?)[.\t ]+(.*)$',
        'rpl': [u'Bulevardul ', 2],
    },

    'piata': {
        're': u'^\s*(pia[tţț][aă]|p-?[tţț]a)[.\t ]+(.*)$',
        'rpl': [u'Piața ', 2],
    },
    'varf': {
        're': u'^\s*(vf|v[iaîâ]rfu?l?)[.\t ]+(.*)$',
        'rpl': [u'Vârful ', 2],
    },
    'munte': {
        're': u'^\s*(mn?t|m-te|m-tele|muntele)[.\t ]+(.*)$',
        'rpl': [u'Muntele ', 2],
    },
}
# Keys of `patterns` that describe road references.
ref_patterns = [
    'natref',
    'intref',
]

# Keys of `patterns` that describe name prefixes, in the order in which
# do_correct_names() tries them.
name_patterns = [
    'strada',
    'stradela',
    'alee',
    'intrare',
    'fundatura',
    'fundac',
    'splai',
    'bulevard',
    'piata',
    'varf',
    'munte',
]
def get_shiftedpatrep(patlist, shift=0):
    """
    Turn a replacement recipe into a replacement string usable with re.sub.

    patlist -- list mixing text and integers; an integer N stands for a back
               reference to group N of the recipe's own regex
    shift   -- amount added to every group number, for when the regex has
               been embedded in a larger one and its groups have moved

    Text items are copied unchanged (byte strings are decoded first), integer
    items become '\\<N + shift>'.  Any other item type raises TypeError.
    """
    text_type = type(u'')

    def render(item):
        # Exact type tests on purpose: subclasses (bool included) are refused.
        kind = type(item)
        if kind is int:
            return u'\\%s' % (item + shift)
        if kind is text_type:
            return item
        if kind is str:
            return item.decode()
        raise TypeError

    return ''.join([render(item) for item in patlist])
def do_stdref(amap, changes=None):
    """
    Normalise the 'ref' tag of the ways in amap that carry a national
    (DN/DJ/DC) or European (E) road reference.

    The ref value is split on ';'.  In every entry each recognised reference
    is rewritten through the 'rpl' recipe of its pattern (which drops the
    whitespace inside it), the entry is upper-cased, duplicates and empty
    entries are dropped and the result is sorted and joined with ';' again.

    amap    -- map data, passed as-is to OsmApiExt.grepElementsInMap
    changes -- optional list of previously collected change records; it is
               deep-copied, never modified

    Returns a new list: the copied records followed by one record per way
    whose ref changed.  Each record has the keys u'type', u'data', 'oldrecs'
    and 'newrecs'.  The way dictionaries returned by grepElementsInMap are
    updated in place; nothing is sent to the server.
    """
    ch = deepcopy(changes) if changes is not None else []

    natref = patterns['natref']
    intref = patterns['intref']
    pat = u'(' + natref['re'] + u'\s*|' + intref['re'] + u'\s*)'
    # Group 1 is the wrapper added just above, so every back reference moves
    # by one; the intref groups come after all the natref groups.
    nat_template = get_shiftedpatrep(natref['rpl'], 1)
    int_template = get_shiftedpatrep(intref['rpl'], 1 + len(natref['rpl']))
    compre = re.compile(pat, re.I)

    def canonical(match):
        # Group 2 is the first natref group; it is None when the intref
        # alternative matched.  Choosing the template per match avoids relying
        # on re.sub raising for unmatched groups (Python 3.5+ no longer does).
        if match.group(2) is not None:
            return match.expand(nat_template)
        return match.expand(int_template)

    refedways = OsmApiExt.grepElementsInMap(amap,
                                            etype=u'way', tag=u'ref', pattern=pat)

    for w in refedways:

        wid = w[u'id']
        in_ref = w[u'tag'][u'ref']
        logging.debug(u'Testing way %d: ref = > %s < ...', wid, in_ref)

        unique_refs = set()
        for oref in in_ref.split(u';'):
            # The value stays text: encoding it here would fail on non-ASCII
            # refs and break the u';'.join() below.
            nref = compre.sub(canonical, oref.strip()).upper()
            if nref.strip():
                unique_refs.add(nref)

        # sorting puts the european refs at the end and adds consistency
        out_ref = u';'.join(sorted(unique_refs))

        if in_ref == out_ref:
            logging.debug(u' Way %d : ignored (no change): %s -> %s',
                          wid, in_ref, out_ref)
            continue

        logging.debug(u' >>> Way %d : ref change: %s -> %s',
                      wid, in_ref, out_ref)
        logging.info('Way %d: ref change: %s -> %s', wid, in_ref, out_ref)

        w[u'tag'][u'ref'] = out_ref
        logging.debug(' api.WayUpdate(w) - w[u\'id\'] = %d', wid)
        ch.append({u'type': u'way',
                   #u'id': wid, #TODO: check and see if this is useful
                   u'data': w,
                   'oldrecs': {u'ref': in_ref},
                   'newrecs': {u'ref': out_ref}
                   })

    logging.info('Done')
    return ch
def standardRefsMap(amap, commextra=u''):
    """
    Run do_stdref() over an already loaded map.

    amap      -- map data handed to do_stdref()
    commextra -- text appended to __stdref_comm__ to form the comment passed
                 to OsmApiExt

    The computed changes are currently discarded; nothing is returned.
    """
    api = OsmApiExt(comment=__stdref_comm__ + commextra,
                    appname=u'OsmRoConsistency')

    pending = do_stdref(amap)
    # TODO: apply or report changes

    del api
def standardRefsXY(lat, lon, bbox_km=10):
    """
    Run do_stdref() over the map around a point.

    lat, lon -- coordinates handed to api.mapFocus()
    bbox_km  -- value handed to api.MapAroundPoint() as bbox_km

    The computed changes are currently discarded; nothing is returned.
    """
    api = OsmApiExt(comment=__stdref_comm__, appname=u'OsmRoConsistency')

    api.mapFocus(lon=lon, lat=lat)
    area = api.MapAroundPoint(bbox_km=bbox_km)

    pending = do_stdref(area)
    # TODO: apply or report changes

    del api
def splitstripunique(value, separator=u';'):
    """
    From an initial set of values separated with the separator symbol,
    generate a list of unique sorted stripped unicode values.
    The input can be string or unicode.

    E.g.:
    For input u'b; a; a; c; dd; d;b' the function will return
    [ u'a', u'b', u'c', u'd', u'dd' ]

    Empty items (after stripping) are dropped, so an empty input gives [].
    """
    # Byte strings are decoded up front so that the rest works on text only,
    # on Python 2 and Python 3 alike.
    if isinstance(separator, bytes):
        separator = separator.decode()
    if isinstance(value, bytes):
        value = value.decode()

    stripped = set(item.strip() for item in value.split(separator))
    stripped.discard(u'')
    return sorted(stripped)
def do_eref2int(amap, changes=None):
    """
    Searches in the 'ref' fields of ways in the input amap for values that look
    like references of European roads e.g. "E80", "E 81" and moves these in the
    'int_ref' field where they belong, after correcting the format

    amap    -- list of elements, each a dict with u'type' and u'data' keys
    changes -- optional list of previously collected change records; it is
               deep-copied, never modified

    Returns a new list: the copied records followed by one record per
    modified way.  Each record has the keys u'type', u'data', 'oldrecs' and
    'newrecs'.  The way dictionaries returned by grepElementsInMap are
    updated in place; nothing is sent to the server.
    """
    ch = deepcopy(changes) if changes is not None else []

    irpat = patterns['intref']['re']
    intrep = get_shiftedpatrep(patterns['intref']['rpl'])
    compre = re.compile(irpat, re.I | re.U)

    refedways = [x for x in amap
                 if x[u'type'] == u'way'
                 and x[u'data'].get(u'tag', {}).get(u'ref', u'') != u'']

    eways = OsmApiExt.grepElementsInMap(etype=u'way',
                                        tag=u'ref',
                                        pattern=(u'^.*' + irpat + u'.*$'),
                                        amap=refedways)

    for w in eways:

        wid = w[u'id']
        tags = w[u'tag']
        oref = tags[u'ref']
        oiref = tags.get(u'int_ref', u'')

        # Upper-case BEFORE de-duplicating, otherwise 'e80;E80' would survive
        # as two identical entries.
        loref = splitstripunique(oref.upper())
        loirefs = splitstripunique(oiref.upper())

        erefs = [r for r in loref if compre.match(r)]
        if not erefs:
            # The way was selected because an E-like reference appears
            # somewhere in its ref, but no entry starts with one: there is
            # nothing to move, and going on could write an empty int_ref.
            continue

        nref = u';'.join(r for r in loref if not compre.match(r))

        # Values stay text: encoding them would fail on non-ASCII input and
        # break the u';'.join() below.
        niref = u';'.join(sorted(set(compre.sub(intrep, er)
                                     for er in erefs + loirefs)))

        if nref:
            tags[u'ref'] = nref
        else:
            # every entry was a European ref: no national ref is left
            del tags[u'ref']
        tags[u'int_ref'] = niref

        logging.info(u"Way %d changed: from (ref=\"%s\", int_ref=\"%s\") to (ref=\"%s\", int_ref=\"%s\")",
                     wid, oref, oiref, nref, niref)

        ch.append({u'type': u'way',
                   u'data': w,
                   'oldrecs': {u'int_ref': oiref, u'ref': oref},
                   'newrecs': {u'int_ref': niref, u'ref': nref}
                   })
        #api.WayUpdate(w)

    logging.info(u"e ref to int DONE")
    return ch
def do_correct_names(amap, api, with_update=False):
    """
    Expand abbreviated or misspelt name prefixes of nodes and ways.

    Every pattern listed in name_patterns is tried, in order, on the 'name'
    tag of every named node and way in amap; a matching name is rewritten
    through the pattern's 'rpl' recipe (e.g. 'str. X' becomes 'Strada X').

    amap        -- list of elements, each a dict with u'type' and u'data' keys
    api         -- object providing NodeUpdate and WayUpdate callables
    with_update -- when true, each renamed element is passed to the matching
                   api update callable

    The element dictionaries are modified in place; nothing is returned.
    """

    def named(etype):
        # data dicts of the elements of the given type having a non-empty name
        return [x[u'data'] for x in amap
                if x[u'type'] == etype
                and x[u'data'].get(u'tag', {}).get(u'name', u'') != u'']

    # (label used in log messages, update callable, element); nodes first,
    # then ways.  Pairing them here replaces counting positions in a joined
    # list to tell nodes from ways.
    candidates = ([(u'Node', api.NodeUpdate, e) for e in named(u'node')] +
                  [(u'Way', api.WayUpdate, e) for e in named(u'way')])

    for ntype in name_patterns:
        logging.debug("Processing type %s...", ntype)
        compre = re.compile(patterns[ntype]['re'], re.I | re.U)
        patrep = get_shiftedpatrep(patterns[ntype]['rpl'])
        logging.debug("Pattern: \"%s\"", patrep)

        for label, update, e in candidates:
            oname = e[u'tag'][u'name']
            if not compre.match(oname):
                continue

            # The new name stays text: encoding it with the default codec
            # fails for the replacements that contain diacritics.
            nname = compre.sub(patrep, oname)

            if nname == oname:
                logging.debug(u"%s %d's name is identical after prepocessing pattern \"%s\"",
                              label, e[u'id'], patrep)
                continue

            e[u'tag'][u'name'] = nname
            logging.info(u"%s %d changed: from name=\"%s\" to name=\"%s\"",
                         label, e[u'id'], oname, nname)
            if with_update:
                update(e)

    logging.info('name correction done')
def usage():
    """Print the command line synopsis, with the script name filled in."""
    # Imported here because the module-level "import sys" only runs when a
    # file named OsmApi.py exists in the current directory.
    import sys

    print(__usage__ % (sys.argv[0]))
if __name__ == '__main__':

    # Script entry point: parse --lat/--lon/--bbox and run the reference
    # standardisation around that point.
    #
    # Local imports: at module level "sys" is only imported when OsmApi.py
    # exists in the current directory, and GetoptError is not imported at all.
    import sys
    from getopt import GetoptError

    try:
        opts, args = getopt(sys.argv[1:], '', ["lon=", "lat=", "bbox="])
    except GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    if args:
        print("Program received unknown arguments")
        usage()
        sys.exit(3)

    _lat = None
    _lon = None
    _bbox = None
    for o, a in opts:
        if o == '--lon':
            logging.info("lon=%s", a)
            _lon = a
        elif o == '--lat':
            logging.info("lat=%s", a)
            _lat = a
        elif o == '--bbox':
            logging.info("bbox=%s", a)
            try:
                _bbox = int(a)
            except ValueError:
                # report a bad value like any other option error
                print("option --bbox requires an integer value, got '%s'" % a)
                usage()
                sys.exit(2)

    if not _lat or not _lon:
        print("Insuficient arguments. Both lon and lat must be defined.")
        usage()
        sys.exit(1)

    # NOTE(review): lat and lon are passed on as the strings given on the
    # command line - confirm that OsmApiExt.mapFocus accepts them in that form.
    if _bbox:
        standardRefsXY(lon=_lon, lat=_lat, bbox_km=_bbox)
    else:
        # no (or zero) --bbox: keep standardRefsXY's default size
        standardRefsXY(lon=_lon, lat=_lat)