make a stub that should apply changes or not to an api
[osm-ro-tools.git] / OsmRoConsistency.py
blob0188e8fe1d4f5d6c3547022ffbb85cd98f8ac290
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
4 # vim:noet:ts=4:sw=4:
6 import re
7 import unicodedata
10 import os
11 if os.path.exists('OsmApi.py'):
12 import sys
13 sys.path.insert(0, '.')
14 #from OsmApi import OsmApi
16 from OsmUtils import OsmApiExt
18 from getopt import getopt
19 from copy import deepcopy
22 import logging
23 logging.basicConfig(filename='area.log', level=logging.INFO)
# Changeset comments used when uploading corrections.
__stdref_comm__ = \
    u'make refs conform to format En+/D[NJC]n+x?, where n = 0-9 and x = A-Z'
__euref2int_ref_comm__ = u'move EU refs (En+) to int_ref;' + \
    u'Romanian mappers agreed ref should contain only national refs'

__usage__ = "\nUsage:\n\t%s --lat=nn.nnnn --lon=nn.nnnn [--bbox=nn]\n"

# Meta-patterns.  For every entry:
#   're'  is the regular expression source (the code in this file compiles or
#         applies it case-insensitively);
#   'rpl' describes the replacement: an integer is a back reference to that
#         group, a string is literal text (see get_shiftedpatrep).
# Regex backslashes are doubled so the literals contain no invalid escape
# sequences; the resulting string values are unchanged.
patterns = {
    'natref': {'re': u'(d[njc])\\s*(\\d+)\\s*([a-z]?)\\s*', 'rpl': [1, 2, 3]},
    'intref': {'re': u'(e)\\s*(\\d+)\\s*', 'rpl': [1, 2]},

    'strada': {'re': u'^\\s*(str?a?|[ds]trada)[.\t ]+(.*)$',
               'rpl': [u'Strada ', 2]},
    'stradela': {'re': u'^\\s*(str-la|sdla|[ds]tradela)[.\t ]+(.*)$',
                 'rpl': [u'Stradela ', 2]},
    'alee': {'re': u'^\\s*(ale?|alee?a)[.\t ]+(.*)$',
             'rpl': [u'Aleea ', 2]},

    'intrare': {'re': u'^\\s*(intr?|intrarea)[.\t ]+(.*)$',
                'rpl': [u'Intrarea ', 2]},
    'fundatura': {'re': u'^\\s*(fndt?|fund[aă]tura)[.\t ]+(.*)$',
                  'rpl': [u'Fundătura ', 2]},
    'fundac': {'re': u'^\\s*(fdc|fundacul)[.\t ]+(.*)$',
               'rpl': [u'Fundacul ', 2]},

    # maybe 'sp' should be disabled 'cause it might be 'Spitalul'?
    'splai': {'re': u'^\\s*(spl?|splaiu?l?)[.\t ]+(.*)$',
              'rpl': [u'Splaiul ', 2]},

    'bulevard': {'re': u'^\\s*(b[dl]|b[-.]?d?ul|blvd?|bulev|bulevardu?l?)[.\t ]+(.*)$',
                 'rpl': [u'Bulevardul ', 2]},

    'piata': {'re': u'^\\s*(pia[tţț][aă]|p-?[tţț]a)[.\t ]+(.*)$',
              'rpl': [u'Piața ', 2]},
    'varf': {'re': u'^\\s*(vf|v[iaîâ]rfu?l?)[.\t ]+(.*)$',
             'rpl': [u'Vârful ', 2]},
    'munte': {'re': u'^\\s*(mn?t|m-te|m-tele|muntele)[.\t ]+(.*)$',
              'rpl': [u'Muntele ', 2]},
}

# Keys of `patterns` that apply to road references / to element names.
ref_patterns = ['natref', 'intref']
name_patterns = ['strada', 'stradela', 'alee',
                 'intrare', 'fundatura', 'fundac',
                 'splai', 'bulevard',
                 'piata',
                 'varf', 'munte']
def get_shiftedpatrep(patlist, shift=0):
    """
    Build a real regex replacement template from the meta-pattern patlist.

    Each integer in patlist is the position of a back reference in the
    initial meta-regex; it is 'shifted' by shift positions and emitted as a
    backslash followed by the resulting number.  Each text item is copied
    verbatim; byte strings are decoded as UTF-8 (this file's source coding)
    first.

    Returns the concatenated template as a unicode string.
    Raises TypeError for an item that is neither an integer nor text.
    """
    pieces = []

    for item in patlist:
        # bool is a subclass of int, but True/False are not group numbers.
        if isinstance(item, int) and not isinstance(item, bool):
            pieces.append(u'\\%d' % (item + shift))
        elif isinstance(item, bytes):
            pieces.append(item.decode('utf-8'))
        elif isinstance(item, type(u'')):
            pieces.append(item)
        else:
            raise TypeError(
                'pattern items must be integers or strings, got %r' % (item,))

    return u''.join(pieces)
def do_stdref(amap, changes=None):
    """
    Rewrite the 'ref' tag of ways in amap into the canonical reference form.

    Every ';'-separated reference is normalised with the 'natref' / 'intref'
    patterns (inner whitespace removed, upper-cased); the results are
    de-duplicated, sorted and joined back with ';'.  Ways whose ref changes
    are modified in place and recorded.

    amap    -- map data handed to OsmApiExt.grepElementsInMap
    changes -- optional list of earlier change records; it is deep-copied,
               never modified

    Returns a new list: the copied changes followed by one record per
    modified way (keys u'type', u'data', 'oldrecs', 'newrecs').
    """
    ch = [] if changes is None else deepcopy(changes)

    natref = patterns['natref']
    intref = patterns['intref']
    pat = u'(' + natref['re'] + u'\\s*|' + intref['re'] + u'\\s*)'
    # Group 1 wraps the whole alternation, so the national groups start at 2
    # and the European groups come right after them.
    patrep = get_shiftedpatrep(natref['rpl'], 1)
    patrepalt = get_shiftedpatrep(intref['rpl'], 1 + len(natref['rpl']))
    compre = re.compile(pat, re.I)

    def canonical(match):
        # Choose the template by the alternative that actually matched.
        # Relying on re.sub() raising for unmatched groups breaks on
        # Python >= 3.5, where they silently become empty strings.
        if match.group(2) is not None:
            return match.expand(patrep)
        return match.expand(patrepalt)

    # presumably yields the data dicts of ways whose ref matches pat --
    # confirm against OsmUtils.OsmApiExt
    refedways = OsmApiExt.grepElementsInMap(amap,
                                            etype=u'way', tag=u'ref',
                                            pattern=pat)

    for w in refedways:
        wid = w[u'id']
        in_ref = w[u'tag'][u'ref']
        logging.debug(u'Testing way %d: ref = > %s < ...', wid, in_ref)

        new_refs = set()
        for oref in in_ref.split(u';'):
            nref = compre.sub(canonical, oref.strip()).upper()
            if nref.strip():
                new_refs.add(nref)

        # sorting puts the european refs at the end and adds consistency
        out_ref = u';'.join(sorted(new_refs))

        if in_ref == out_ref:
            logging.debug(u' Way %d : ignored (no change): %s -> %s',
                          wid, in_ref, out_ref)
        else:
            logging.debug(u' >>> Way %d : ref change: %s -> %s',
                          wid, in_ref, out_ref)
            logging.info('Way %d: ref change: %s -> %s', wid, in_ref, out_ref)

            w[u'tag'][u'ref'] = out_ref
            logging.debug(" api.WayUpdate(w) - w[u'id'] = %d", wid)
            ch.append({u'type': u'way',
                       #u'id': wid, #TODO: check and see if this is useful
                       u'data': w,
                       'oldrecs': {u'ref': in_ref},
                       'newrecs': {u'ref': out_ref}
                       })

    logging.info('Done')
    return ch
def _applyChanges2Map(changes, api):
    """
    Placeholder for uploading the collected changes through api.

    Neither argument is used yet: the function prints a notice and always
    raises NotImplementedError.
    """
    # TODO: implement this
    notice = u"NOT IMPLEMENTED YET: _applyChanges2Map(changes, api)"
    print(notice)
    raise NotImplementedError()
def standardRefsMap(amap, commextra=u''):
    """
    Standardise the refs found in an already loaded map.

    Creates an OsmApiExt whose comment is the standard-ref comment followed
    by commextra, computes the ref changes for amap with do_stdref and hands
    them to _applyChanges2Map.
    """
    osm = OsmApiExt(appname=u'OsmRoConsistency',
                    comment=__stdref_comm__ + commextra)

    pending = do_stdref(amap)
    _applyChanges2Map(pending, osm)

    del osm
def standardRefsXY(lat, lon, bbox_km=10):
    """
    Standardise the refs of the map around the point (lat, lon).

    Focuses an OsmApiExt on the point, fetches the surrounding map with
    MapAroundPoint(bbox_km=bbox_km), computes the ref changes with do_stdref
    and hands them to _applyChanges2Map.
    """
    osm = OsmApiExt(appname=u'OsmRoConsistency', comment=__stdref_comm__)
    osm.mapFocus(lat=lat, lon=lon)

    area = osm.MapAroundPoint(bbox_km=bbox_km)
    pending = do_stdref(area)
    _applyChanges2Map(pending, osm)

    del osm
def splitstripunique(value, separator=u';'):
    """
    From an initial set of values separated with the separator symbol,
    generate a list of unique sorted stripped unicode values.
    The input can be string or unicode; byte strings are decoded as UTF-8.

    E.g.:
    For input u'b; a; a; c; dd; d;b' the function will return
    [ u'a', u'b', u'c', u'd', u'dd' ]

    Empty items (for instance from an empty input) are dropped.
    """
    if isinstance(separator, bytes):
        separator = separator.decode('utf-8')
    if isinstance(value, bytes):
        value = value.decode('utf-8')

    stripped = (item.strip() for item in value.split(separator))
    return sorted(set(item for item in stripped if item != u''))
def do_eref2int(amap, changes=None):
    """
    Searches in the 'ref' fields of ways in the input amap for values that look
    like references of European roads e.g. "E80", "E 81" and moves these in the
    'int_ref' field where they belong, after correcting the format.

    Matching ways are modified in place: 'ref' keeps only the non-European
    references (the tag is removed when none remain) and 'int_ref' receives
    the union of its previous values and the moved ones, sorted and joined
    with ';'.

    amap    -- iterable of elements with u'type' and u'data' keys
    changes -- optional list of earlier change records; it is deep-copied,
               never modified

    Returns a new list: the copied changes followed by one record per
    modified way (keys u'type', u'data', 'oldrecs', 'newrecs').
    """
    ch = [] if changes is None else deepcopy(changes)

    irpat = patterns['intref']['re']
    intrep = get_shiftedpatrep(patterns['intref']['rpl'])
    compre = re.compile(irpat, re.I | re.U)

    refedways = [x for x in amap if x[u'type'] == u'way' and
                 x[u'data'].get(u'tag', {}).get(u'ref', u'') != u'']

    # presumably yields the data dicts of ways whose ref matches the
    # pattern -- confirm against OsmUtils.OsmApiExt
    eways = OsmApiExt.grepElementsInMap(etype=u'way',
                                        tag=u'ref',
                                        pattern=(u'^.*' + irpat + u'.*$'),
                                        amap=refedways)

    for w in eways:
        wid = w[u'id']
        oref = w[u'tag'][u'ref']
        oiref = w[u'tag'].get(u'int_ref', u'')
        loref = [x.upper() for x in splitstripunique(oref)]
        loirefs = [x.upper() for x in splitstripunique(oiref)]

        erefs = [x for x in loref if compre.match(x)]
        if not erefs:
            # The grep above matches an E-number anywhere in the value; when
            # no individual reference starts with one there is nothing to
            # move, and writing an empty int_ref would be wrong.
            continue

        # set(): upper-casing may have produced duplicates
        nref = u';'.join(sorted(set(x for x in loref if x not in erefs)))
        niref = u';'.join(sorted(set(compre.sub(intrep, er)
                                     for er in erefs + loirefs
                                     if er != u'')))

        if nref:
            w[u'tag'][u'ref'] = nref
        else:
            del w[u'tag'][u'ref']
        # niref cannot be empty here: erefs holds at least one item
        w[u'tag'][u'int_ref'] = niref

        logging.info(u"Way %d changed: from (ref=\"%s\", int_ref=\"%s\") to (ref=\"%s\", int_ref=\"%s\")",
                     wid, oref, oiref, nref, niref)

        ch.append({u'type': u'way',
                   u'data': w,
                   'oldrecs': {u'int_ref': oiref, u'ref': oref},
                   'newrecs': {u'int_ref': niref, u'ref': nref}
                   })

    logging.info(u"e ref to int DONE")
    return ch
def do_correct_names(amap, api, with_update=False):
    """
    Expand abbreviated or misspelled name prefixes of named nodes and ways.

    For each pattern listed in name_patterns, every node and way of amap
    whose 'name' tag matches gets the name rewritten with that pattern's
    replacement (e.g. the 'strada' pattern yields a u'Strada ' prefix).
    Elements are modified in place.

    amap        -- iterable of elements with u'type' and u'data' keys
    api         -- object providing NodeUpdate and WayUpdate
    with_update -- when true, each renamed element is passed to the
                   matching api update method
    """
    def named(etype):
        # data dicts of the elements of this type that have a non-empty name
        return [x[u'data'] for x in amap
                if x[u'type'] == etype and
                x[u'data'].get(u'tag', {}).get(u'name', u'') != u'']

    prop = {
        u'node': {'name': u'Node', 'updfunc': api.NodeUpdate},
        u'way': {'name': u'Way', 'updfunc': api.WayUpdate},
    }

    # nodes first, then ways, each paired with its element type
    candidates = [(u'node', e) for e in named(u'node')] + \
                 [(u'way', e) for e in named(u'way')]

    for ntype in name_patterns:
        logging.debug("Processing type %s...", ntype)
        pat = patterns[ntype]['re']
        patrep = get_shiftedpatrep(patterns[ntype]['rpl'])
        logging.debug("Pattern: \"%s\"", patrep)
        compre = re.compile(pat, re.I | re.U)

        matching = [(etype, e) for etype, e in candidates
                    if compre.match(e[u'tag'][u'name'])]

        for etype, e in matching:
            oname = e[u'tag'][u'name']
            # Keep the result as text: encoding it fails (Python 2) or yields
            # bytes (Python 3) for replacements with diacritics.
            nname = compre.sub(patrep, oname)

            if nname != oname:
                e[u'tag'][u'name'] = nname
                logging.info(u"%s %d changed: from name=\"%s\" to name=\"%s\"",
                             prop[etype]['name'], e[u'id'], oname, nname)
                if with_update:
                    prop[etype]['updfunc'](e)
            else:
                logging.debug(u"%s %d's name is identical after prepocessing pattern \"%s\"",
                              prop[etype]['name'], e[u'id'], patrep)

    logging.info('name correction done')
def usage():
    """Print the command-line usage text, filled in with the script name."""
    # Imported locally: the module-level 'import sys' is only executed when
    # OsmApi.py exists in the current directory.
    import sys

    print(__usage__ % (sys.argv[0],))
if __name__ == '__main__':
    # Script entry point: parse --lat/--lon/--bbox and standardise the refs
    # of the map around that point.
    #
    # Imported here because the module-level 'import sys' only runs when
    # OsmApi.py exists in the current directory, and GetoptError is not
    # imported at the top of the file at all.
    import sys
    from getopt import GetoptError

    try:
        opts, args = getopt(sys.argv[1:], '', ["lon=", "lat=", "bbox="])
    except GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    if args:
        print("Program received unknown arguments")
        usage()
        sys.exit(3)

    _lat = None
    _lon = None
    _bbox = None
    for o, a in opts:
        if o == '--lon':
            logging.info("lon=%s" % (a))
            _lon = a
        elif o == '--lat':
            logging.info("lat=%s" % (a))
            _lat = a
        elif o == '--bbox':
            logging.info("bbox=%s" % (a))
            try:
                _bbox = int(a)
            except ValueError as err:
                # report a malformed --bbox like any other bad option
                print(str(err))
                usage()
                sys.exit(2)

    # NOTE(review): lat/lon are passed on as the raw option strings --
    # confirm that OsmApiExt.mapFocus accepts them in that form.
    if not _lat or not _lon:
        print("Insuficient arguments. Both lon and lat must be defined.")
        usage()
        sys.exit(1)

    if _bbox:
        standardRefsXY(lon=_lon, lat=_lat, bbox_km=_bbox)
    else:
        standardRefsXY(lon=_lon, lat=_lat)