add executable flag to file
[osm-ro-tools.git] / contrib / searchwrongnames.py
blob58f6a89d8eefaede9368c45c29d8b38a83cbf5b1
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
3 #
4 # Requirements: Imposm (Omniscale)
5 # sort (GNU coreutils)
7 # © Michael Häckel, GPLv3
10 from imposm.parser import OSMParser
11 import codecs, re, subprocess
# Tag keys whose values are extracted from every node, way and relation.
searchtags = [
    'name',
    'alt_name',
    'addr:name',
    'addr:street',
    'description',
]
# Leading words removed from a name before it is written out.  They are kept
# as literal strings and escaped when the pattern is built, so the "." in the
# abbreviations matches a real dot and not an arbitrary character.
_NAME_PREFIXES = (
    u'Strada ', u'Str. ', u'Piata ', u'Piața ', u'B-dul ', u'B-dul. ',
    u'Bulevardul ', u'Calea ', u'Aleea ', u'Fundătura ', u'Fundatura ',
    u'Soseaua ', u'Șoseaua ', u'Sos. ', u'Șos. ', u'Gara ', u'Stația ',
)
_NAME_PREFIX_RE = re.compile(
    u'^(' + u'|'.join(re.escape(prefix) for prefix in _NAME_PREFIXES) + u')')
_CONTROL_WHITESPACE_RE = re.compile(u'[\r\n\t]')


def formatName(input):
    """Return *input* as a single output line without its leading prefix.

    At most one prefix from ``_NAME_PREFIXES`` is removed, and only at the
    very start of the string.  Every carriage return, line feed and tab that
    remains is replaced by a space, and a newline is appended.

    Arguments:
        input: the tag value to normalise.

    Returns:
        The normalised text followed by ``'\\n'``.
    """
    without_prefix = _NAME_PREFIX_RE.sub('', input)
    return _CONTROL_WHITESPACE_RE.sub(' ', without_prefix) + '\n'
def makeList():
    """Write the searched tag values of 'romania.osm.pbf' to 'names.txt'.

    For every node, way and relation handed over by the parser, each tag key
    listed in ``searchtags`` that is present produces one output line:
    a type letter ('n', 'w' or 'r'), the element id, a tab, and the tag value
    passed through ``formatName`` (which appends the newline).

    The output file is closed even when parsing raises.
    """
    class NameSearch(object):
        """Parser callbacks; all three share one writer."""

        def _write(self, prefix, elements):
            # Each element is a 3-tuple; only the id and the tag mapping are
            # used, the third item is ignored.
            for osmid, tags, refs in elements:
                for tag in searchtags:
                    if tag in tags:
                        f.write(prefix + str(osmid) + '\t'
                                + formatName(tags[tag]))

        def nodes(self, nodes):
            self._write('n', nodes)

        def ways(self, ways):
            self._write('w', ways)

        def relations(self, relations):
            self._write('r', relations)

    # `f` is read by NameSearch._write through the enclosing scope, so it is
    # opened before the parser is created, as in the original statement order.
    with codecs.open('names.txt', 'w', 'utf8') as f:
        search = NameSearch()
        p = OSMParser(concurrency=4,
                      nodes_callback=search.nodes,
                      ways_callback=search.ways,
                      relations_callback=search.relations)
        p.parse('romania.osm.pbf')
def toAscii(input):
    """Return *input* lower-cased, with ș ț â ă î replaced by s t a a i.

    Both the lower-case and the upper-case forms of those five letters are
    substituted before the result is lower-cased; all other characters are
    only lower-cased.
    """
    substitutions = (
        (u'ș', u's'), (u'ț', u't'), (u'â', u'a'), (u'ă', u'a'), (u'î', u'i'),
        (u'Ș', u's'), (u'Ț', u't'), (u'Â', u'a'), (u'Ă', u'a'), (u'Î', u'i'),
    )
    folded = input
    for accented, plain in substitutions:
        folded = folded.replace(accented, plain)
    return folded.lower()
def _flushGroup(detailed, plain, numbers, name):
    """Write the last spelling of a flagged group, then a separating blank line."""
    # `name` still ends with the newline of the line it was read from, so the
    # extra '\n' produces the blank separator line.
    detailed.write(numbers + '\n' + name + '\n')
    plain.write(name + '\n')


def _compareNames(lines, detailed, plain):
    """Report adjacent names that are equal under toAscii but spelled differently.

    *lines* yields "<id>\\t<name>\\n" records, expected to be ordered so that
    names to be compared are adjacent.  For every run of names with the same
    ``toAscii`` form that contains more than one spelling, each spelling is
    written to *plain*; *detailed* additionally gets, before each spelling,
    the comma-separated ids that carry it.
    """
    lastname = ''
    lastnumbers = ''
    found = False
    for line in lines:
        fields = line.split('\t')
        if len(fields) < 2:
            # No tab: not an "<id>\t<name>" record, so there is no name to
            # compare.  Skip it instead of failing with IndexError.
            continue
        osmid = fields[0]
        name = fields[1]
        if toAscii(name) == toAscii(lastname):
            if name != lastname:
                # Same letters, different spelling: emit the previous
                # spelling and start collecting ids for the new one.
                detailed.write(lastnumbers + '\n' + lastname)
                plain.write(lastname)
                found = True
                lastnumbers = osmid
            else:
                lastnumbers = lastnumbers + ', ' + osmid
        else:
            if found:
                _flushGroup(detailed, plain, lastnumbers, lastname)
                found = False
            lastnumbers = osmid
        lastname = name
    # The input may end inside a flagged group; without this the final
    # spelling and its ids would never be written.
    if found:
        _flushGroup(detailed, plain, lastnumbers, lastname)


def makeComparison():
    """Compare 'names-sorted.txt' and write the two evaluation files.

    Reads 'names-sorted.txt' and writes 'names-evaluated.txt' (ids and
    spellings) and 'names-evaluated-plain.txt' (spellings only).  All three
    files are closed even if the comparison raises.
    """
    with codecs.open('names-sorted.txt', 'r', 'utf8') as s:
        with codecs.open('names-evaluated.txt', 'w', 'utf8') as d:
            with codecs.open('names-evaluated-plain.txt', 'w', 'utf8') as p:
                _compareNames(s, d, p)
# Three stages: extract the names, sort them, then compare adjacent names.
print('Making word list.')
makeList()

print('Sorting.')
# check_call raises CalledProcessError when sort exits non-zero, so the
# comparison never runs against a missing or stale 'names-sorted.txt'.
subprocess.check_call(['sort', '-k', '2', '-o', 'names-sorted.txt', 'names.txt'])

print('Comparing.')
makeComparison()

print('Finished.')