# -*- coding: utf-8 -*-
# Requirements: Imposm (Omniscale)
# © Michael Häckel, GPLv3
10 from imposm
.parser
import OSMParser
11 import codecs
, re
, subprocess
# OSM tag keys whose values are extracted: every `for tag in searchtags`
# loop below looks these keys up in an element's `tags` mapping.
searchtags = ['name', 'alt_name', 'addr:name', 'addr:street', 'description']
# Street-type prefixes removed from the start of a name.  The dots of the
# abbreviations are escaped so they match a literal "." only; unescaped they
# match any character and would also strip e.g. "Sosa " or "Stra ".
_NAME_PREFIX_RE = re.compile(
    u'^(Strada |Str\\. |Piata |Piața |B-dul |B-dul\\. |Bulevardul |Calea '
    u'|Aleea |Fundătura |Fundatura |Soseaua |Șoseaua |Sos\\. |Șos\\. '
    u'|Gara |Stația )')

# Characters that would break the one-record-per-line, tab-separated output.
_LINE_BREAK_RE = re.compile(u'[\r\n\t]')


def formatName(input):
    """Return *input* normalised to a single output line.

    A leading street-type prefix (``Strada``, ``Str.``, ``B-dul``, ...) is
    removed, every carriage return, line feed and tab is replaced by one
    space, and a trailing newline is appended.

    The parameter keeps its original name ``input`` (which shadows the
    builtin inside this function) so existing callers are unaffected.
    """
    without_prefix = _NAME_PREFIX_RE.sub('', input)
    return _LINE_BREAK_RE.sub(' ', without_prefix) + '\n'
# Callback holder for the OSM parser: its nodes/ways/relations methods are
# passed to OSMParser as nodes_callback/ways_callback/relations_callback.
# Each visible loop unpacks (osmid, tags, refs) tuples and, for the keys in
# `searchtags`, writes formatName(tags[tag]) to the file object `f`.
# NOTE(review): this extract omits lines inside every method (the embedded
# numbering jumps, e.g. 23 -> 28), so any guard or extra output between the
# `for tag` header and the write is not visible here - consult the full file.
19 class NameSearch(object):
# Handles a batch of nodes.
21 def nodes(self
, nodes
):
22 for osmid
, tags
, refs
in nodes
:
23 for tag
in searchtags
:
# Write the formatted tag value as one line of the output file.
28 f
.write(formatName(tags
[tag
]))
# NOTE(review): this loop iterates `ways`, but no enclosing `def` is visible
# in this extract; presumably a `ways(self, ways)` method, since
# `search.ways` is used as ways_callback - confirm.
31 for osmid
, tags
, refs
in ways
:
32 for tag
in searchtags
:
37 f
.write(formatName(tags
[tag
]))
# Handles a batch of relations, same shape as the loops above.
39 def relations(self
, relations
):
40 for osmid
, tags
, refs
in relations
:
41 for tag
in searchtags
:
46 f
.write(formatName(tags
[tag
]))
# Open names.txt for writing as UTF-8.  The `f.write(...)` calls in
# NameSearch refer to a name `f`; NOTE(review): the scope in which this
# assignment happens is not visible in this extract.
48 f
= codecs
.open('names.txt', 'w', 'utf8')
# Build the parser with concurrency=4, routing nodes, ways and relations to
# the corresponding methods of `search`.
# NOTE(review): `search` is not defined in the visible lines; presumably a
# NameSearch instance - confirm.
50 p
= OSMParser(concurrency
=4, nodes_callback
=search
.nodes
, ways_callback
=search
.ways
, relations_callback
=search
.relations
)
# Run the parser over the input file romania.osm.pbf.
51 p
.parse('romania.osm.pbf')
# Returns `input` with the Romanian letters ș, ț, â, ă, î (lower and upper
# case) replaced by s, t, a, a, i, and the whole result lower-cased.
# NOTE(review): the enclosing `def` line is not part of this extract;
# presumably this is the body of `toAscii`, which is called further down -
# confirm against the full file.
55 return input.replace(u
'ș',u
's').replace(u
'ț',u
't') \
56 .replace(u
'â',u
'a').replace(u
'ă',u
'a').replace(u
'î',u
'i') \
57 .replace(u
'Ș',u
's').replace(u
'Ț',u
't').replace(u
'Â',u
'a') \
58 .replace(u
'Ă',u
'a').replace(u
'Î',u
'i').lower()
# Evaluation step: reads names-sorted.txt and writes names-evaluated.txt and
# names-evaluated-plain.txt, all as UTF-8.
# NOTE(review): the loop header, the assignments to `line`, `name` and
# `lastname`, and the if/else structure are missing from this extract; the
# comments below describe only the statements that are visible.
61 s
= codecs
.open('names-sorted.txt', 'r', 'utf8')
62 d
= codecs
.open('names-evaluated.txt', 'w', 'utf8')
# NOTE(review): `p` is also the name used for the OSMParser object elsewhere
# in the file; here it is bound to an output file.
63 p
= codecs
.open('names-evaluated-plain.txt', 'w', 'utf8')
# Split the current line on tabs; only field 0 is used in the visible code.
69 splitted
= line
.split('\t')
# Compare the toAscii() forms of `name` and `lastname`.
71 if toAscii(name
) == toAscii(lastname
):
# Write the accumulated `lastnumbers`, a newline, then `lastname` (no
# trailing newline is added here).
73 d
.write(lastnumbers
+ '\n' + lastname
)
# Start a new `lastnumbers` value from this line's first field.
76 lastnumbers
= splitted
[0]
# Append this line's first field to `lastnumbers`, separated by ', '.
78 lastnumbers
= lastnumbers
+ ', ' + splitted
[0]
# Write `lastnumbers` and `lastname` on separate lines to names-evaluated.txt
# and `lastname` alone to names-evaluated-plain.txt.
81 d
.write(lastnumbers
+ '\n' + lastname
+ '\n')
82 p
.write(lastname
+ '\n')
# Start a new `lastnumbers` value from this line's first field.
84 lastnumbers
= splitted
[0]
# Progress message for the extraction step.
# NOTE(review): the statements between this message and the sort call are
# missing from this extract.
91 print('Making word list.')
# Sort names.txt with the external `sort` program, using the key that starts
# at field 2 (-k 2), and write the result to names-sorted.txt (-o).  The exit
# status returned by subprocess.call is not checked.
95 subprocess
.call(['sort', '-k', '2', '-o', 'names-sorted.txt', 'names.txt'])