2 # -*- coding: utf-8 -*-
6 from xml
.dom
import minidom
9 if os
.path
.exists('OsmApi.py'):
11 sys
.path
.insert(0,'.')
12 from OsmApi
import OsmApi
14 from math
import cos
, radians
16 if os
.path
.exists('siruta.py'):
17 if not '.' in sys
.path
:
18 sys
.path
.insert(0,'.')
19 from sirutacsv
import SirutaDictReader
25 """strip any accents that may exist in a unicode object and leave only the base ASCII char"""
28 s
= unicode(s
,'utf-8')
31 s
= s
.replace(u
"-", u
" ")
32 return ''.join((c
for c
in unicodedata
.normalize('NFD', s
) if unicodedata
.category(c
) != 'Mn'))
35 def simplifyName(unicode_name
):
36 """a function to turn into lowercase ASCII any name (by stripping accents)"""
38 simplename
= unicode_name
.lower()
39 simplename
= stripAccents(simplename
)
43 def getNodesList(osmxmlcontents
):
44 """extracts only the nodes from an OSM XML and stores them in a OsmApi structure"""
46 if len(osmxmlcontents
) > 1:
47 raise ValueError('Too many osm blocks in one XML')
49 # select only the nodes from the OSM XML
50 xmlnodes
= osmxmlcontents
[0].getElementsByTagName ('node')
52 # TODO: use an internal data member for the api
55 for xmlnode
in xmlnodes
:
56 osmnode
= api
._DomParseNode
(xmlnode
)
57 osmnodes
.append(osmnode
)
62 def getMatchingPlaces(osmapielements
,placename
):
63 """filters from 'osmapielements' only the places with the same simplified name as 'placename'"""
66 sname
= simplifyName (placename
)
68 for i
in osmapielements
:
70 if len( i
[u
"tag"][u
"place"] ) > 0 and \
71 sname
== simplifyName(i
[u
"tag"][u
"name"]):
74 # that node didn't have the 'place' or a 'name' tag, so is uninteresting
80 def locatePlaceInXML(xml
,placename
):
81 """Looks for nodes which have the attribute 'place' and whose name looks like 'placename'
82 into the xml document 'xml'. The input xml document is the one returned by the OSM API
83 through a query for data within a bounding box"""
85 if os
.path
.exists(xml
):
86 xmldoc
= minidom
.parse(xml
)
88 xmldoc
= minidom
.parseString(xml
.encode("utf-8"))
90 # each OSM XML has a single root osm element
91 osmxmlcontents
= xmldoc
.getElementsByTagName('osm')
93 nodeslist
= getNodesList(osmxmlcontents
)
95 nodeswithtags
= [ x
for x
in nodeslist
if len(x
[u
"tag"]) > 0 ]
97 places
= getMatchingPlaces(nodeswithtags
, placename
)
101 def getArea ( bbox_str
):
102 """Given the bounding box defined by the bbox_str string, return the map within that bbox"""
104 path
= "/api/0.6/map?bbox=" + bbox_str
106 # TODO: use an internal data member for the api
108 data
= api
._get
( path
)
def getMapAroundPoint(lon, lat, bbox_km = 10):
    """Get from the API the map around the point at longitude 'lon' and latitude 'lat'.

    lon, lat -- coordinates of the centre of the area, in degrees; numbers or
                their textual form are both accepted (they go through float())
    bbox_km  -- approximate length, in km, of the side of the square bounding
                box centred on the point

    Returns whatever getArea() returns for the computed bounding box.
    Raises ValueError if 'lon' or 'lat' cannot be converted to a float.
    """

    # the values read from the CSV arrive as text, the math below needs numbers
    lon = float(lon)
    lat = float(lat)

    # one degree latitude is approximately 111km
    # and we want to know what's half of bbox_km in lat degrees
    delta_lat = bbox_km / 222.0

    # one degree longitude is approximately cos(lat) * 111km, so the same
    # distance covers 1/cos(lat) times MORE degrees of longitude than of
    # latitude; multiplying by cos(lat) would shrink the box instead
    cos_lat = cos(radians(lat))
    if cos_lat > 1e-6:
        delta_lon = min(delta_lat / cos_lat, 180.0)
    else:
        # next to the poles every meridian is within reach
        delta_lon = 180.0

    lat_b = lat - delta_lat
    lat_t = lat + delta_lat

    lon_l = lon - delta_lon
    lon_r = lon + delta_lon

    # the bounding box is given as left,bottom,right,top
    path = "%.6f,%.6f,%.6f,%.6f" % (lon_l, lat_b, lon_r, lat_t)

    area_xml_string = getArea(path)

    return area_xml_string
def simpleName(placename):
    """Removes from a name of a place any prefix that indicates its classification.

    The prefixes u"Municipiul " and u"Oraș " are removed, in this order, and
    only when they are found at the very beginning of the name; the same text
    in the middle of a name is left alone.

    placename -- the name to clean up (a unicode string)

    Returns the name without the classification prefix; a name which has no
    such prefix is returned unchanged.
    """
    simpleplacename = placename
    for prefix in (u"Municipiul ", u"Oraș "):
        # replace(prefix, u"", 1) would also delete the first occurrence found
        # in the middle of a name, so the start is checked explicitly
        if simpleplacename.startswith(prefix):
            simpleplacename = simpleplacename[len(prefix):]

    return simpleplacename
146 def sirutaTypeToPlace(sirutarank
, population
):
147 """Maps siruta ranks to proper 'place' values. The siruta types are explained bellow
149 Cod Denumire tip de unitate administrativ teritorială
151 40 Judeţ, municipiul Bucureşti
152 1 Municipiu reşedinţă de judeţ, reşedinţă a municipiului Bucureşti
153 2 Oraş ce aparţine de judeţ, altul decât oraş reşedinţă de judeţ
155 4 Municipiu, altul decât reşedinţă de judeţ
156 5 Oraş reşedinţă de judeţ
157 6 Sector al municipiului Bucureşti
158 9 Localitate componentă, reşedinţă de municipiu
159 10 Localitate componentă, a unui municipiu alta decât reşedinţă de municipiu
160 11 Sat ce aparţine de municipiu
161 17 Localitate componentă reşedinţă a oraşului
162 18 Localitate componentă a unui oraş, alta decât reşedinţă de oraş
163 19 Sat care aparţine unui oraş
164 22 Sat reşedinţă de comună
165 23 Sat ce aparţine de comună, altul decât reşedinţă de comună
168 rank
= int(sirutarank
)
170 # municipii, reședințe de județ, reședințe de municipiu
171 if rank
in [ 1, 4, 9, 40 ]:
173 # orașe, orașe reședință de județ, reședințe ale orașelor
174 if rank
in [ 2, 5, 17 ]:
176 # localități componente ale orașelor sau municpiilor, altele decât reședințele
177 if rank
in [ 10, 18 ]:
179 # comune, sate parte din municipii, sate parte din orașe, reședințe de comună, sate non-reședință
180 if rank
in [ 3, 11, 19, 22, 23 ]:
181 # doar satele non-reședință ce aparțin de comune pot fi cătune (hamlet)
182 if rank
== 23 and int(population
) < 50:
186 # sectoarele municipiului București
190 raise ValueError, "Unexpected rank value in siruta data"
192 def nodeDictForPlace(sirutadict
, oldnode
= None):
193 """Creates a proper dictionary structure for the node defined in 'sirutadict' taking into account
194 the existing data which is present in 'oldnode'"""
199 # it seems some of the input contains no data for the population and the rank,
200 # which probably means 0 for population, and we don't care for rank
202 if sirutadict
[u
"population2002"] == u
"":
203 sirutadict
[u
"population2002"] = u
"0"
207 tags
= oldnode
[u
"tag"]
209 if not u
"population" in tags
:
210 tags
[u
"population"] = sirutadict
[u
"population2002"]
212 if u
"postal_code" in tags
:
213 if int(tags
[u
"postal_code"]) == int(sirutadict
[u
"old_postal_code"]):
214 tags
.pop(u
"postal_code")
216 if u
"addr:postcode" in tags
:
217 if int(tags
[u
"addr:postcode"]) == int(sirutadict
[u
"old_postal_code"]):
218 tags
.pop(u
"addr:postcode")
222 node
[u
"lat"] = float(sirutadict
[u
"lat"])
223 node
[u
"lon"] = float(sirutadict
[u
"lon"])
224 tags
[u
"population"] = sirutadict
[u
"population2002"]
226 # consistently add the 1992 census data
227 tags
[u
"population:census:1992"] = sirutadict
[u
"population2002"]
229 # this should probably be ran even for existing nodes
230 tags
[u
"place"] = sirutaTypeToPlace(sirutadict
[u
"siruta:type"], tags
[u
"population"])
232 # clean up siruta:name_sup
233 sirutadict
[u
"siruta:name_sup"] = simpleName(sirutadict
[u
"siruta:name_sup"])
241 u
"siruta:region_id", \
242 u
"siruta:enviro_type", \
245 mergetags
= sirutadict
.copy()
246 for tag
in sirutadict
:
247 if tag
in uninteresting
:
250 tags
.update(mergetags
)
252 simplesup
= simpleName(sirutadict
[u
"siruta:name_sup"])
254 tags
[u
"is_in:country"] = u
"România"
255 is_in
.insert(0,sirutadict
[u
"siruta:county"])
256 tags
[u
"is_in:county"] = sirutadict
[u
"siruta:county"]
257 if tags
[u
"name"] <> simplesup
:
258 is_in
.insert(0,simplesup
)
260 tags
[u
"is_in"] = u
";".join(is_in
)
266 def getSameSiruta(elementlist
, sirutacode
):
267 """returns a list with all the elements in list which have the siruta:code == sirutacode"""
270 for x
in elementlist
:
272 if x
[u
"tag"][u
"siruta:code"] == sirutacode
:
273 newlist
.append(x
.copy())
279 def readAndProcessSirutaCsv(file, comment
= None, source
= None):
280 """reads the input CSV file and processes each entry"""
282 csvfile
= open (file, 'r')
283 reader
= SirutaDictReader( csvfile
)
285 homedir
= os
.environ
['HOME']
286 api
= OsmApi(passwordfile
= homedir
+ '/.config/osm-import-loc/osm-auth', appid
= 'RoOsmLocImporter')
289 comment
= 'import places from ' + file
291 comment
= unicode(comment
,'utf-8')
293 source
= u
"http://geo-spatial.org siruta data import"
295 source
= unicode(source
,'utf-8')
296 cs_tags
= { 'comment' : comment
, 'source' : source
}
298 api
.ChangesetCreate(cs_tags
)
300 for csvplace
in reader
:
302 uname
= csvplace
[u
"name"].encode("utf-8")
303 print "Processing data for %s ..." % ( uname
)
305 map = getMapAroundPoint ( lat
= csvplace
[u
"lat"], lon
= csvplace
[u
"lon"] )
306 existing_nodes
= locatePlaceInXML ( map, csvplace
[u
"name"] )
308 if len(existing_nodes
) == 0:
309 # node doesn't exist for this place, or is far; we can create the node
310 nodedict
= nodeDictForPlace ( csvplace
)
312 api
.NodeCreate(nodedict
)
313 print "Created new node for %s" % ( uname
)
315 elif len(existing_nodes
) > 1:
316 # I am confused, more than one node with the same simplified name
317 # try to see if there's already a siruta code attached
319 newlist
= getSameSiruta( existing_nodes
, csvplace
[u
"siruta:code"] )
320 if len(newlist
) == 1:
321 existing_nodes
= newlist
323 print >> sys
.stderr
, "Skipping %s: Too many (%d) existing nodes with the same name at (lat=%s,lon=%s)" % (
326 csvplace
[u
"lat"].encode("utf-8"),
327 csvplace
[u
"lon"].encode("utf-8") )
329 if len(existing_nodes
) == 1:
330 # there is an existing code, so we merge with that
331 referencenode
= existing_nodes
[0].copy()
332 # dictionaries don't get copied by default
333 referencenode
[u
"tag"] = existing_nodes
[0][u
"tag"].copy()
334 nodedict
= nodeDictForPlace ( csvplace
, existing_nodes
[0] )
336 if nodedict
== referencenode
:
337 print "Skipping: No changes needed for node %s" % ( uname
)
339 api
.NodeUpdate(nodedict
)
340 print "Updated existing node for %s" % ( uname
)
349 print "%s [-c|--comment <comment>] [-s|--source <source>] -i <inputcsv>" % sys
.argv
[0]
351 def main(argv
= None):
357 opts
, args
= getopt
.getopt(sys
.argv
[1:], "hi:c:s:",
358 ["help", "input=", "comment=", "source="] )
359 except getopt
.GetoptError
, err
:
360 # print help information and exit:
370 if o
in ("-h", "help"):
373 elif o
in ("-i", "--input"):
375 elif o
in ("-s", "--source"):
377 elif o
in ("-c", "--comment"):
381 print "Input csv file (-i option) is mandatory. Run the script with -h for online help."
385 readAndProcessSirutaCsv(file, source
=source
, comment
=comment
)
388 if __name__
== "__main__":