Now also parse new diffs
[handlerosm.git] / osmbinary / osmbinary.c
blob93835e87a188a8c4732c2f2bdfafd0a8e5e3becd
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <sys/mman.h>
4 #include <sys/types.h>
5 #include <sys/stat.h>
6 #include <fcntl.h>
7 #include <string.h>
8 #include <errno.h>
9 #include <math.h>
10 #include <unistd.h>
12 #include "osmbinary.h"
/*
 * Expected element nesting of the input (one element per line):
 *
 * <osm>
 *   <node>
 *     <tag k=".." v=".." />
 *   </node>
 *   <way>
 *     <nd>
 *     <tag>
 *   </way>
 *   <relation>
 *     <member>
 *     <tag>
 *   </relation>
 */
29 static void parser(char *range, unsigned long int max) {
30 typedef enum { OSM = 0, NODE = 1, WAY = 2, RELATION = 3, TAG = 4, ND = 5, MEMBER = 6 } osm_state_t;
31 typedef enum { UNKNOWN = 0, ID, LAT, LON, USER, TIMESTAMP, KEY, VALUE, TYPE, REF, ROLE} key_state_t;
32 char *attr_id = NULL, *attr_lat = NULL, *attr_lon = NULL, *attr_user = NULL, *attr_timestamp = NULL, *attr_key = NULL, *attr_value = NULL,
33 *attr_type = NULL, *attr_ref = NULL, *attr_role = NULL;
35 // unsigned long int count_nodes = 0, count_node_tags = 0,
36 // count_ways = 0, count_way_tags = 0, count_way_nds = 0,
37 // count_relations = 0, count_relation_tags = 0, count_members_node = 0, count_members_relation = 0, count_members_way = 0;
39 // unsigned long int sequence = 0;
41 int bi;
42 node_t bin_node;
43 other_t bin_other;
44 tag_t bin_tag;
45 nd_t bin_nd;
46 member_t bin_member;
47 struct tm bin_tmp;
49 unlink("osm.bin");
50 bi = open("osm.bin", O_WRONLY | O_CREAT, S_IRUSR| S_IWUSR|S_IRGRP|S_IROTH);
52 osm_state_t current_tag = OSM;
53 osm_state_t parent_tag = OSM;
55 char *start, *end, *nodename, *nodename_end;
57 start = range;
58 end = strchrnul((const char*) start, '\n');
60 if (strncmp(start, "<?xml", 5) != 0)
61 return;
63 start = end + 1;
64 end = strchrnul((const char*) start, '\n');
66 if (strncmp(start, "<osm", 4) != 0)
67 return;
69 start = end + 1;
71 do {
72 end = strchrnul((const char*) start, '\n');
74 nodename = strchrnul(start, '<') + 1;
75 nodename_end = strchrnul(nodename, ' ');
77 if (nodename[0] == '/') {
78 free(attr_id);
79 free(attr_lat);
80 free(attr_lon);
81 free(attr_timestamp);
82 free(attr_user);
84 attr_id = NULL;
85 attr_lat = NULL;
86 attr_lon = NULL;
87 attr_user = NULL;
88 attr_timestamp = NULL;
90 // sequence = 0;
92 start = end + 1;
93 continue;
96 switch (nodename_end - nodename) {
97 case 2:
98 current_tag = ND;
99 break;
100 case 3: {
101 switch (nodename[0]) {
102 case 'o':
103 current_tag = OSM;
104 break;
105 case 'w':
106 current_tag = WAY;
107 break;
108 case 't':
109 current_tag = TAG;
110 break;
111 default:
112 fprintf(stderr, "--> %c%c", nodename[0], nodename[1]);
114 break;
116 case 4:
117 current_tag = NODE;
118 break;
119 case 6:
120 current_tag = MEMBER;
121 break;
122 case 8:
123 current_tag = RELATION;
124 break;
125 default:
126 fprintf(stderr, "--> %c%c", nodename[0], nodename[1]);
130 char *key, *key_end, *value_end;
131 key = nodename_end + 1;
133 do {
134 char *value;
135 key_state_t current_key = UNKNOWN;
136 key_end = strchrnul(key, '=');
138 if (key_end == NULL || key_end >= end)
139 break;
141 switch (key_end - key) {
142 case 1: {
143 switch (key[0]) {
144 case 'k':
145 current_key = KEY;
146 break;
147 case 'v':
148 current_key = VALUE;
149 break;
150 default:
151 current_key = UNKNOWN;
153 break;
155 case 2:
156 current_key = ID;
157 break;
158 case 3: {
159 switch (key[1]) {
160 case 'a':
161 current_key = LAT;
162 break;
163 case 'o':
164 current_key = LON;
165 break;
166 case 'e':
167 current_key = REF;
168 break;
169 default:
170 current_key = UNKNOWN;
171 fprintf(stderr, "--> %c%c\n", key[0], key[1]);
173 break;
175 case 4: {
176 switch (key[0]) {
177 case 'u':
178 current_key = USER;
179 break;
180 case 'r':
181 current_key = ROLE;
182 break;
183 case 't':
184 current_key = TYPE;
185 break;
186 default:
187 current_key = UNKNOWN;
188 fprintf(stderr, "--> %c%c\n", key[0], key[1]);
190 break;
192 case 9:
193 current_key = TIMESTAMP;
194 break;
195 default: {
196 char *thingie = strndup(key, (key_end - key));
197 current_key = UNKNOWN;
199 fprintf(stderr, "UNKNOWN ATTR %s-> %c%c\n", thingie, key[0], key[1]);
200 free(thingie);
204 value = key_end + 2;
205 value_end = value;
206 value_end = strchr(value_end, '"');
208 if (value_end > end)
209 break;
211 switch (current_key) {
212 case ID:
213 if (attr_id) free(attr_id);
214 attr_id = strndup(value, (value_end - value));
215 break;
217 case LAT:
218 if (attr_lat) free(attr_lat);
219 attr_lat = strndup(value, (value_end - value));
220 break;
222 case LON:
223 if (attr_lon) free(attr_lon);
224 attr_lon = strndup(value, (value_end - value));
225 break;
227 case TIMESTAMP:
228 if (attr_timestamp) free(attr_timestamp);
229 attr_timestamp = strndup(value, (value_end - value));
230 // attr_timestamp[10] = ' '; /* Stupid timestamp fix */
231 break;
233 case USER: {
234 char *tmp;
235 if (attr_user) free(attr_user);
236 attr_user = strndup(value, (value_end - value));
237 // tmp = escape_string(attr_user);
238 // free(attr_user);
239 // attr_user = tmp;
240 break;
243 case KEY: {
244 char *tmp;
245 if (attr_key) free(attr_key);
246 attr_key = strndup(value, (value_end - value));
247 // tmp = escape_string(attr_key);
248 // free(attr_key);
249 // attr_key = tmp;
250 break;
253 case VALUE: {
254 char *tmp;
255 if (attr_value) free(attr_value);
256 attr_value = strndup(value, (value_end - value));
257 // tmp = escape_string(attr_value);
258 // free(attr_value);
259 // attr_value = tmp;
260 break;
263 case TYPE:
264 if (attr_type) free(attr_type);
265 attr_type = strndup(value, (value_end - value));
266 break;
268 case REF:
269 if (attr_ref) free(attr_ref);
270 attr_ref = strndup(value, (value_end - value));
271 break;
273 case ROLE: {
274 char *tmp;
275 if (attr_role) free(attr_role);
276 attr_role = strndup(value, (value_end - value));
277 // tmp = escape_string(attr_role);
278 // free(attr_role);
279 // attr_role = tmp;
280 break;
283 default:
284 fprintf(stderr, "--> %c%c\n", value[0], value[1]);
287 key = value_end + 2;
288 } while (key < end);
290 switch (current_tag) {
291 case NODE:
292 bin_node.id = strtoul(attr_id, (char **) NULL, 10);
293 bin_node.lon = strtof(attr_lon, (char **) NULL);
294 bin_node.lat = strtof(attr_lat, (char **) NULL);
295 if (!attr_user) {
296 bin_node.userlen = 0;
297 bin_node.user = NULL;
298 } else {
299 bin_node.userlen = strlen(attr_user);
300 bin_node.user = attr_user;
302 strptime(attr_timestamp, "%FT%T%Z", &bin_tmp);
303 bin_node.stamp = mktime(&bin_tmp);
304 writenode(bi, &bin_node);
305 // fprintf(fd_nodes, "%s, %s, %s, '%s', %s\n", attr_id, attr_lat, attr_lon, attr_user, attr_timestamp);
306 // count_nodes++;
307 break;
308 case TAG: {
309 bin_tag.klen = strlen(attr_key);
310 bin_tag.k = attr_key;
311 bin_tag.vlen = strlen(attr_value);
312 bin_tag.v = attr_value;
313 writetag(bi, &bin_tag);
315 /* switch (parent_tag) {
316 case NODE:
317 fprintf(fd_node_tags, "%s, '%s', '%s'\n", attr_id, attr_key, attr_value);
318 count_node_tags++;
319 break;
320 case WAY:
321 fprintf(fd_way_tags, "%s, '%s', '%s'\n", attr_id, attr_key, attr_value);
322 count_way_tags++;
323 break;
324 case RELATION:
325 fprintf(fd_relation_tags, "%s, '%s', '%s'\n", attr_id, attr_key, attr_value);
326 count_relation_tags++;
327 break;
328 default:
329 break;
331 break;
333 case WAY:
334 bin_other.id = strtoul(attr_id, (char **) NULL, 10);
335 if (!attr_user) {
336 bin_other.userlen = 0;
337 bin_other.user = NULL;
338 } else {
339 bin_other.userlen = strlen(attr_user);
340 bin_other.user = attr_user;
342 strptime(attr_timestamp, "%FT%T%Z", &bin_tmp);
343 bin_other.stamp = mktime(&bin_tmp);
344 writeother(bi, &bin_other, 'W');
346 // fprintf(fd_ways, "%s, '%s', '%s'\n", attr_id, attr_user, attr_timestamp);
347 // count_ways++;
348 // fprintf(fd_way_tags, "%s, '%s', '%s'\n", attr_id, "type", "way");
349 // count_way_tags++;
350 break;
351 case RELATION:
352 bin_other.id = strtoul(attr_id, (char **) NULL, 10);
353 if (!attr_user) {
354 bin_other.userlen = 0;
355 bin_other.user = NULL;
356 } else {
357 bin_other.userlen = strlen(attr_user);
358 bin_other.user = attr_user;
360 strptime(attr_timestamp, "%FT%T%Z", &bin_tmp);
361 bin_other.stamp = mktime(&bin_tmp);
362 writeother(bi, &bin_other, 'R');
364 // fprintf(fd_relations, "%s, '%s', '%s'\n", attr_id, attr_user, attr_timestamp);
365 // count_relations++;
366 break;
367 case MEMBER:
368 bin_member.type = attr_type;
369 bin_member.typelen = strlen(attr_type);
370 bin_member.ref = attr_ref;
371 bin_member.reflen = strlen(attr_ref);
372 bin_member.role = attr_role;
373 bin_member.rolelen = strlen(attr_role);
374 writemember(bi, &bin_member);
376 /* if (strcmp(attr_type, "node") == 0) {
377 fprintf(fd_members_node, "%s, %lu, %s, '%s'\n", attr_id, sequence, attr_ref, attr_role);
378 count_members_node++;
379 } else if (strcmp(attr_type, "way") == 0) {
380 fprintf(fd_members_way, "%s, %lu, %s, '%s'\n", attr_id, sequence, attr_ref, attr_role);
381 count_members_way++;
382 } else if (strcmp(attr_type, "relation") == 0) {
383 fprintf(fd_members_relation, "%s, %lu, %s, '%s'\n", attr_id, sequence, attr_ref, attr_role);
384 count_members_relation++;
386 sequence++;*/
387 break;
388 case ND:
389 bin_nd.id = strtoul(attr_id, (char **) NULL, 10);
390 writend(bi, &bin_nd);
391 /* fprintf(fd_way_nds, "%s, %lu, %s\n", attr_id, sequence, attr_ref);
392 sequence++;
393 count_way_nds++;
394 break;*/
395 default:
396 break;
399 if (end[-2] == '/') {
400 switch (current_tag) {
401 case NODE:
402 free(attr_lat);
403 free(attr_lon);
404 attr_lat = NULL;
405 attr_lon = NULL;
406 /* no break! */
408 case WAY:
409 case RELATION:
410 free(attr_id);
411 free(attr_timestamp);
412 free(attr_user);
414 attr_id = NULL;
415 attr_user = NULL;
416 attr_timestamp = NULL;
418 // sequence = 0;
419 break;
421 case TAG:
422 free(attr_key);
423 free(attr_value);
425 attr_key = NULL;
426 attr_value = NULL;
427 break;
429 case ND:
430 case MEMBER:
431 free(attr_type);
432 free(attr_ref);
433 free(attr_role);
435 attr_type = NULL;
436 attr_ref = NULL;
437 attr_role = NULL;
438 default:
439 break;
441 } else if (current_tag == NODE || current_tag == WAY || current_tag == RELATION) {
442 parent_tag = current_tag;
445 } while ((start = ++end) < (range + max));
447 free(attr_id);
448 free(attr_lat);
449 free(attr_lon);
450 free(attr_timestamp);
451 free(attr_user);
453 free(attr_key);
454 free(attr_value);
456 close(bi);
/*
 * Entry point: map the file named by the single command-line argument into
 * memory read-only and hand the mapping to parser().
 *
 * An empty input file is accepted and produces no output.  Every failure is
 * reported on stderr before exiting with status -1; success exits with 0.
 */
int main(int argc, char *argv[]) {
    int fd;
    struct stat statbuf;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s <file.osm>\n", argc > 0 ? argv[0] : "osmbinary");
        exit(-1);
    }

    fprintf(stderr, "Analysing %s...\n", argv[1]);

    fd = open(argv[1], O_RDONLY);

    if (fd < 0) {
        perror(argv[1]);
        exit(-1);
    }

    if (fstat (fd, &statbuf) == -1) { perror("fstat:"); exit(-1); }

    /* mmap() rejects a zero length, so an empty file is simply skipped. */
    if (statbuf.st_size > 0) {
        char *range = NULL;
        range = mmap(NULL, statbuf.st_size, PROT_READ, MAP_SHARED, fd, (off_t) 0);
        if (range == MAP_FAILED) { perror("Mmap:"); printf("(did you compile PAE in the kernel?)\n"); exit(-1); }
        parser(range, statbuf.st_size / sizeof(char));
        munmap(range, statbuf.st_size);
    }

    close(fd);
    exit(0);
}