rename roken base64, fixes #107
[heimdal.git] / lib / base / json.c
blob2ef371b975ea99f47b78d8e8ee238271fef71bfa
/*
 * Copyright (c) 2010 Kungliga Tekniska Högskolan
 * (Royal Institute of Technology, Stockholm, Sweden).
 * All rights reserved.
 *
 * Portions Copyright (c) 2010 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the Institute nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "baselocl.h"
#include <ctype.h>
#include <limits.h>
#include <base64.h>

40 static heim_base_once_t heim_json_once = HEIM_BASE_ONCE_INIT;
41 static heim_string_t heim_tid_data_uuid_key = NULL;
42 static const char base64_chars[] =
43 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
45 static void
46 json_init_once(void *arg)
48 heim_tid_data_uuid_key = __heim_string_constant("heimdal-type-data-76d7fca2-d0da-4b20-a126-1a10f8a0eae6");
51 struct twojson {
52 void *ctx;
53 void (*out)(void *, const char *);
54 size_t indent;
55 heim_json_flags_t flags;
56 int ret;
57 int first;
60 struct heim_strbuf {
61 char *str;
62 size_t len;
63 size_t alloced;
64 int enomem;
65 heim_json_flags_t flags;
68 static int
69 base2json(heim_object_t, struct twojson *);
71 static void
72 indent(struct twojson *j)
74 size_t i = j->indent;
75 if (j->flags & HEIM_JSON_F_ONE_LINE)
76 return;
77 while (i--)
78 j->out(j->ctx, "\t");
81 static void
82 array2json(heim_object_t value, void *ctx, int *stop)
84 struct twojson *j = ctx;
85 if (j->ret)
86 return;
87 if (j->first) {
88 j->first = 0;
89 } else {
90 j->out(j->ctx, NULL); /* eat previous '\n' if possible */
91 j->out(j->ctx, ",\n");
93 j->ret = base2json(value, j);
96 static void
97 dict2json(heim_object_t key, heim_object_t value, void *ctx)
99 struct twojson *j = ctx;
100 if (j->ret)
101 return;
102 if (j->first) {
103 j->first = 0;
104 } else {
105 j->out(j->ctx, NULL); /* eat previous '\n' if possible */
106 j->out(j->ctx, ",\n");
108 j->ret = base2json(key, j);
109 if (j->ret)
110 return;
111 j->out(j->ctx, " : \n");
112 j->indent++;
113 j->ret = base2json(value, j);
114 if (j->ret)
115 return;
116 j->indent--;
119 static int
120 base2json(heim_object_t obj, struct twojson *j)
122 heim_tid_t type;
123 int first = 0;
125 if (obj == NULL) {
126 if (j->flags & HEIM_JSON_F_CNULL2JSNULL) {
127 obj = heim_null_create();
128 } else if (j->flags & HEIM_JSON_F_NO_C_NULL) {
129 return EINVAL;
130 } else {
131 indent(j);
132 j->out(j->ctx, "<NULL>\n"); /* This is NOT valid JSON! */
133 return 0;
137 type = heim_get_tid(obj);
138 switch (type) {
139 case HEIM_TID_ARRAY:
140 indent(j);
141 j->out(j->ctx, "[\n");
142 j->indent++;
143 first = j->first;
144 j->first = 1;
145 heim_array_iterate_f(obj, j, array2json);
146 j->indent--;
147 if (!j->first)
148 j->out(j->ctx, "\n");
149 indent(j);
150 j->out(j->ctx, "]\n");
151 j->first = first;
152 break;
154 case HEIM_TID_DICT:
155 indent(j);
156 j->out(j->ctx, "{\n");
157 j->indent++;
158 first = j->first;
159 j->first = 1;
160 heim_dict_iterate_f(obj, j, dict2json);
161 j->indent--;
162 if (!j->first)
163 j->out(j->ctx, "\n");
164 indent(j);
165 j->out(j->ctx, "}\n");
166 j->first = first;
167 break;
169 case HEIM_TID_STRING:
170 indent(j);
171 j->out(j->ctx, "\"");
172 j->out(j->ctx, heim_string_get_utf8(obj));
173 j->out(j->ctx, "\"");
174 break;
176 case HEIM_TID_DATA: {
177 heim_dict_t d;
178 heim_string_t v;
179 const heim_octet_string *data;
180 char *b64 = NULL;
181 int ret;
183 if (j->flags & HEIM_JSON_F_NO_DATA)
184 return EINVAL; /* JSON doesn't do binary */
186 data = heim_data_get_data(obj);
187 ret = rk_base64_encode(data->data, data->length, &b64);
188 if (ret < 0 || b64 == NULL)
189 return ENOMEM;
191 if (j->flags & HEIM_JSON_F_NO_DATA_DICT) {
192 indent(j);
193 j->out(j->ctx, "\"");
194 j->out(j->ctx, b64); /* base64-encode; hope there's no aliasing */
195 j->out(j->ctx, "\"");
196 free(b64);
197 } else {
199 * JSON has no way to represent binary data, therefore the
200 * following is a Heimdal-specific convention.
202 * We encode binary data as a dict with a single very magic
203 * key with a base64-encoded value. The magic key includes
204 * a uuid, so we're not likely to alias accidentally.
206 d = heim_dict_create(2);
207 if (d == NULL) {
208 free(b64);
209 return ENOMEM;
211 v = heim_string_ref_create(b64, free);
212 if (v == NULL) {
213 free(b64);
214 heim_release(d);
215 return ENOMEM;
217 ret = heim_dict_set_value(d, heim_tid_data_uuid_key, v);
218 heim_release(v);
219 if (ret) {
220 heim_release(d);
221 return ENOMEM;
223 ret = base2json(d, j);
224 heim_release(d);
225 if (ret)
226 return ret;
228 break;
231 case HEIM_TID_NUMBER: {
232 char num[32];
233 indent(j);
234 snprintf(num, sizeof (num), "%d", heim_number_get_int(obj));
235 j->out(j->ctx, num);
236 break;
238 case HEIM_TID_NULL:
239 indent(j);
240 j->out(j->ctx, "null");
241 break;
242 case HEIM_TID_BOOL:
243 indent(j);
244 j->out(j->ctx, heim_bool_val(obj) ? "true" : "false");
245 break;
246 default:
247 return 1;
249 return 0;
252 static int
253 heim_base2json(heim_object_t obj, void *ctx, heim_json_flags_t flags,
254 void (*out)(void *, const char *))
256 struct twojson j;
258 if (flags & HEIM_JSON_F_STRICT_STRINGS)
259 return ENOTSUP; /* Sorry, not yet! */
261 heim_base_once_f(&heim_json_once, NULL, json_init_once);
263 j.indent = 0;
264 j.ctx = ctx;
265 j.out = out;
266 j.flags = flags;
267 j.ret = 0;
268 j.first = 1;
270 return base2json(obj, &j);
278 struct parse_ctx {
279 unsigned long lineno;
280 const uint8_t *p;
281 const uint8_t *pstart;
282 const uint8_t *pend;
283 heim_error_t error;
284 size_t depth;
285 heim_json_flags_t flags;
289 static heim_object_t
290 parse_value(struct parse_ctx *ctx);
293 * This function eats whitespace, but, critically, it also succeeds
294 * only if there's anything left to parse.
296 static int
297 white_spaces(struct parse_ctx *ctx)
299 while (ctx->p < ctx->pend) {
300 uint8_t c = *ctx->p;
301 if (c == ' ' || c == '\t' || c == '\r') {
303 } else if (c == '\n') {
304 ctx->lineno++;
305 } else
306 return 0;
307 (ctx->p)++;
309 return -1;
/* Return non-zero iff `n' is an ASCII decimal digit. */
static int
is_number(uint8_t n)
{
    /* Bytes below '0' wrap to a large unsigned value and fail the test. */
    return (unsigned int)(n - '0') < 10u;
}

318 static heim_number_t
319 parse_number(struct parse_ctx *ctx)
321 int number = 0, neg = 1;
323 if (ctx->p >= ctx->pend)
324 return NULL;
326 if (*ctx->p == '-') {
327 if (ctx->p + 1 >= ctx->pend)
328 return NULL;
329 neg = -1;
330 ctx->p += 1;
333 while (ctx->p < ctx->pend) {
334 if (is_number(*ctx->p)) {
335 number = (number * 10) + (*ctx->p - '0');
336 } else {
337 break;
339 ctx->p += 1;
342 return heim_number_create(number * neg);
345 static heim_string_t
346 parse_string(struct parse_ctx *ctx)
348 const uint8_t *start;
349 int quote = 0;
351 if (ctx->flags & HEIM_JSON_F_STRICT_STRINGS) {
352 ctx->error = heim_error_create(EINVAL, "Strict JSON string encoding "
353 "not yet supported");
354 return NULL;
357 if (*ctx->p != '"') {
358 ctx->error = heim_error_create(EINVAL, "Expected a JSON string but "
359 "found something else at line %lu",
360 ctx->lineno);
361 return NULL;
363 start = ++ctx->p;
365 while (ctx->p < ctx->pend) {
366 if (*ctx->p == '\n') {
367 ctx->lineno++;
368 } else if (*ctx->p == '\\') {
369 if (ctx->p + 1 == ctx->pend)
370 goto out;
371 ctx->p++;
372 quote = 1;
373 } else if (*ctx->p == '"') {
374 heim_object_t o;
376 if (quote) {
377 char *p0, *p;
378 p = p0 = malloc(ctx->p - start);
379 if (p == NULL)
380 goto out;
381 while (start < ctx->p) {
382 if (*start == '\\') {
383 start++;
384 /* XXX validate quoted char */
386 *p++ = *start++;
388 o = heim_string_create_with_bytes(p0, p - p0);
389 free(p0);
390 } else {
391 o = heim_string_create_with_bytes(start, ctx->p - start);
392 if (o == NULL) {
393 ctx->error = heim_error_create_enomem();
394 return NULL;
397 /* If we can decode as base64, then let's */
398 if (ctx->flags & HEIM_JSON_F_TRY_DECODE_DATA) {
399 void *buf;
400 size_t len;
401 const char *s;
403 s = heim_string_get_utf8(o);
404 len = strlen(s);
406 if (len >= 4 && strspn(s, base64_chars) >= len - 2) {
407 buf = malloc(len);
408 if (buf == NULL) {
409 heim_release(o);
410 ctx->error = heim_error_create_enomem();
411 return NULL;
413 len = rk_base64_decode(s, buf);
414 if (len == -1) {
415 free(buf);
416 return o;
418 heim_release(o);
419 o = heim_data_ref_create(buf, len, free);
423 ctx->p += 1;
425 return o;
427 ctx->p += 1;
429 out:
430 ctx->error = heim_error_create(EINVAL, "ran out of string");
431 return NULL;
434 static int
435 parse_pair(heim_dict_t dict, struct parse_ctx *ctx)
437 heim_string_t key;
438 heim_object_t value;
440 if (white_spaces(ctx))
441 return -1;
443 if (*ctx->p == '}') {
444 ctx->p++;
445 return 0;
448 if (ctx->flags & HEIM_JSON_F_STRICT_DICT)
449 /* JSON allows only string keys */
450 key = parse_string(ctx);
451 else
452 /* heim_dict_t allows any heim_object_t as key */
453 key = parse_value(ctx);
454 if (key == NULL)
455 /* Even heim_dict_t does not allow C NULLs as keys though! */
456 return -1;
458 if (white_spaces(ctx)) {
459 heim_release(key);
460 return -1;
463 if (*ctx->p != ':') {
464 heim_release(key);
465 return -1;
468 ctx->p += 1; /* safe because we call white_spaces() next */
470 if (white_spaces(ctx)) {
471 heim_release(key);
472 return -1;
475 value = parse_value(ctx);
476 if (value == NULL &&
477 (ctx->error != NULL || (ctx->flags & HEIM_JSON_F_NO_C_NULL))) {
478 if (ctx->error == NULL)
479 ctx->error = heim_error_create(EINVAL, "Invalid JSON encoding");
480 heim_release(key);
481 return -1;
483 heim_dict_set_value(dict, key, value);
484 heim_release(key);
485 heim_release(value);
487 if (white_spaces(ctx))
488 return -1;
490 if (*ctx->p == '}') {
492 * Return 1 but don't consume the '}' so we can count the one
493 * pair in a one-pair dict
495 return 1;
496 } else if (*ctx->p == ',') {
497 ctx->p++;
498 return 1;
500 return -1;
503 static heim_dict_t
504 parse_dict(struct parse_ctx *ctx)
506 heim_dict_t dict;
507 size_t count = 0;
508 int ret;
510 heim_assert(*ctx->p == '{', "string doesn't start with {");
512 dict = heim_dict_create(11);
513 if (dict == NULL) {
514 ctx->error = heim_error_create_enomem();
515 return NULL;
518 ctx->p += 1; /* safe because parse_pair() calls white_spaces() first */
520 while ((ret = parse_pair(dict, ctx)) > 0)
521 count++;
522 if (ret < 0) {
523 heim_release(dict);
524 return NULL;
526 if (count == 1 && !(ctx->flags & HEIM_JSON_F_NO_DATA_DICT)) {
527 heim_object_t v = heim_dict_copy_value(dict, heim_tid_data_uuid_key);
530 * Binary data encoded as a dict with a single magic key with
531 * base64-encoded value? Decode as heim_data_t.
533 if (v != NULL && heim_get_tid(v) == HEIM_TID_STRING) {
534 void *buf;
535 size_t len;
537 buf = malloc(strlen(heim_string_get_utf8(v)));
538 if (buf == NULL) {
539 heim_release(dict);
540 heim_release(v);
541 ctx->error = heim_error_create_enomem();
542 return NULL;
544 len = rk_base64_decode(heim_string_get_utf8(v), buf);
545 heim_release(v);
546 if (len == -1) {
547 free(buf);
548 return dict; /* assume aliasing accident */
550 heim_release(dict);
551 return (heim_dict_t)heim_data_ref_create(buf, len, free);
554 return dict;
557 static int
558 parse_item(heim_array_t array, struct parse_ctx *ctx)
560 heim_object_t value;
562 if (white_spaces(ctx))
563 return -1;
565 if (*ctx->p == ']') {
566 ctx->p++; /* safe because parse_value() calls white_spaces() first */
567 return 0;
570 value = parse_value(ctx);
571 if (value == NULL &&
572 (ctx->error || (ctx->flags & HEIM_JSON_F_NO_C_NULL)))
573 return -1;
575 heim_array_append_value(array, value);
576 heim_release(value);
578 if (white_spaces(ctx))
579 return -1;
581 if (*ctx->p == ']') {
582 ctx->p++;
583 return 0;
584 } else if (*ctx->p == ',') {
585 ctx->p++;
586 return 1;
588 return -1;
591 static heim_array_t
592 parse_array(struct parse_ctx *ctx)
594 heim_array_t array = heim_array_create();
595 int ret;
597 heim_assert(*ctx->p == '[', "array doesn't start with [");
598 ctx->p += 1;
600 while ((ret = parse_item(array, ctx)) > 0)
602 if (ret < 0) {
603 heim_release(array);
604 return NULL;
606 return array;
609 static heim_object_t
610 parse_value(struct parse_ctx *ctx)
612 size_t len;
613 heim_object_t o;
615 if (white_spaces(ctx))
616 return NULL;
618 if (*ctx->p == '"') {
619 return parse_string(ctx);
620 } else if (*ctx->p == '{') {
621 if (ctx->depth-- == 1) {
622 ctx->error = heim_error_create(EINVAL, "JSON object too deep");
623 return NULL;
625 o = parse_dict(ctx);
626 ctx->depth++;
627 return o;
628 } else if (*ctx->p == '[') {
629 if (ctx->depth-- == 1) {
630 ctx->error = heim_error_create(EINVAL, "JSON object too deep");
631 return NULL;
633 o = parse_array(ctx);
634 ctx->depth++;
635 return o;
636 } else if (is_number(*ctx->p) || *ctx->p == '-') {
637 return parse_number(ctx);
640 len = ctx->pend - ctx->p;
642 if ((ctx->flags & HEIM_JSON_F_NO_C_NULL) == 0 &&
643 len >= 6 && memcmp(ctx->p, "<NULL>", 6) == 0) {
644 ctx->p += 6;
645 return heim_null_create();
646 } else if (len >= 4 && memcmp(ctx->p, "null", 4) == 0) {
647 ctx->p += 4;
648 return heim_null_create();
649 } else if (len >= 4 && strncasecmp((char *)ctx->p, "true", 4) == 0) {
650 ctx->p += 4;
651 return heim_bool_create(1);
652 } else if (len >= 5 && strncasecmp((char *)ctx->p, "false", 5) == 0) {
653 ctx->p += 5;
654 return heim_bool_create(0);
657 ctx->error = heim_error_create(EINVAL, "unknown char %c at %lu line %lu",
658 (char)*ctx->p,
659 (unsigned long)(ctx->p - ctx->pstart),
660 ctx->lineno);
661 return NULL;
665 heim_object_t
666 heim_json_create(const char *string, size_t max_depth, heim_json_flags_t flags,
667 heim_error_t *error)
669 return heim_json_create_with_bytes(string, strlen(string), max_depth, flags,
670 error);
673 heim_object_t
674 heim_json_create_with_bytes(const void *data, size_t length, size_t max_depth,
675 heim_json_flags_t flags, heim_error_t *error)
677 struct parse_ctx ctx;
678 heim_object_t o;
680 heim_base_once_f(&heim_json_once, NULL, json_init_once);
682 ctx.lineno = 1;
683 ctx.p = data;
684 ctx.pstart = data;
685 ctx.pend = ((uint8_t *)data) + length;
686 ctx.error = NULL;
687 ctx.flags = flags;
688 ctx.depth = max_depth;
690 o = parse_value(&ctx);
692 if (o == NULL && error) {
693 *error = ctx.error;
694 } else if (ctx.error) {
695 heim_release(ctx.error);
698 return o;
/*
 * Output callback for heim_show(): `ctx' is the stdio stream to print to.
 * A NULL string is ignored.
 */
static void
show_printf(void *ctx, const char *str)
{
    FILE *stream = ctx;

    if (str != NULL)
        fprintf(stream, "%s", str);
}

711 * Dump a heimbase object to stderr (useful from the debugger!)
713 * @param obj object to dump using JSON or JSON-like format
715 * @addtogroup heimbase
717 void
718 heim_show(heim_object_t obj)
720 heim_base2json(obj, stderr, HEIM_JSON_F_NO_DATA_DICT, show_printf);
723 static void
724 strbuf_add(void *ctx, const char *str)
726 struct heim_strbuf *strbuf = ctx;
727 size_t len;
729 if (strbuf->enomem)
730 return;
732 if (str == NULL) {
734 * Eat the last '\n'; this is used when formatting dict pairs
735 * and array items so that the ',' separating them is never
736 * preceded by a '\n'.
738 if (strbuf->len > 0 && strbuf->str[strbuf->len - 1] == '\n')
739 strbuf->len--;
740 return;
743 len = strlen(str);
744 if ((len + 1) > (strbuf->alloced - strbuf->len)) {
745 size_t new_len = strbuf->alloced + (strbuf->alloced >> 2) + len + 1;
746 char *s;
748 s = realloc(strbuf->str, new_len);
749 if (s == NULL) {
750 strbuf->enomem = 1;
751 return;
753 strbuf->str = s;
754 strbuf->alloced = new_len;
756 /* +1 so we copy the NUL */
757 (void) memcpy(strbuf->str + strbuf->len, str, len + 1);
758 strbuf->len += len;
759 if (strbuf->str[strbuf->len - 1] == '\n' &&
760 strbuf->flags & HEIM_JSON_F_ONE_LINE)
761 strbuf->len--;
764 #define STRBUF_INIT_SZ 64
766 heim_string_t
767 heim_json_copy_serialize(heim_object_t obj, heim_json_flags_t flags, heim_error_t *error)
769 heim_string_t str;
770 struct heim_strbuf strbuf;
771 int ret;
773 if (error)
774 *error = NULL;
776 memset(&strbuf, 0, sizeof (strbuf));
777 strbuf.str = malloc(STRBUF_INIT_SZ);
778 if (strbuf.str == NULL) {
779 if (error)
780 *error = heim_error_create_enomem();
781 return NULL;
783 strbuf.len = 0;
784 strbuf.alloced = STRBUF_INIT_SZ;
785 strbuf.str[0] = '\0';
786 strbuf.flags = flags;
788 ret = heim_base2json(obj, &strbuf, flags, strbuf_add);
789 if (ret || strbuf.enomem) {
790 if (error) {
791 if (strbuf.enomem || ret == ENOMEM)
792 *error = heim_error_create_enomem();
793 else
794 *error = heim_error_create(1, "Impossible to JSON-encode "
795 "object");
797 free(strbuf.str);
798 return NULL;
800 if (flags & HEIM_JSON_F_ONE_LINE) {
801 strbuf.flags &= ~HEIM_JSON_F_ONE_LINE;
802 strbuf_add(&strbuf, "\n");
804 str = heim_string_ref_create(strbuf.str, free);
805 if (str == NULL) {
806 if (error)
807 *error = heim_error_create_enomem();
808 free(strbuf.str);
810 return str;