Update Red Hat Copyright Notices
[nbdkit.git] / plugins / data / format.c
blobfa643d7137df8d7625ff7db1052d97329a161845
1 /* nbdkit
2 * Copyright Red Hat
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * * Neither the name of Red Hat nor the names of its contributors may be
16 * used to endorse or promote products derived from this software without
17 * specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
22 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
26 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
27 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
33 #include <config.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <stdbool.h>
38 #include <stdint.h>
39 #include <inttypes.h>
40 #include <stdarg.h>
41 #include <string.h>
42 #include <assert.h>
43 #include <signal.h>
45 #define NBDKIT_API_VERSION 2
46 #include <nbdkit-plugin.h>
48 #include "allocator.h"
49 #include "ascii-ctype.h"
50 #include "byte-swapping.h"
51 #include "cleanup.h"
52 #include "hexdigit.h"
53 #include "ispowerof2.h"
54 #include "minmax.h"
55 #include "nbdkit-string.h"
56 #include "rounding.h"
57 #include "strndup.h"
58 #include "vector.h"
59 #include "windows-compat.h"
61 #include "data.h"
62 #include "format.h"
64 /* To print the AST, use -D data.AST=1 */
65 NBDKIT_DLL_PUBLIC int data_debug_AST = 0;
67 /* The abstract syntax tree. */
68 typedef struct expr expr_t;
70 static string
71 substring (string s, size_t offset, size_t len)
73 size_t i;
74 string r = empty_vector;
76 for (i = 0; i < len; ++i) {
77 assert (offset+i < s.len);
78 if (string_append (&r, s.ptr[offset+i]) == -1) {
79 nbdkit_error ("realloc: %m");
80 exit (EXIT_FAILURE);
84 return r;
87 typedef size_t node_id; /* references a node in expr_table below */
88 DEFINE_VECTOR_TYPE (node_ids, node_id);
90 enum expr_type {
91 EXPR_NULL = 0, /* null expression, no effect */
92 EXPR_LIST, /* list - list of node IDs / nested expr */
93 EXPR_BYTE, /* b - single byte */
94 EXPR_ABS_OFFSET, /* ui - absolute offset (@OFFSET) */
95 EXPR_REL_OFFSET, /* i - relative offset (@+N or @-N) */
96 EXPR_ALIGN_OFFSET, /* ui - align offset (@^ALIGNMENT) */
97 EXPR_FILE, /* filename - read a file */
98 EXPR_SCRIPT, /* script - run script */
99 EXPR_STRING, /* string - string + length */
100 EXPR_FILL, /* fl.n, fl.b - fill same byte b, N times */
101 EXPR_NAME, /* name - insert a named expression */
102 EXPR_ASSIGN, /* a.name, a.id - assign name to expr */
103 EXPR_REPEAT, /* r.id, r.n - expr * N */
104 EXPR_SLICE, /* sl.id, sl.n, sl.m - expr[N:M] */
107 struct expr {
108 enum expr_type t;
109 union {
110 node_ids list;
111 uint8_t b;
112 int64_t i;
113 uint64_t ui;
114 node_id id;
115 char *filename;
116 char *script;
117 string string;
118 struct {
119 uint64_t n;
120 uint8_t b;
121 } fl;
122 char *name;
123 struct {
124 char *name;
125 node_id id;
126 } a;
127 struct {
128 node_id id;
129 uint64_t n;
130 } r;
131 struct {
132 node_id id;
133 uint64_t n;
134 int64_t m;
135 } sl;
139 /* We store a list of expressions (expr_t) in a global table.
141 * When referencing one expression from another (eg. for EXPR_REPEAT)
142 * we refer to the index into this table (node_id) instead of pointing
143 * to the expr_t directly.
145 * This allows us to have nodes which reference each other or are
146 * shared or removed without having to worry about reference counting.
147 * The whole table is freed when the plugin is unloaded.
149 * As an optimization, the zeroth element (node_id == 0) is a common
150 * EXPR_NULL. (This is optimized automatically, don't use node_id 0
151 * explicitly, in particular because if the table hasn't yet been
152 * allocated then there is no zeroth element).
154 DEFINE_VECTOR_TYPE (expr_list, expr_t);
155 static expr_list expr_table;
157 /* Add the expression to the table, returning the node_id. */
158 static node_id
159 new_node (const expr_t e)
161 if (expr_table.len == 0) {
162 static const expr_t enull = { .t = EXPR_NULL };
163 if (expr_list_append (&expr_table, enull) == -1)
164 goto out_of_memory;
166 if (e.t == EXPR_NULL)
167 return 0;
168 if (expr_list_append (&expr_table, e) == -1) {
169 out_of_memory:
170 nbdkit_error ("realloc");
171 exit (EXIT_FAILURE);
173 return expr_table.len-1;
176 /* Get an expression by node_id. */
177 static expr_t
178 get_node (node_id id)
180 assert (id < expr_table.len);
181 return expr_table.ptr[id];
184 static void
185 free_expr_table (void)
187 size_t i;
188 expr_t e;
190 for (i = 0; i < expr_table.len; ++i) {
191 e = get_node (i);
192 switch (e.t) {
193 case EXPR_LIST: free (e.list.ptr); break;
194 case EXPR_FILE: free (e.filename); break;
195 case EXPR_SCRIPT: free (e.script); break;
196 case EXPR_STRING: free (e.string.ptr); break;
197 case EXPR_NAME: free (e.name); break;
198 case EXPR_ASSIGN: free (e.a.name); break;
199 default: break;
203 expr_list_reset (&expr_table);
206 /* Construct an expression. */
207 static expr_t
208 expr (enum expr_type t, ...)
210 expr_t e = { .t = t };
211 va_list args;
213 /* Note that you cannot pass uint8_t through varargs, so for the
214 * byte fields we use int here.
216 va_start (args, t);
217 switch (t) {
218 case EXPR_NULL: /* nothing */ break;
219 case EXPR_LIST: e.list = va_arg (args, node_ids); break;
220 case EXPR_BYTE: e.b = va_arg (args, int); break;
221 case EXPR_ABS_OFFSET:
222 case EXPR_ALIGN_OFFSET:
223 e.ui = va_arg (args, uint64_t);
224 break;
225 case EXPR_REL_OFFSET: e.i = va_arg (args, int64_t); break;
226 case EXPR_FILE: e.filename = va_arg (args, char *); break;
227 case EXPR_SCRIPT: e.script = va_arg (args, char *); break;
228 case EXPR_STRING: e.string = va_arg (args, string); break;
229 case EXPR_FILL:
230 e.fl.b = va_arg (args, int);
231 e.fl.n = va_arg (args, uint64_t);
232 break;
233 case EXPR_NAME: e.name = va_arg (args, char *); break;
234 case EXPR_ASSIGN:
235 e.a.name = va_arg (args, char *);
236 e.a.id = va_arg (args, node_id);
237 break;
238 case EXPR_REPEAT:
239 e.r.id = va_arg (args, node_id);
240 e.r.n = va_arg (args, uint64_t);
241 break;
242 case EXPR_SLICE:
243 e.sl.id = va_arg (args, node_id);
244 e.sl.n = va_arg (args, uint64_t);
245 e.sl.m = va_arg (args, int64_t);
246 break;
248 va_end (args);
250 return e;
253 /* Make a shallow copy of an expression. */
254 static expr_t
255 copy_expr (expr_t e)
257 switch (e.t) {
258 /* These have fields that have to be duplicated. */
259 case EXPR_LIST:
260 if (node_ids_duplicate (&e.list, &e.list) == -1) {
261 nbdkit_error ("malloc");
262 exit (EXIT_FAILURE);
264 break;
265 case EXPR_FILE:
266 e.filename = strdup (e.filename);
267 if (e.filename == NULL) {
268 nbdkit_error ("strdup");
269 exit (EXIT_FAILURE);
271 break;
272 case EXPR_SCRIPT:
273 e.script = strdup (e.script);
274 if (e.script == NULL) {
275 nbdkit_error ("strdup");
276 exit (EXIT_FAILURE);
278 break;
279 case EXPR_STRING:
280 if (string_duplicate (&e.string, &e.string) == -1) {
281 nbdkit_error ("malloc");
282 exit (EXIT_FAILURE);
284 break;
285 case EXPR_NAME:
286 e.name = strdup (e.name);
287 if (e.name == NULL) {
288 nbdkit_error ("strdup");
289 exit (EXIT_FAILURE);
291 break;
292 case EXPR_ASSIGN:
293 e.a.name = strdup (e.a.name);
294 if (e.a.name == NULL) {
295 nbdkit_error ("strdup");
296 exit (EXIT_FAILURE);
298 break;
300 /* These don't require anything special. */
301 case EXPR_NULL:
302 case EXPR_BYTE:
303 case EXPR_ABS_OFFSET:
304 case EXPR_REL_OFFSET:
305 case EXPR_ALIGN_OFFSET:
306 case EXPR_FILL:
307 case EXPR_REPEAT:
308 case EXPR_SLICE:
309 break;
312 return e;
315 static const char *
316 debug_indent (int level)
318 static const char spaces[41] = " ";
320 if (level >= 10)
321 return spaces;
322 else
323 return &spaces[40-4*level];
326 static void
327 debug_expr (node_id id, int level)
329 const expr_t e = get_node (id);
330 size_t i;
332 switch (e.t) {
333 case EXPR_NULL:
334 nbdkit_debug ("%snull", debug_indent (level));
335 break;
336 case EXPR_LIST:
337 nbdkit_debug ("%s(", debug_indent (level));
338 for (i = 0; i < e.list.len; ++i)
339 debug_expr (e.list.ptr[i], level+1);
340 nbdkit_debug ("%s)", debug_indent (level));
341 break;
342 case EXPR_BYTE:
343 nbdkit_debug ("%s%" PRIu8, debug_indent (level), e.b);
344 break;
345 case EXPR_ABS_OFFSET:
346 nbdkit_debug ("%s@%" PRIu64, debug_indent (level), e.ui);
347 break;
348 case EXPR_REL_OFFSET:
349 nbdkit_debug ("%s@%" PRIi64, debug_indent (level), e.i);
350 break;
351 case EXPR_ALIGN_OFFSET:
352 nbdkit_debug ("%s@^%" PRIi64, debug_indent (level), e.ui);
353 break;
354 case EXPR_FILE:
355 nbdkit_debug ("%s<%s", debug_indent (level), e.filename);
356 break;
357 case EXPR_SCRIPT:
358 nbdkit_debug ("%s<(%s)", debug_indent (level), e.script);
359 break;
360 case EXPR_STRING: {
361 CLEANUP_FREE_STRING string s = empty_vector;
363 for (i = 0; i < e.string.len; ++i) {
364 char c = e.string.ptr[i];
365 if (ascii_isprint ((char) c))
366 string_append (&s, e.string.ptr[i]);
367 else {
368 string_append (&s, '\\');
369 string_append (&s, 'x');
370 string_append (&s, hexchar (c >> 4));
371 string_append (&s, hexchar (c));
374 string_append (&s, '\0');
375 nbdkit_debug ("%s\"%s\"", debug_indent (level), s.ptr);
376 break;
378 case EXPR_FILL:
379 nbdkit_debug ("%sfill(%" PRIu8 "*%" PRIu64 ")",
380 debug_indent (level), e.fl.b, e.fl.n);
381 break;
382 case EXPR_NAME:
383 nbdkit_debug ("%s\\%s", debug_indent (level), e.name);
384 break;
385 case EXPR_ASSIGN:
386 nbdkit_debug ("%s(", debug_indent (level));
387 debug_expr (e.a.id, level+1);
388 nbdkit_debug ("%s) -> \\%s", debug_indent (level), e.a.name);
389 break;
390 case EXPR_REPEAT:
391 nbdkit_debug ("%s(", debug_indent (level));
392 debug_expr (e.r.id, level+1);
393 nbdkit_debug ("%s) *%" PRIu64, debug_indent (level), e.r.n);
394 break;
395 case EXPR_SLICE:
396 nbdkit_debug ("%s(", debug_indent (level));
397 debug_expr (e.sl.id, level+1);
398 nbdkit_debug ("%s)[%" PRIu64 ":%" PRIi64 "]",
399 debug_indent (level), e.sl.n, e.sl.m);
400 break;
404 /* Is the expression something which contains data, or something else
405 * (like an offset). Just a light check to reject nonsense
406 * expressions like "@0 * 10".
408 static bool
409 is_data_expr (const expr_t e)
411 return e.t != EXPR_ABS_OFFSET && e.t != EXPR_REL_OFFSET
412 && e.t != EXPR_ALIGN_OFFSET;
415 /* Simple dictionary of name -> expression. */
416 typedef struct dict dict_t;
417 struct dict {
418 dict_t *next;
419 const char *name; /* Name excluding \ character. */
420 node_id id; /* Associated expression. */
423 static int parser (int level, const char *value, size_t *start, size_t len,
424 node_id *root_rtn);
425 static int optimize_ast (node_id root, node_id *root_rtn);
426 static int evaluate (const dict_t *dict, node_id root,
427 struct allocator *a,
428 uint64_t *offset, uint64_t *size);
431 read_data_format (const char *value, struct allocator *a, uint64_t *size_rtn)
433 size_t i = 0;
434 node_id root;
435 uint64_t offset = 0;
436 int r = -1;
438 assert (expr_table.len == 0);
440 /* Run the parser across the entire string, returning the top level
441 * expression.
443 if (parser (0, value, &i, strlen (value), &root) == -1)
444 goto out;
446 if (optimize_ast (root, &root) == -1)
447 goto out;
449 if (data_debug_AST) {
450 nbdkit_debug ("BEGIN AST (-D data.AST=1)");
451 debug_expr (root, 0);
452 nbdkit_debug ("END AST");
455 /* Evaluate the expression into the allocator. */
456 r = evaluate (NULL, root, a, &offset, size_rtn);
458 out:
459 free_expr_table ();
460 return r;
463 static int parse_string (const char *value, size_t *start, size_t len,
464 string *rtn);
465 static int parse_word (const char *value, size_t *start, size_t len,
466 string *rtn);
467 static size_t get_name (const char *value, size_t i, size_t len,
468 size_t *initial);
469 static size_t get_var (const char *value, size_t i, size_t len,
470 size_t *initial);
471 static size_t get_script (const char *value, size_t i, size_t len);
473 /* This is the format parser. */
474 static int
475 parser (int level, const char *value, size_t *start, size_t len,
476 node_id *rtn)
478 size_t i = *start;
479 /* List of node_ids that we are building up at this level. This is
480 * leaked on error paths, but we're going to call exit(1).
482 node_ids list = empty_vector;
484 while (i < len) {
485 #define APPEND_EXPR(node_id) \
486 do { \
487 if (node_ids_append (&list, (node_id)) == -1) { \
488 nbdkit_error ("realloc: %m"); \
489 exit (EXIT_FAILURE); \
491 } while (0)
492 node_id id;
493 int j, n;
494 int64_t i64, m;
495 size_t flen;
497 switch (value[i]) {
498 case '#': /* # comment */
499 i++;
500 while (i < len && value[i] != '\n')
501 i++;
502 break;
504 case '@': /* @OFFSET */
505 if (++i == len) goto parse_error;
506 switch (value[i]) {
507 case '+': /* @+N */
508 if (++i == len) goto parse_error;
509 if (sscanf (&value[i], "%" SCNi64 "%n", &i64, &n) == 1) {
510 if (i64 < 0) {
511 nbdkit_error ("data parameter after @+ must not be negative");
512 return -1;
514 i += n;
515 APPEND_EXPR (new_node (expr (EXPR_REL_OFFSET, i64)));
517 else
518 goto parse_error;
519 break;
520 case '-': /* @-N */
521 if (++i == len) goto parse_error;
522 if (sscanf (&value[i], "%" SCNi64 "%n", &i64, &n) == 1) {
523 if (i64 < 0) {
524 nbdkit_error ("data parameter after @- must not be negative");
525 return -1;
527 i += n;
528 APPEND_EXPR (new_node (expr (EXPR_REL_OFFSET, -i64)));
530 else
531 goto parse_error;
532 break;
533 case '^': /* @^ALIGNMENT */
534 if (++i == len) goto parse_error;
535 /* We must use %i into i64 in order to parse 0x etc. */
536 if (sscanf (&value[i], "%" SCNi64 "%n", &i64, &n) == 1) {
537 if (i64 < 0) {
538 nbdkit_error ("data parameter after @^ must not be negative");
539 return -1;
541 /* XXX fix this arbitrary restriction */
542 if (!is_power_of_2 (i64)) {
543 nbdkit_error ("data parameter @^%" PRIi64 " must be a power of 2",
544 i64);
545 return -1;
547 i += n;
548 APPEND_EXPR (new_node (expr (EXPR_ALIGN_OFFSET, (uint64_t) i64)));
550 else
551 goto parse_error;
552 break;
553 case '0': case '1': case '2': case '3': case '4':
554 case '5': case '6': case '7': case '8': case '9':
555 /* We must use %i into i64 in order to parse 0x etc. */
556 if (sscanf (&value[i], "%" SCNi64 "%n", &i64, &n) == 1) {
557 if (i64 < 0) {
558 nbdkit_error ("data parameter @OFFSET must not be negative");
559 return -1;
561 i += n;
562 APPEND_EXPR (new_node (expr (EXPR_ABS_OFFSET, (uint64_t) i64)));
564 else
565 goto parse_error;
566 break;
567 default:
568 goto parse_error;
570 break;
572 case '(': /* ( */
573 i++;
575 /* Call self recursively. */
576 if (parser (level+1, value, &i, len, &id) == -1)
577 return -1;
578 /* parser() always returns a list. */
579 assert (get_node (id).t == EXPR_LIST);
580 APPEND_EXPR (id);
581 break;
583 case ')': /* ) */
584 if (level < 1) {
585 nbdkit_error ("unmatched ')' in data string");
586 return -1;
588 i++;
589 goto out;
591 case '*': /* expr*N */
592 i++;
593 if (list.len == 0) {
594 nbdkit_error ("*N must follow an expression");
595 return -1;
597 if (! is_data_expr (get_node (list.ptr[list.len-1]))) {
598 nbdkit_error ("*N cannot be applied to this type of expression");
599 return -1;
601 if (sscanf (&value[i], "%" SCNi64 "%n", &i64, &n) == 1) {
602 if (i64 < 0) {
603 nbdkit_error ("data parameter @OFFSET must not be negative");
604 return -1;
606 i += n;
608 else {
609 nbdkit_error ("*N not numeric");
610 return -1;
612 id = list.ptr[list.len-1];
613 list.len--;
614 APPEND_EXPR (new_node (expr (EXPR_REPEAT, id, (uint64_t) i64)));
615 break;
617 case '[': /* expr[k:m] */
618 i++;
619 if (list.len == 0) {
620 nbdkit_error ("[N:M] must follow an expression");
621 return -1;
623 if (! is_data_expr (get_node (list.ptr[list.len-1]))) {
624 nbdkit_error ("[N:M] cannot be applied to this type of expression");
625 return -1;
627 i64 = 0;
628 m = -1;
629 if (sscanf (&value[i], "%" SCNi64 ":%" SCNi64 "]%n",
630 &i64, &m, &n) == 2 ||
631 sscanf (&value[i], ":%" SCNi64 "]%n", &m, &n) == 1 ||
632 sscanf (&value[i], "%" SCNi64 ":]%n", &i64, &n) == 1)
633 i += n;
634 else if (strncmp (&value[i], ":]", 2) == 0)
635 i += 2;
636 else {
637 nbdkit_error ("enclosed pattern (...)[N:M] not numeric");
638 return -1;
640 id = list.ptr[list.len-1];
641 list.len--;
642 APPEND_EXPR (new_node (expr (EXPR_SLICE, id, i64, m)));
643 break;
645 case '<':
646 if (i+1 < len && value[i+1] == '(') { /* <(SCRIPT) */
647 char *script;
649 i += 2;
651 flen = get_script (value, i, len);
652 if (flen == 0) goto parse_error;
653 script = strndup (&value[i], flen);
654 if (script == NULL) {
655 nbdkit_error ("strndup: %m");
656 return -1;
658 i += flen + 1; /* +1 for trailing ) */
659 APPEND_EXPR (new_node (expr (EXPR_SCRIPT, script)));
661 else { /* <FILE */
662 char *filename;
664 i++;
666 /* The filename follows next in the string. */
667 flen = strcspn (&value[i], "*[) \t\n");
668 if (flen == 0) {
669 nbdkit_error ("data parameter <FILE not a filename");
670 return -1;
672 filename = strndup (&value[i], flen);
673 if (filename == NULL) {
674 nbdkit_error ("strndup: %m");
675 return -1;
677 i += flen;
678 APPEND_EXPR (new_node (expr (EXPR_FILE, filename)));
680 break;
682 case '"': { /* "String" */
683 string str = empty_vector;
685 i++;
686 if (parse_string (value, &i, len, &str) == -1)
687 return -1;
688 APPEND_EXPR (new_node (expr (EXPR_STRING, str)));
689 break;
692 case '\\': { /* \\NAME */
693 char *name;
695 flen = get_name (value, i, len, &i);
696 if (flen == 0) goto parse_error;
697 name = strndup (&value[i], flen);
698 if (name == NULL) {
699 nbdkit_error ("strndup: %m");
700 return -1;
702 i += flen;
703 APPEND_EXPR (new_node (expr (EXPR_NAME, name)));
704 break;
707 case '-': { /* -> \\NAME */
708 char *name;
710 i++;
711 if (value[i] != '>') goto parse_error;
712 i++;
713 if (list.len == 0) {
714 nbdkit_error ("-> must follow an expression");
715 return -1;
717 if (! is_data_expr (get_node (list.ptr[list.len-1]))) {
718 nbdkit_error ("-> cannot be applied to this type of expression");
719 return -1;
721 flen = get_name (value, i, len, &i);
722 if (flen == 0) goto parse_error;
723 name = strndup (&value[i], flen);
724 if (name == NULL) {
725 nbdkit_error ("strndup: %m");
726 return -1;
728 id = list.ptr[list.len-1];
729 i += flen;
730 list.len--;
731 APPEND_EXPR (new_node (expr (EXPR_ASSIGN, name, id)));
732 break;
735 case '$': { /* $VAR */
736 CLEANUP_FREE char *name = NULL;
737 const char *content;
738 size_t ci;
740 flen = get_var (value, i, len, &i);
741 if (flen == 0) goto parse_error;
742 name = strndup (&value[i], flen);
743 if (name == NULL) {
744 nbdkit_error ("strndup: %m");
745 return -1;
747 i += flen;
749 /* Look up the variable. */
750 content = get_extra_param (name);
751 if (!content) {
752 content = getenv (name);
753 if (!content) {
754 nbdkit_error ("$%s: variable not found", name);
755 return -1;
759 /* Call self recursively on the variable content. */
760 ci = 0;
761 if (parser (0, content, &ci, strlen (content), &id) == -1)
762 return -1;
763 /* parser() always returns a list. */
764 assert (get_node (id).t == EXPR_LIST);
765 APPEND_EXPR (id);
766 break;
769 case '0': case '1': case '2': case '3': case '4': /* BYTE */
770 case '5': case '6': case '7': case '8': case '9':
771 /* We need to use %i here so it scans 0x etc correctly. */
772 if (sscanf (&value[i], "%i%n", &j, &n) == 1)
773 i += n;
774 else
775 goto parse_error;
776 if (j < 0 || j > 255) {
777 nbdkit_error ("data parameter BYTE must be in the range 0..255");
778 return -1;
780 APPEND_EXPR (new_node (expr (EXPR_BYTE, j)));
781 break;
783 case 'l': case 'b': { /* le or be + NN: + WORD */
784 string str = empty_vector;
786 if (parse_word (value, &i, len, &str) == -1)
787 return -1;
788 APPEND_EXPR (new_node (expr (EXPR_STRING, str)));
789 break;
792 case ' ': case '\t': case '\n': /* Skip whitespace. */
793 case '\f': case '\r': case '\v':
794 i++;
795 break;
797 default:
798 parse_error:
799 nbdkit_error ("data parameter: parsing error at offset %zu", i);
800 return -1;
801 } /* switch */
802 } /* for */
804 /* If we reach the end of the string and level != 0 that means
805 * there is an unmatched '(' in the string.
807 if (level > 0) {
808 nbdkit_error ("unmatched '(' in data string");
809 return -1;
812 out:
813 *start = i;
815 /* Return the node ID. */
816 *rtn = new_node (expr (EXPR_LIST, list));
817 return 0;
820 /* Return the next \NAME in the input. This skips whitespace, setting
821 * *initial to the index of the start of the NAME (minus backslash).
822 * It returns the length of NAME (minus backslash), or 0 if not found.
824 static size_t
825 get_name (const char *value, size_t i, size_t len, size_t *initial)
827 size_t r = 0;
829 while (i < len) {
830 if (!ascii_isspace (value[i]))
831 break;
832 i++;
835 if (i >= len || value[i] != '\\') return 0;
836 i++;
837 if (i >= len) return 0;
838 *initial = i;
840 while (i < len &&
841 (ascii_isalnum (value[i]) || value[i] == '_' || value[i] == '-')) {
842 i++;
843 r++;
846 return r;
849 /* Like get_name above, but for $VAR variables. The accepted variable
850 * name is /\$[a-z_][a-z0-9_]+/i
852 static size_t
853 get_var (const char *value, size_t i, size_t len, size_t *initial)
855 size_t r = 0;
857 while (i < len) {
858 if (!ascii_isspace (value[i]))
859 break;
860 i++;
863 if (i >= len || value[i] != '$') return 0;
864 i++;
865 if (i >= len) return 0;
866 *initial = i;
868 if (!ascii_isalpha (value[i]) && value[i] != '_')
869 return 0;
871 while (i < len && (ascii_isalnum (value[i]) || value[i] == '_')) {
872 i++;
873 r++;
876 return r;
879 /* Find end of a <(SCRIPT), ignoring nested (). */
880 static size_t
881 get_script (const char *value, size_t i, size_t len)
883 int lvl = 0;
884 size_t r = 0;
886 for (; i < len; ++i, ++r) {
887 if (value[i] == '(')
888 lvl++;
889 else if (value[i] == ')') {
890 if (lvl > 0)
891 lvl--;
892 else
893 break;
897 if (i >= len)
898 return 0;
900 return r;
903 /* Parse a "String" with C-like escaping, and store it in the string
904 * vector. When this is called we have already consumed the initial
905 * double quote character.
907 static int
908 parse_string (const char *value, size_t *start, size_t len, string *rtn)
910 size_t i = *start;
911 unsigned char c, x0, x1;
913 *rtn = (string) empty_vector;
915 for (; i < len; ++i) {
916 c = value[i];
917 switch (c) {
918 case '"':
919 /* End of the string. */
920 *start = i+1;
921 return 0;
923 case '\\':
924 /* Start of escape sequence. */
925 if (++i == len) goto unexpected_end_of_string;
926 c = value[i];
927 switch (c) {
928 case 'a': c = 0x7; break;
929 case 'b': c = 0x8; break;
930 case 'f': c = 0xc; break;
931 case 'n': c = 0xa; break;
932 case 'r': c = 0xd; break;
933 case 't': c = 0x9; break;
934 case 'v': c = 0xb; break;
935 case '\\': case '"': break;
936 case 'x':
937 if (++i == len) goto unexpected_end_of_string;
938 x0 = value[i];
939 if (++i == len) goto unexpected_end_of_string;
940 x1 = value[i];
941 if (!ascii_isxdigit (x0) || !ascii_isxdigit (x1)) {
942 nbdkit_error ("data: \\xNN must be followed by exactly "
943 "two hexadecimal characters");
944 return -1;
946 c = hexbyte (x0, x1);
947 break;
948 case '0': case '1': case '2': case '3': case '4':
949 case '5': case '6': case '7': case '8': case '9':
950 case 'u':
951 nbdkit_error ("data: string numeric and unicode sequences "
952 "are not yet implemented");
953 return -1;
955 /*FALLTHROUGH*/
956 default:
957 /* Any other character is added to the string. */
958 if (string_append (rtn, c) == -1) {
959 nbdkit_error ("realloc: %m");
960 return -1;
965 /* If we reach here then we have run off the end of the data string
966 * without finding the final quote.
968 unexpected_end_of_string:
969 nbdkit_error ("data parameter: unterminated string");
970 return -1;
973 /* Parse le<NN>:WORD be<NN>:WORD expressions. These are parsed into strings. */
974 static int
975 parse_word (const char *value, size_t *start, size_t len, string *rtn)
977 size_t i = *start;
978 CLEANUP_FREE_STRING string copy = empty_vector;
979 size_t n;
980 enum { little, big } endian;
981 uint16_t u16;
982 uint32_t u32;
983 uint64_t u64;
985 *rtn = (string) empty_vector;
987 /* It's convenient to use the nbdkit_parse* functions below because
988 * they deal already with overflow etc. However these functions
989 * require a \0-terminated string, so we have to copy what we are
990 * parsing to a new string here.
992 while (i < len) {
993 if (ascii_isspace (value[i]))
994 break;
995 i++;
997 n = i - *start;
998 assert (n > 0); /* must be because caller parsed 'l'/'b' */
999 if (string_reserve (&copy, n + 1) == -1) {
1000 nbdkit_error ("realloc: %m");
1001 return -1;
1003 memcpy (copy.ptr, &value[*start], n);
1004 copy.ptr[n] = '\0';
1005 copy.len = n + 1;
1006 *start = i;
1008 /* Reserve enough space in the return buffer for the longest
1009 * possible bitstring (64 bits / 8 bytes).
1011 if (string_reserve (rtn, 8) == -1) {
1012 nbdkit_error ("realloc: %m");
1013 return -1;
1016 /* Parse the rest of {le|be}{16|32|64}: */
1017 if (strncmp (copy.ptr, "le16:", 5) == 0) {
1018 endian = little; rtn->len = 2;
1020 else if (strncmp (copy.ptr, "le32:", 5) == 0) {
1021 endian = little; rtn->len = 4;
1023 else if (strncmp (copy.ptr, "le64:", 5) == 0) {
1024 endian = little; rtn->len = 8;
1026 else if (strncmp (copy.ptr, "be16:", 5) == 0) {
1027 endian = big; rtn->len = 2;
1029 else if (strncmp (copy.ptr, "be32:", 5) == 0) {
1030 endian = big; rtn->len = 4;
1032 else if (strncmp (copy.ptr, "be64:", 5) == 0) {
1033 endian = big; rtn->len = 8;
1035 else {
1036 nbdkit_error ("data parameter: expected \"le16/32/64:\" "
1037 "or \"be16/32/64:\" at offset %zu", i);
1038 return -1;
1041 /* Parse the word field into a host-order unsigned int. */
1042 switch (rtn->len) {
1043 case 2:
1044 if (nbdkit_parse_uint16_t ("data", &copy.ptr[5], &u16) == -1)
1045 return -1;
1046 break;
1047 case 4:
1048 if (nbdkit_parse_uint32_t ("data", &copy.ptr[5], &u32) == -1)
1049 return -1;
1050 break;
1051 case 8:
1052 if (nbdkit_parse_uint64_t ("data", &copy.ptr[5], &u64) == -1)
1053 return -1;
1054 break;
1055 default: abort ();
1058 /* Now depending on the endianness and size, convert to the final
1059 * bitstring.
1061 switch (endian) {
1062 case little:
1063 switch (rtn->len) {
1064 case 2: /* le16: */
1065 *((uint16_t *) rtn->ptr) = htole16 (u16);
1066 break;
1067 case 4: /* le32: */
1068 *((uint32_t *) rtn->ptr) = htole32 (u32);
1069 break;
1070 case 8: /* le64: */
1071 *((uint64_t *) rtn->ptr) = htole64 (u64);
1072 break;
1073 default: abort ();
1075 break;
1077 case big:
1078 switch (rtn->len) {
1079 case 2: /* be16: */
1080 *((uint16_t *) rtn->ptr) = htobe16 (u16);
1081 break;
1082 case 4: /* be32: */
1083 *((uint32_t *) rtn->ptr) = htobe32 (u32);
1084 break;
1085 case 8: /* be64: */
1086 *((uint64_t *) rtn->ptr) = htobe64 (u64);
1087 break;
1088 default: abort ();
1092 return 0;
1095 /* This simple optimization pass over the AST simplifies some
1096 * expressions.
1098 static bool expr_safe_to_inline (const expr_t);
1099 static bool list_safe_to_inline (const node_ids);
1100 static bool expr_is_single_byte (const expr_t, uint8_t *b);
1101 static bool exprs_can_combine (expr_t e0, expr_t e1, node_id *id_rtn);
1103 static int
1104 optimize_ast (node_id root, node_id *root_rtn)
1106 size_t i, j;
1107 node_id id;
1108 node_ids list = empty_vector;
1109 expr_t e2;
1111 switch (get_node (root).t) {
1112 case EXPR_LIST:
1113 /* For convenience this makes a new list node. */
1115 /* Optimize each element of the list. */
1116 for (i = 0; i < get_node (root).list.len; ++i) {
1117 id = get_node (root).list.ptr[i];
1118 if (optimize_ast (id, &id) == -1)
1119 return -1;
1120 switch (get_node (id).t) {
1121 case EXPR_NULL:
1122 /* null elements of a list can be ignored. */
1123 break;
1124 case EXPR_LIST:
1125 /* Simple lists can be inlined, but be careful with
1126 * assignments, offsets and other expressions which are scoped
1127 * because flattening the list changes the scope.
1129 if (list_safe_to_inline (get_node (id).list)) {
1130 for (j = 0; j < get_node (id).list.len; ++j) {
1131 if (node_ids_append (&list, get_node (id).list.ptr[j]) == -1)
1132 goto list_append_error;
1134 break;
1136 /*FALLTHROUGH*/
1137 default:
1138 if (node_ids_append (&list, id) == -1) {
1139 list_append_error:
1140 nbdkit_error ("realloc: %m");
1141 return -1;
1146 /* Combine adjacent pairs of elements if possible. */
1147 for (i = 1; i < list.len; ++i) {
1148 node_id id0, id1;
1150 id0 = list.ptr[i-1];
1151 id1 = list.ptr[i];
1152 if (exprs_can_combine (get_node (id0), get_node (id1), &id)) {
1153 list.ptr[i-1] = id;
1154 node_ids_remove (&list, i);
1155 --i;
1159 /* List of length 0 is replaced with null. */
1160 if (list.len == 0) {
1161 free (list.ptr);
1162 *root_rtn = new_node (expr (EXPR_NULL));
1163 return 0;
1166 /* List of length 1 is replaced with the first element, but as
1167 * above avoid inlining if it is not a safe expression.
1169 if (list.len == 1 && expr_safe_to_inline (get_node (list.ptr[0]))) {
1170 id = list.ptr[0];
1171 free (list.ptr);
1172 *root_rtn = id;
1173 return 0;
1176 *root_rtn = new_node (expr (EXPR_LIST, list));
1177 return 0;
1179 case EXPR_ASSIGN:
1180 id = get_node (root).a.id;
1181 if (optimize_ast (id, &id) == -1)
1182 return -1;
1183 e2 = copy_expr (get_node (root));
1184 e2.a.id = id;
1185 *root_rtn = new_node (e2);
1186 return 0;
1188 case EXPR_REPEAT:
1189 /* Repeating zero times can be replaced by null. */
1190 if (get_node (root).r.n == 0) {
1191 *root_rtn = new_node (expr (EXPR_NULL));
1192 return 0;
1194 id = get_node (root).r.id;
1195 if (optimize_ast (id, &id) == -1)
1196 return -1;
1198 /* expr*1 can be replaced with simply expr.
1199 * null*N can be replaced with null.
1201 if (get_node (root).r.n == 1 || get_node (id).t == EXPR_NULL) {
1202 *root_rtn = id;
1203 return 0;
1205 /* expr*X*Y can be replaced by expr*(X*Y). */
1206 if (get_node (id).t == EXPR_REPEAT) {
1207 *root_rtn = new_node (expr (EXPR_REPEAT,
1208 get_node (id).r.id,
1209 get_node (root).r.n * get_node (id).r.n));
1210 return 0;
1212 /* fill(b,X)*Y can be replaced by fill(b,X*Y). */
1213 if (get_node (id).t == EXPR_FILL) {
1214 *root_rtn = new_node (expr (EXPR_FILL,
1215 get_node (id).fl.b,
1216 get_node (root).r.n * get_node (id).fl.n));
1217 return 0;
1219 /* For short strings and small values or N, string*N can be
1220 * replaced by N copies of the string.
1222 if (get_node (id).t == EXPR_STRING &&
1223 get_node (root).r.n <= 4 &&
1224 get_node (id).string.len <= 512) {
1225 string s = empty_vector;
1226 size_t n = get_node (root).r.n;
1227 const string sub = get_node (id).string;
1229 for (i = 0; i < n; ++i) {
1230 for (j = 0; j < sub.len; ++j) {
1231 if (string_append (&s, sub.ptr[j]) == -1) {
1232 nbdkit_error ("realloc: %m");
1233 return -1;
1238 *root_rtn = new_node (expr (EXPR_STRING, s));
1239 return 0;
1241 /* Single byte expression * N can be replaced by a fill. */
1243 uint8_t b;
1245 if (expr_is_single_byte (get_node (id), &b)) {
1246 *root_rtn = new_node (expr (EXPR_FILL, b, get_node (root).r.n));
1247 return 0;
1251 e2 = copy_expr (get_node (root));
1252 e2.r.id = id;
1253 *root_rtn = new_node (e2);
1254 return 0;
1256 case EXPR_SLICE: {
1257 uint64_t n = get_node (root).sl.n;
1258 int64_t m = get_node (root).sl.m;
1259 uint64_t len;
1261 /* A zero-length slice can be replaced by null. */
1262 if (n == m) {
1263 *root_rtn = new_node (expr (EXPR_NULL));
1264 return 0;
1266 id = get_node (root).sl.id;
1267 if (optimize_ast (id, &id) == -1)
1268 return -1;
1270 /* Some constant expressions can be sliced into something shorter.
1271 * Be conservative. If the slice is invalid then we prefer to do
1272 * nothing here because the whole expression might be optimized
1273 * away and if it isn't then we will give an error later.
1275 switch (get_node (id).t) {
1276 case EXPR_NULL: /* null[:0] or null[0:] => null */
1277 if (m == 0 || (n == 0 && m == -1)) {
1278 *root_rtn = new_node (expr (EXPR_NULL));
1279 return 0;
1281 break;
1282 case EXPR_BYTE: /* byte[:1] or byte[0:] => byte */
1283 if (m == 1 || (n == 0 && m == -1)) {
1284 *root_rtn = new_node (expr (EXPR_BYTE, get_node (id).b));
1285 return 0;
1287 break;
1288 case EXPR_STRING: /* substring */
1289 len = get_node (id).string.len;
1290 if (m >= 0 && n <= m && m <= len) {
1291 if (m-n == 1)
1292 *root_rtn = new_node (expr (EXPR_BYTE, get_node (id).string.ptr[n]));
1293 else {
1294 string sub = substring (get_node (id).string, n, m-n);
1295 *root_rtn = new_node (expr (EXPR_STRING, sub));
1297 return 0;
1299 if (m == -1 && n <= len) {
1300 if (len-n == 1)
1301 *root_rtn = new_node (expr (EXPR_BYTE, get_node (id).string.ptr[n]));
1302 else {
1303 string sub = substring (get_node (id).string, n, len-n);
1304 *root_rtn = new_node (expr (EXPR_STRING, sub));
1306 return 0;
1308 break;
1309 case EXPR_FILL: /* slice of a fill is a shorter fill */
1310 len = get_node (id).fl.n;
1311 if (m >= 0 && n <= m && m <= len) {
1312 if (m-n == 1)
1313 *root_rtn = new_node (expr (EXPR_BYTE, get_node (id).fl.b));
1314 else
1315 *root_rtn = new_node (expr (EXPR_FILL, get_node (id).fl.b, m-n));
1316 return 0;
1318 if (m == -1 && n <= len) {
1319 if (len-n == 1)
1320 *root_rtn = new_node (expr (EXPR_BYTE, get_node (id).fl.b));
1321 else
1322 *root_rtn = new_node (expr (EXPR_FILL, get_node (id).fl.b, len-n));
1323 return 0;
1325 break;
1326 default: ;
1329 e2 = copy_expr (get_node (root));
1330 e2.sl.id = id;
1331 *root_rtn = new_node (e2);
1332 return 0;
1335 case EXPR_STRING:
1336 /* A zero length string can be replaced with null. */
1337 if (get_node (root).string.len == 0) {
1338 *root_rtn = new_node (expr (EXPR_NULL));
1339 return 0;
1341 /* Strings containing the same character can be replaced by a
1342 * fill. These can be produced by other optimizations.
1344 if (get_node (root).string.len > 1) {
1345 const string s = get_node (root).string;
1346 uint8_t b = s.ptr[0];
1348 for (i = 1; i < s.len; ++i)
1349 if (s.ptr[i] != b)
1350 break;
1352 if (i == s.len) {
1353 *root_rtn = new_node (expr (EXPR_FILL, b, (uint64_t) s.len));
1354 return 0;
1357 *root_rtn = root;
1358 return 0;
1360 case EXPR_FILL:
1361 /* Zero-length fill can be replaced by null. */
1362 if (get_node (root).fl.n == 0) {
1363 *root_rtn = new_node (expr (EXPR_NULL));
1364 return 0;
1366 *root_rtn = root;
1367 return 0;
1369 case EXPR_NULL:
1370 case EXPR_BYTE:
1371 case EXPR_ABS_OFFSET:
1372 case EXPR_REL_OFFSET:
1373 case EXPR_ALIGN_OFFSET:
1374 case EXPR_FILE:
1375 case EXPR_SCRIPT:
1376 case EXPR_NAME:
1377 *root_rtn = root;
1378 return 0;
1381 abort ();
1384 /* Test if an expression can be safely inlined in a superior list
1385 * without changing the meaning of any scoped expressions.
1387 static bool
1388 expr_safe_to_inline (const expr_t e)
1390 switch (e.t) {
1391 /* Assignments and named expressions are scoped. */
1392 case EXPR_ASSIGN:
1393 case EXPR_NAME:
1394 return false;
1396 /* @Offsets are scoped. */
1397 case EXPR_ABS_OFFSET:
1398 case EXPR_REL_OFFSET:
1399 case EXPR_ALIGN_OFFSET:
1400 return false;
1402 /* Everything else should be safe. */
1403 case EXPR_NULL:
1404 case EXPR_LIST:
1405 case EXPR_BYTE:
1406 case EXPR_FILE:
1407 case EXPR_SCRIPT:
1408 case EXPR_STRING:
1409 case EXPR_FILL:
1410 case EXPR_REPEAT:
1411 case EXPR_SLICE:
1415 return true;
1418 /* Test if a list of expressions is safe to inline in a superior list. */
1419 static bool
1420 list_safe_to_inline (const node_ids list)
1422 size_t i;
1424 for (i = 0; i < list.len; ++i) {
1425 if (!expr_safe_to_inline (get_node (list.ptr[i])))
1426 return false;
1429 return true;
1432 /* For some constant expressions which are a length 1 byte, return
1433 * true and the byte.
1435 static bool
1436 expr_is_single_byte (const expr_t e, uint8_t *b)
1438 switch (e.t) {
1439 case EXPR_BYTE: /* A single byte. */
1440 if (b) *b = e.b;
1441 return true;
1442 case EXPR_LIST: /* A single element list if it is single byte */
1443 if (e.list.len != 1)
1444 return false;
1445 return expr_is_single_byte (get_node (e.list.ptr[0]), b);
1446 case EXPR_STRING: /* A length-1 string. */
1447 if (e.string.len != 1)
1448 return false;
1449 if (b) *b = e.string.ptr[0];
1450 return true;
1451 case EXPR_FILL: /* A length-1 fill. */
1452 if (e.fl.n != 1)
1453 return false;
1454 if (b) *b = e.fl.b;
1455 return true;
1456 case EXPR_REPEAT: /* EXPR*1 if EXPR is single byte */
1457 if (e.r.n != 1)
1458 return false;
1459 return expr_is_single_byte (get_node (e.r.id), b);
1460 default:
1461 return false;
1465 /* Test if two adjacent constant expressions can be combined and if so
1466 * return a new expression which is the combination of both. For
1467 * example, two bytes are combined into a string (1 2 => "\x01\x02"),
1468 * or a string and a byte into a longer string ("\x01\x02" 3 =>
1469 * "\x01\x02\x03").
1471 static bool
1472 exprs_can_combine (expr_t e0, expr_t e1, node_id *id_rtn)
1474 string s = empty_vector;
1475 size_t len, len1;
1476 expr_t e2;
1478 switch (e0.t) {
1479 case EXPR_BYTE:
1480 switch (e1.t) {
1481 case EXPR_BYTE: /* byte byte => fill | string */
1482 if (e0.b == e1.b) {
1483 *id_rtn = new_node (expr (EXPR_FILL, e0.b, UINT64_C (2)));
1485 else {
1486 if (string_append (&s, e0.b) == -1 ||
1487 string_append (&s, e1.b) == -1)
1488 goto out_of_memory;
1489 *id_rtn = new_node (expr (EXPR_STRING, s));
1491 return true;
1492 case EXPR_STRING: /* byte string => string */
1493 len = e1.string.len;
1494 if (string_reserve (&s, len+1) == -1)
1495 goto out_of_memory;
1496 s.len = len+1;
1497 s.ptr[0] = e0.b;
1498 memcpy (&s.ptr[1], e1.string.ptr, len);
1499 *id_rtn = new_node (expr (EXPR_STRING, s));
1500 return true;
1501 case EXPR_FILL: /* byte fill => fill, if the same */
1502 if (e0.b != e1.fl.b)
1503 return false;
1504 e2 = copy_expr (e1);
1505 e2.fl.n++;
1506 *id_rtn = new_node (e2);
1507 return true;
1508 default:
1509 return false;
1512 case EXPR_STRING:
1513 switch (e1.t) {
1514 case EXPR_BYTE: /* string byte => string */
1515 len = e0.string.len;
1516 if (string_reserve (&s, len+1) == -1)
1517 goto out_of_memory;
1518 s.len = len+1;
1519 memcpy (s.ptr, e0.string.ptr, len);
1520 s.ptr[len] = e1.b;
1521 *id_rtn = new_node (expr (EXPR_STRING, s));
1522 return true;
1523 case EXPR_STRING: /* string string => string */
1524 len = e0.string.len;
1525 len1 = e1.string.len;
1526 if (string_reserve (&s, len+len1) == -1)
1527 goto out_of_memory;
1528 s.len = len+len1;
1529 memcpy (s.ptr, e0.string.ptr, len);
1530 memcpy (&s.ptr[len], e1.string.ptr, len1);
1531 *id_rtn = new_node (expr (EXPR_STRING, s));
1532 return true;
1533 default:
1534 return false;
1537 case EXPR_FILL:
1538 switch (e1.t) {
1539 case EXPR_BYTE: /* fill byte => fill, if the same */
1540 if (e0.fl.b != e1.b)
1541 return false;
1542 e2 = copy_expr (e0);
1543 e2.fl.n++;
1544 *id_rtn = new_node (e2);
1545 return true;
1546 case EXPR_FILL: /* fill fill => fill, if the same */
1547 if (e0.fl.b != e1.fl.b)
1548 return false;
1549 e2 = copy_expr (e0);
1550 e2.fl.n += e1.fl.n;
1551 *id_rtn = new_node (e2);
1552 return true;
1553 default:
1554 return false;
1557 default:
1558 return false;
1561 out_of_memory:
1562 nbdkit_error ("realloc: %m");
1563 exit (EXIT_FAILURE);
1566 static int store_file (struct allocator *a,
1567 const char *filename, uint64_t *offset);
1568 static int store_file_slice (struct allocator *a,
1569 const char *filename,
1570 uint64_t skip, int64_t end, uint64_t *offset);
1571 static int store_script (struct allocator *a,
1572 const char *script, uint64_t *offset);
1573 static int store_script_len (struct allocator *a,
1574 const char *script,
1575 int64_t len, uint64_t *offset);
1577 /* This is the evaluator. It takes the root (node_id) of the parsed
1578 * abstract syntax tree and evaulates it into the allocator.
1580 static int
1581 evaluate (const dict_t *dict, node_id root,
1582 struct allocator *a, uint64_t *offset, uint64_t *size)
1584 /* 'd' is the local dictionary for this function. Assignments are
1585 * added to the dictionary in this scope and passed to nested
1586 * scopes. This is leaked on error paths, but we're going to call
1587 * exit(1).
1589 dict_t *d = (dict_t *) dict;
1590 size_t i, j;
1591 node_ids list;
1593 /* Extract the list from the current node. If the current node is
1594 * not EXPR_LIST then make one for convenience below.
1596 if (get_node (root).t == EXPR_LIST) {
1597 list = get_node (root).list;
1599 else {
1600 list.len = 1;
1601 list.ptr = &root;
1604 for (i = 0; i < list.len; ++i) {
1605 const expr_t e = get_node (list.ptr[i]);
1607 switch (e.t) {
1608 case EXPR_NULL: /* does nothing */ break;
1610 case EXPR_BYTE:
1611 /* Store the byte. */
1612 if (a->f->write (a, &e.b, 1, *offset) == -1)
1613 return -1;
1614 (*offset)++;
1615 break;
1617 case EXPR_ABS_OFFSET:
1618 /* XXX Check it does not overflow 63 bits. */
1619 *offset = e.ui;
1620 break;
1622 case EXPR_REL_OFFSET:
1623 if (e.i < 0 && -e.i > *offset) {
1624 nbdkit_error ("data parameter @-%" PRIi64 " "
1625 "must not be larger than the current offset %" PRIu64,
1626 -e.i, *offset);
1627 return -1;
1629 /* XXX Check it does not overflow 63 bits. */
1630 *offset += e.i;
1631 break;
1633 case EXPR_ALIGN_OFFSET:
1634 *offset = ROUND_UP (*offset, e.ui);
1635 break;
1637 case EXPR_FILE:
1638 if (store_file (a, e.filename, offset) == -1)
1639 return -1;
1640 break;
1642 case EXPR_SCRIPT:
1643 if (store_script (a, e.script, offset) == -1)
1644 return -1;
1645 break;
1647 case EXPR_STRING:
1648 /* Copy the string into the allocator. */
1649 if (a->f->write (a, e.string.ptr, e.string.len, *offset) == -1)
1650 return -1;
1651 *offset += e.string.len;
1652 break;
1654 case EXPR_FILL:
1655 if (a->f->fill (a, e.fl.b, e.fl.n, *offset) == -1)
1656 return -1;
1657 *offset += e.fl.n;
1658 break;
1660 case EXPR_ASSIGN: {
1661 dict_t *d_next = d;
1663 d = malloc (sizeof *d);
1664 if (d == NULL) {
1665 nbdkit_error ("malloc: %m");
1666 return -1;
1668 d->next = d_next;
1669 d->name = e.a.name;
1670 d->id = e.a.id;
1671 break;
1674 case EXPR_NAME: {
1675 CLEANUP_FREE_ALLOCATOR struct allocator *a2 = NULL;
1676 uint64_t offset2 = 0, size2 = 0;
1677 dict_t *t;
1679 /* Look up the expression in the current dictionary. */
1680 for (t = d; t != NULL; t = t->next)
1681 if (strcmp (t->name, e.name) == 0)
1682 break;
1683 if (t == NULL) {
1684 nbdkit_error ("\\%s not defined", e.name);
1685 return -1;
1688 /* Evaluate and then substitute the expression. */
1689 a2 = create_allocator ("sparse", false);
1690 if (a2 == NULL) {
1691 nbdkit_error ("malloc: %m");
1692 return -1;
1694 /* NB: We pass the environment at the time that the assignment was
1695 * made (t->next) not the current environment. This is deliberate.
1697 if (evaluate (t->next, t->id, a2, &offset2, &size2) == -1)
1698 return -1;
1700 if (a->f->blit (a2, a, size2, 0, *offset) == -1)
1701 return -1;
1702 *offset += size2;
1703 break;
1706 case EXPR_LIST:
1707 case EXPR_REPEAT:
1708 case EXPR_SLICE:
1709 /* Optimize some cases so we don't always have to create a
1710 * new allocator.
1713 /* <FILE[N:M] can be optimized by not reading in the whole file.
1714 * For files like /dev/urandom which are infinite this stops an
1715 * infinite loop.
1717 if (e.t == EXPR_SLICE && get_node (e.sl.id).t == EXPR_FILE) {
1718 if (store_file_slice (a, get_node (e.sl.id).filename,
1719 e.sl.n, e.sl.m, offset) == -1)
1720 return -1;
1723 /* <(SCRIPT)[:LEN] must be optimized by truncating the
1724 * output of the script.
1726 else if (e.t == EXPR_SLICE && e.sl.n == 0 &&
1727 get_node (e.sl.id).t == EXPR_SCRIPT) {
1728 if (store_script_len (a, get_node (e.sl.id).script, e.sl.m,
1729 offset) == -1)
1730 return -1;
1733 else {
1734 /* This is the non-optimized case.
1736 * Nesting creates a new context where there is a new allocator
1737 * and the offset is reset to 0.
1739 CLEANUP_FREE_ALLOCATOR struct allocator *a2 = NULL;
1740 uint64_t offset2 = 0, size2 = 0, m;
1741 node_id id;
1743 switch (e.t) {
1744 case EXPR_LIST: id = list.ptr[i]; break;
1745 case EXPR_REPEAT: id = e.r.id; break;
1746 case EXPR_SLICE: id = e.sl.id; break;
1747 default: abort ();
1750 a2 = create_allocator ("sparse", false);
1751 if (a2 == NULL) {
1752 nbdkit_error ("malloc: %m");
1753 return -1;
1755 if (evaluate (d, id, a2, &offset2, &size2) == -1)
1756 return -1;
1758 switch (e.t) {
1759 case EXPR_LIST:
1760 if (a->f->blit (a2, a, size2, 0, *offset) == -1)
1761 return -1;
1762 *offset += size2;
1763 break;
1764 case EXPR_REPEAT:
1765 /* Duplicate the allocator a2 N times. */
1766 for (j = 0; j < e.r.n; ++j) {
1767 if (a->f->blit (a2, a, size2, 0, *offset) == -1)
1768 return -1;
1769 *offset += size2;
1771 break;
1772 case EXPR_SLICE:
1773 /* Slice [N:M] */
1774 m = e.sl.m < 0 ? size2 : e.sl.m;
1775 if (e.sl.n < 0 || m < 0 ||
1776 e.sl.n > size2 || m > size2 ||
1777 e.sl.n > m ) {
1778 nbdkit_error ("[N:M] does not describe a valid slice");
1779 return -1;
1781 /* Take a slice from the allocator. */
1782 if (a->f->blit (a2, a, m-e.sl.n, e.sl.n, *offset) == -1)
1783 return -1;
1784 *offset += m-e.sl.n;
1785 break;
1786 default:
1787 abort ();
1790 break;
1793 /* Adjust the size if the offset is now larger. */
1794 if (*size < *offset)
1795 *size = *offset;
1796 } /* for */
1798 /* Free assignments in the local dictionary. */
1799 while (d != dict) {
1800 dict_t *t = d;
1801 d = d->next;
1802 free (t);
1805 return 0;
1808 /* Store file at current offset in the allocator, updating the offset. */
1809 static int
1810 store_file (struct allocator *a,
1811 const char *filename, uint64_t *offset)
1813 FILE *fp;
1814 char buf[BUFSIZ];
1815 size_t n;
1817 fp = fopen (filename, "r");
1818 if (fp == NULL) {
1819 nbdkit_error ("%s: %m", filename);
1820 return -1;
1823 while (!feof (fp)) {
1824 n = fread (buf, 1, BUFSIZ, fp);
1825 if (n > 0) {
1826 if (a->f->write (a, buf, n, *offset) == -1) {
1827 fclose (fp);
1828 return -1;
1831 if (ferror (fp)) {
1832 nbdkit_error ("fread: %s: %m", filename);
1833 fclose (fp);
1834 return -1;
1836 (*offset) += n;
1839 if (fclose (fp) == EOF) {
1840 nbdkit_error ("fclose: %s: %m", filename);
1841 return -1;
1844 return 0;
1847 /* <FILE[N:M] */
1848 static int
1849 store_file_slice (struct allocator *a,
1850 const char *filename,
1851 uint64_t skip, int64_t end, uint64_t *offset)
1853 FILE *fp;
1854 char buf[BUFSIZ];
1855 size_t n;
1856 uint64_t len = 0;
1858 if ((end >= 0 && skip > end) || end < -2) {
1859 nbdkit_error ("<FILE[N:M] does not describe a valid slice");
1860 return -1;
1863 if (end >= 0)
1864 len = end - skip;
1866 fp = fopen (filename, "r");
1867 if (fp == NULL) {
1868 nbdkit_error ("%s: %m", filename);
1869 return -1;
1872 if (fseek (fp, skip, SEEK_SET) == -1) {
1873 nbdkit_error ("%s: fseek: %m", filename);
1874 return -1;
1877 while (!feof (fp) && (end == -1 || len > 0)) {
1878 n = fread (buf, 1, end == -1 ? BUFSIZ : MIN (len, BUFSIZ), fp);
1879 if (n > 0) {
1880 if (a->f->write (a, buf, n, *offset) == -1) {
1881 fclose (fp);
1882 return -1;
1885 if (ferror (fp)) {
1886 nbdkit_error ("fread: %s: %m", filename);
1887 fclose (fp);
1888 return -1;
1890 (*offset) += n;
1891 len -= n;
1894 if (fclose (fp) == EOF) {
1895 nbdkit_error ("fclose: %s: %m", filename);
1896 return -1;
1899 return 0;
1902 #ifndef WIN32
1904 /* Run the script and store the output in the allocator from offset. */
1905 static int
1906 store_script (struct allocator *a,
1907 const char *script, uint64_t *offset)
1909 FILE *pp;
1910 char buf[BUFSIZ];
1911 size_t n;
1913 pp = popen (script, "r");
1914 if (pp == NULL) {
1915 nbdkit_error ("popen: %m");
1916 return -1;
1919 while (!feof (pp)) {
1920 n = fread (buf, 1, BUFSIZ, pp);
1921 if (n > 0) {
1922 if (a->f->write (a, buf, n, *offset) == -1) {
1923 pclose (pp);
1924 return -1;
1927 if (ferror (pp)) {
1928 nbdkit_error ("fread: %m");
1929 pclose (pp);
1930 return -1;
1932 (*offset) += n;
1935 if (pclose (pp) != 0) {
1936 nbdkit_error ("pclose: external command failed");
1937 return -1;
1940 return 0;
1943 /* Run the script and store up to len bytes of the output in the
1944 * allocator from offset.
1946 static int
1947 store_script_len (struct allocator *a,
1948 const char *script,
1949 int64_t len, uint64_t *offset)
1951 FILE *pp;
1952 char buf[BUFSIZ];
1953 size_t n;
1955 /* Restore SIGPIPE back to SIG_DFL, since shell can't undo SIG_IGN */
1956 signal (SIGPIPE, SIG_DFL);
1958 pp = popen (script, "r");
1959 if (pp == NULL) {
1960 nbdkit_error ("popen: %m");
1961 return -1;
1964 while (!feof (pp) && len > 0) {
1965 n = fread (buf, 1, MIN (len, BUFSIZ), pp);
1966 if (n > 0) {
1967 if (a->f->write (a, buf, n, *offset) == -1) {
1968 pclose (pp);
1969 return -1;
1972 if (ferror (pp)) {
1973 nbdkit_error ("fread: %m");
1974 pclose (pp);
1975 return -1;
1977 (*offset) += n;
1978 len -= n;
1981 /* Note for historical reasons we do not fail here if the external
1982 * script returns an error. That is because of existing documented
1983 * usage of scripts that generate infinite output which is
1984 * truncated. These scripts will almost always generate an error
1985 * (some kind of pipe fail), and because we didn't check that before
1986 * we cannot start to fail those scripts now.
1988 if (pclose (pp) == -1) {
1989 nbdkit_error ("pclose: %m");
1990 return -1;
1993 return 0;
1996 #else /* WIN32 */
1998 static int
1999 store_script (struct allocator *a,
2000 const char *script, uint64_t *offset)
2002 NOT_IMPLEMENTED_ON_WINDOWS ("<(SCRIPT)");
2005 static int
2006 store_script_len (struct allocator *a,
2007 const char *script,
2008 int64_t len, uint64_t *offset)
2010 NOT_IMPLEMENTED_ON_WINDOWS ("<(SCRIPT)");
2013 #endif /* WIN32 */