4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * * Neither the name of Red Hat nor the names of its contributors may be
16 * used to endorse or promote products derived from this software without
17 * specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
22 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
26 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
27 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
45 #define NBDKIT_API_VERSION 2
46 #include <nbdkit-plugin.h>
48 #include "allocator.h"
49 #include "ascii-ctype.h"
50 #include "byte-swapping.h"
53 #include "ispowerof2.h"
55 #include "nbdkit-string.h"
59 #include "windows-compat.h"
64 /* To print the AST, use -D data.AST=1 */
65 NBDKIT_DLL_PUBLIC
int data_debug_AST
= 0;
67 /* The abstract syntax tree. */
68 typedef struct expr expr_t
;
71 substring (string s
, size_t offset
, size_t len
)
74 string r
= empty_vector
;
76 for (i
= 0; i
< len
; ++i
) {
77 assert (offset
+i
< s
.len
);
78 if (string_append (&r
, s
.ptr
[offset
+i
]) == -1) {
79 nbdkit_error ("realloc: %m");
87 typedef size_t node_id
; /* references a node in expr_table below */
88 DEFINE_VECTOR_TYPE (node_ids
, node_id
);
91 EXPR_NULL
= 0, /* null expression, no effect */
92 EXPR_LIST
, /* list - list of node IDs / nested expr */
93 EXPR_BYTE
, /* b - single byte */
94 EXPR_ABS_OFFSET
, /* ui - absolute offset (@OFFSET) */
95 EXPR_REL_OFFSET
, /* i - relative offset (@+N or @-N) */
96 EXPR_ALIGN_OFFSET
, /* ui - align offset (@^ALIGNMENT) */
97 EXPR_FILE
, /* filename - read a file */
98 EXPR_SCRIPT
, /* script - run script */
99 EXPR_STRING
, /* string - string + length */
100 EXPR_FILL
, /* fl.n, fl.b - fill same byte b, N times */
101 EXPR_NAME
, /* name - insert a named expression */
102 EXPR_ASSIGN
, /* a.name, a.id - assign name to expr */
103 EXPR_REPEAT
, /* r.id, r.n - expr * N */
104 EXPR_SLICE
, /* sl.id, sl.n, sl.m - expr[N:M] */
139 /* We store a list of expressions (expr_t) in a global table.
141 * When referencing one expression from another (eg. for EXPR_REPEAT)
142 * we refer to the index into this table (node_id) instead of pointing
143 * to the expr_t directly.
145 * This allows us to have nodes which reference each other or are
146 * shared or removed without having to worry about reference counting.
147 * The whole table is freed when the plugin is unloaded.
149 * As an optimization, the zeroth element (node_id == 0) is a common
150 * EXPR_NULL. (This is optimized automatically, don't use node_id 0
151 * explicitly, in particular because if the table hasn't yet been
152 * allocated then there is no zeroth element).
154 DEFINE_VECTOR_TYPE (expr_list
, expr_t
);
155 static expr_list expr_table
;
157 /* Add the expression to the table, returning the node_id. */
159 new_node (const expr_t e
)
161 if (expr_table
.len
== 0) {
162 static const expr_t enull
= { .t
= EXPR_NULL
};
163 if (expr_list_append (&expr_table
, enull
) == -1)
166 if (e
.t
== EXPR_NULL
)
168 if (expr_list_append (&expr_table
, e
) == -1) {
170 nbdkit_error ("realloc");
173 return expr_table
.len
-1;
176 /* Get an expression by node_id. */
178 get_node (node_id id
)
180 assert (id
< expr_table
.len
);
181 return expr_table
.ptr
[id
];
185 free_expr_table (void)
190 for (i
= 0; i
< expr_table
.len
; ++i
) {
193 case EXPR_LIST
: free (e
.list
.ptr
); break;
194 case EXPR_FILE
: free (e
.filename
); break;
195 case EXPR_SCRIPT
: free (e
.script
); break;
196 case EXPR_STRING
: free (e
.string
.ptr
); break;
197 case EXPR_NAME
: free (e
.name
); break;
198 case EXPR_ASSIGN
: free (e
.a
.name
); break;
203 expr_list_reset (&expr_table
);
206 /* Construct an expression. */
208 expr (enum expr_type t
, ...)
210 expr_t e
= { .t
= t
};
213 /* Note that you cannot pass uint8_t through varargs, so for the
214 * byte fields we use int here.
218 case EXPR_NULL
: /* nothing */ break;
219 case EXPR_LIST
: e
.list
= va_arg (args
, node_ids
); break;
220 case EXPR_BYTE
: e
.b
= va_arg (args
, int); break;
221 case EXPR_ABS_OFFSET
:
222 case EXPR_ALIGN_OFFSET
:
223 e
.ui
= va_arg (args
, uint64_t);
225 case EXPR_REL_OFFSET
: e
.i
= va_arg (args
, int64_t); break;
226 case EXPR_FILE
: e
.filename
= va_arg (args
, char *); break;
227 case EXPR_SCRIPT
: e
.script
= va_arg (args
, char *); break;
228 case EXPR_STRING
: e
.string
= va_arg (args
, string
); break;
230 e
.fl
.b
= va_arg (args
, int);
231 e
.fl
.n
= va_arg (args
, uint64_t);
233 case EXPR_NAME
: e
.name
= va_arg (args
, char *); break;
235 e
.a
.name
= va_arg (args
, char *);
236 e
.a
.id
= va_arg (args
, node_id
);
239 e
.r
.id
= va_arg (args
, node_id
);
240 e
.r
.n
= va_arg (args
, uint64_t);
243 e
.sl
.id
= va_arg (args
, node_id
);
244 e
.sl
.n
= va_arg (args
, uint64_t);
245 e
.sl
.m
= va_arg (args
, int64_t);
253 /* Make a shallow copy of an expression. */
258 /* These have fields that have to be duplicated. */
260 if (node_ids_duplicate (&e
.list
, &e
.list
) == -1) {
261 nbdkit_error ("malloc");
266 e
.filename
= strdup (e
.filename
);
267 if (e
.filename
== NULL
) {
268 nbdkit_error ("strdup");
273 e
.script
= strdup (e
.script
);
274 if (e
.script
== NULL
) {
275 nbdkit_error ("strdup");
280 if (string_duplicate (&e
.string
, &e
.string
) == -1) {
281 nbdkit_error ("malloc");
286 e
.name
= strdup (e
.name
);
287 if (e
.name
== NULL
) {
288 nbdkit_error ("strdup");
293 e
.a
.name
= strdup (e
.a
.name
);
294 if (e
.a
.name
== NULL
) {
295 nbdkit_error ("strdup");
300 /* These don't require anything special. */
303 case EXPR_ABS_OFFSET
:
304 case EXPR_REL_OFFSET
:
305 case EXPR_ALIGN_OFFSET
:
316 debug_indent (int level
)
318 static const char spaces
[41] = " ";
323 return &spaces
[40-4*level
];
327 debug_expr (node_id id
, int level
)
329 const expr_t e
= get_node (id
);
334 nbdkit_debug ("%snull", debug_indent (level
));
337 nbdkit_debug ("%s(", debug_indent (level
));
338 for (i
= 0; i
< e
.list
.len
; ++i
)
339 debug_expr (e
.list
.ptr
[i
], level
+1);
340 nbdkit_debug ("%s)", debug_indent (level
));
343 nbdkit_debug ("%s%" PRIu8
, debug_indent (level
), e
.b
);
345 case EXPR_ABS_OFFSET
:
346 nbdkit_debug ("%s@%" PRIu64
, debug_indent (level
), e
.ui
);
348 case EXPR_REL_OFFSET
:
349 nbdkit_debug ("%s@%" PRIi64
, debug_indent (level
), e
.i
);
351 case EXPR_ALIGN_OFFSET
:
352 nbdkit_debug ("%s@^%" PRIi64
, debug_indent (level
), e
.ui
);
355 nbdkit_debug ("%s<%s", debug_indent (level
), e
.filename
);
358 nbdkit_debug ("%s<(%s)", debug_indent (level
), e
.script
);
361 CLEANUP_FREE_STRING string s
= empty_vector
;
363 for (i
= 0; i
< e
.string
.len
; ++i
) {
364 char c
= e
.string
.ptr
[i
];
365 if (ascii_isprint ((char) c
))
366 string_append (&s
, e
.string
.ptr
[i
]);
368 string_append (&s
, '\\');
369 string_append (&s
, 'x');
370 string_append (&s
, hexchar (c
>> 4));
371 string_append (&s
, hexchar (c
));
374 string_append (&s
, '\0');
375 nbdkit_debug ("%s\"%s\"", debug_indent (level
), s
.ptr
);
379 nbdkit_debug ("%sfill(%" PRIu8
"*%" PRIu64
")",
380 debug_indent (level
), e
.fl
.b
, e
.fl
.n
);
383 nbdkit_debug ("%s\\%s", debug_indent (level
), e
.name
);
386 nbdkit_debug ("%s(", debug_indent (level
));
387 debug_expr (e
.a
.id
, level
+1);
388 nbdkit_debug ("%s) -> \\%s", debug_indent (level
), e
.a
.name
);
391 nbdkit_debug ("%s(", debug_indent (level
));
392 debug_expr (e
.r
.id
, level
+1);
393 nbdkit_debug ("%s) *%" PRIu64
, debug_indent (level
), e
.r
.n
);
396 nbdkit_debug ("%s(", debug_indent (level
));
397 debug_expr (e
.sl
.id
, level
+1);
398 nbdkit_debug ("%s)[%" PRIu64
":%" PRIi64
"]",
399 debug_indent (level
), e
.sl
.n
, e
.sl
.m
);
404 /* Is the expression something which contains data, or something else
405 * (like an offset). Just a light check to reject nonsense
406 * expressions like "@0 * 10".
409 is_data_expr (const expr_t e
)
411 return e
.t
!= EXPR_ABS_OFFSET
&& e
.t
!= EXPR_REL_OFFSET
412 && e
.t
!= EXPR_ALIGN_OFFSET
;
415 /* Simple dictionary of name -> expression. */
416 typedef struct dict dict_t
;
419 const char *name
; /* Name excluding \ character. */
420 node_id id
; /* Associated expression. */
423 static int parser (int level
, const char *value
, size_t *start
, size_t len
,
425 static int optimize_ast (node_id root
, node_id
*root_rtn
);
426 static int evaluate (const dict_t
*dict
, node_id root
,
428 uint64_t *offset
, uint64_t *size
);
431 read_data_format (const char *value
, struct allocator
*a
, uint64_t *size_rtn
)
438 assert (expr_table
.len
== 0);
440 /* Run the parser across the entire string, returning the top level
443 if (parser (0, value
, &i
, strlen (value
), &root
) == -1)
446 if (optimize_ast (root
, &root
) == -1)
449 if (data_debug_AST
) {
450 nbdkit_debug ("BEGIN AST (-D data.AST=1)");
451 debug_expr (root
, 0);
452 nbdkit_debug ("END AST");
455 /* Evaluate the expression into the allocator. */
456 r
= evaluate (NULL
, root
, a
, &offset
, size_rtn
);
463 static int parse_string (const char *value
, size_t *start
, size_t len
,
465 static int parse_word (const char *value
, size_t *start
, size_t len
,
467 static size_t get_name (const char *value
, size_t i
, size_t len
,
469 static size_t get_var (const char *value
, size_t i
, size_t len
,
471 static size_t get_script (const char *value
, size_t i
, size_t len
);
473 /* This is the format parser. */
475 parser (int level
, const char *value
, size_t *start
, size_t len
,
479 /* List of node_ids that we are building up at this level. This is
480 * leaked on error paths, but we're going to call exit(1).
482 node_ids list
= empty_vector
;
485 #define APPEND_EXPR(node_id) \
487 if (node_ids_append (&list, (node_id)) == -1) { \
488 nbdkit_error ("realloc: %m"); \
489 exit (EXIT_FAILURE); \
498 case '#': /* # comment */
500 while (i
< len
&& value
[i
] != '\n')
504 case '@': /* @OFFSET */
505 if (++i
== len
) goto parse_error
;
508 if (++i
== len
) goto parse_error
;
509 if (sscanf (&value
[i
], "%" SCNi64
"%n", &i64
, &n
) == 1) {
511 nbdkit_error ("data parameter after @+ must not be negative");
515 APPEND_EXPR (new_node (expr (EXPR_REL_OFFSET
, i64
)));
521 if (++i
== len
) goto parse_error
;
522 if (sscanf (&value
[i
], "%" SCNi64
"%n", &i64
, &n
) == 1) {
524 nbdkit_error ("data parameter after @- must not be negative");
528 APPEND_EXPR (new_node (expr (EXPR_REL_OFFSET
, -i64
)));
533 case '^': /* @^ALIGNMENT */
534 if (++i
== len
) goto parse_error
;
535 /* We must use %i into i64 in order to parse 0x etc. */
536 if (sscanf (&value
[i
], "%" SCNi64
"%n", &i64
, &n
) == 1) {
538 nbdkit_error ("data parameter after @^ must not be negative");
541 /* XXX fix this arbitrary restriction */
542 if (!is_power_of_2 (i64
)) {
543 nbdkit_error ("data parameter @^%" PRIi64
" must be a power of 2",
548 APPEND_EXPR (new_node (expr (EXPR_ALIGN_OFFSET
, (uint64_t) i64
)));
553 case '0': case '1': case '2': case '3': case '4':
554 case '5': case '6': case '7': case '8': case '9':
555 /* We must use %i into i64 in order to parse 0x etc. */
556 if (sscanf (&value
[i
], "%" SCNi64
"%n", &i64
, &n
) == 1) {
558 nbdkit_error ("data parameter @OFFSET must not be negative");
562 APPEND_EXPR (new_node (expr (EXPR_ABS_OFFSET
, (uint64_t) i64
)));
575 /* Call self recursively. */
576 if (parser (level
+1, value
, &i
, len
, &id
) == -1)
578 /* parser() always returns a list. */
579 assert (get_node (id
).t
== EXPR_LIST
);
585 nbdkit_error ("unmatched ')' in data string");
591 case '*': /* expr*N */
594 nbdkit_error ("*N must follow an expression");
597 if (! is_data_expr (get_node (list
.ptr
[list
.len
-1]))) {
598 nbdkit_error ("*N cannot be applied to this type of expression");
601 if (sscanf (&value
[i
], "%" SCNi64
"%n", &i64
, &n
) == 1) {
603 nbdkit_error ("data parameter @OFFSET must not be negative");
609 nbdkit_error ("*N not numeric");
612 id
= list
.ptr
[list
.len
-1];
614 APPEND_EXPR (new_node (expr (EXPR_REPEAT
, id
, (uint64_t) i64
)));
617 case '[': /* expr[k:m] */
620 nbdkit_error ("[N:M] must follow an expression");
623 if (! is_data_expr (get_node (list
.ptr
[list
.len
-1]))) {
624 nbdkit_error ("[N:M] cannot be applied to this type of expression");
629 if (sscanf (&value
[i
], "%" SCNi64
":%" SCNi64
"]%n",
630 &i64
, &m
, &n
) == 2 ||
631 sscanf (&value
[i
], ":%" SCNi64
"]%n", &m
, &n
) == 1 ||
632 sscanf (&value
[i
], "%" SCNi64
":]%n", &i64
, &n
) == 1)
634 else if (strncmp (&value
[i
], ":]", 2) == 0)
637 nbdkit_error ("enclosed pattern (...)[N:M] not numeric");
640 id
= list
.ptr
[list
.len
-1];
642 APPEND_EXPR (new_node (expr (EXPR_SLICE
, id
, i64
, m
)));
646 if (i
+1 < len
&& value
[i
+1] == '(') { /* <(SCRIPT) */
651 flen
= get_script (value
, i
, len
);
652 if (flen
== 0) goto parse_error
;
653 script
= strndup (&value
[i
], flen
);
654 if (script
== NULL
) {
655 nbdkit_error ("strndup: %m");
658 i
+= flen
+ 1; /* +1 for trailing ) */
659 APPEND_EXPR (new_node (expr (EXPR_SCRIPT
, script
)));
666 /* The filename follows next in the string. */
667 flen
= strcspn (&value
[i
], "*[) \t\n");
669 nbdkit_error ("data parameter <FILE not a filename");
672 filename
= strndup (&value
[i
], flen
);
673 if (filename
== NULL
) {
674 nbdkit_error ("strndup: %m");
678 APPEND_EXPR (new_node (expr (EXPR_FILE
, filename
)));
682 case '"': { /* "String" */
683 string str
= empty_vector
;
686 if (parse_string (value
, &i
, len
, &str
) == -1)
688 APPEND_EXPR (new_node (expr (EXPR_STRING
, str
)));
692 case '\\': { /* \\NAME */
695 flen
= get_name (value
, i
, len
, &i
);
696 if (flen
== 0) goto parse_error
;
697 name
= strndup (&value
[i
], flen
);
699 nbdkit_error ("strndup: %m");
703 APPEND_EXPR (new_node (expr (EXPR_NAME
, name
)));
707 case '-': { /* -> \\NAME */
711 if (value
[i
] != '>') goto parse_error
;
714 nbdkit_error ("-> must follow an expression");
717 if (! is_data_expr (get_node (list
.ptr
[list
.len
-1]))) {
718 nbdkit_error ("-> cannot be applied to this type of expression");
721 flen
= get_name (value
, i
, len
, &i
);
722 if (flen
== 0) goto parse_error
;
723 name
= strndup (&value
[i
], flen
);
725 nbdkit_error ("strndup: %m");
728 id
= list
.ptr
[list
.len
-1];
731 APPEND_EXPR (new_node (expr (EXPR_ASSIGN
, name
, id
)));
735 case '$': { /* $VAR */
736 CLEANUP_FREE
char *name
= NULL
;
740 flen
= get_var (value
, i
, len
, &i
);
741 if (flen
== 0) goto parse_error
;
742 name
= strndup (&value
[i
], flen
);
744 nbdkit_error ("strndup: %m");
749 /* Look up the variable. */
750 content
= get_extra_param (name
);
752 content
= getenv (name
);
754 nbdkit_error ("$%s: variable not found", name
);
759 /* Call self recursively on the variable content. */
761 if (parser (0, content
, &ci
, strlen (content
), &id
) == -1)
763 /* parser() always returns a list. */
764 assert (get_node (id
).t
== EXPR_LIST
);
769 case '0': case '1': case '2': case '3': case '4': /* BYTE */
770 case '5': case '6': case '7': case '8': case '9':
771 /* We need to use %i here so it scans 0x etc correctly. */
772 if (sscanf (&value
[i
], "%i%n", &j
, &n
) == 1)
776 if (j
< 0 || j
> 255) {
777 nbdkit_error ("data parameter BYTE must be in the range 0..255");
780 APPEND_EXPR (new_node (expr (EXPR_BYTE
, j
)));
783 case 'l': case 'b': { /* le or be + NN: + WORD */
784 string str
= empty_vector
;
786 if (parse_word (value
, &i
, len
, &str
) == -1)
788 APPEND_EXPR (new_node (expr (EXPR_STRING
, str
)));
792 case ' ': case '\t': case '\n': /* Skip whitespace. */
793 case '\f': case '\r': case '\v':
799 nbdkit_error ("data parameter: parsing error at offset %zu", i
);
804 /* If we reach the end of the string and level != 0 that means
805 * there is an unmatched '(' in the string.
808 nbdkit_error ("unmatched '(' in data string");
815 /* Return the node ID. */
816 *rtn
= new_node (expr (EXPR_LIST
, list
));
820 /* Return the next \NAME in the input. This skips whitespace, setting
821 * *initial to the index of the start of the NAME (minus backslash).
822 * It returns the length of NAME (minus backslash), or 0 if not found.
825 get_name (const char *value
, size_t i
, size_t len
, size_t *initial
)
830 if (!ascii_isspace (value
[i
]))
835 if (i
>= len
|| value
[i
] != '\\') return 0;
837 if (i
>= len
) return 0;
841 (ascii_isalnum (value
[i
]) || value
[i
] == '_' || value
[i
] == '-')) {
849 /* Like get_name above, but for $VAR variables. The accepted variable
850 * name is /\$[a-z_][a-z0-9_]+/i
853 get_var (const char *value
, size_t i
, size_t len
, size_t *initial
)
858 if (!ascii_isspace (value
[i
]))
863 if (i
>= len
|| value
[i
] != '$') return 0;
865 if (i
>= len
) return 0;
868 if (!ascii_isalpha (value
[i
]) && value
[i
] != '_')
871 while (i
< len
&& (ascii_isalnum (value
[i
]) || value
[i
] == '_')) {
879 /* Find end of a <(SCRIPT), ignoring nested (). */
881 get_script (const char *value
, size_t i
, size_t len
)
886 for (; i
< len
; ++i
, ++r
) {
889 else if (value
[i
] == ')') {
903 /* Parse a "String" with C-like escaping, and store it in the string
904 * vector. When this is called we have already consumed the initial
905 * double quote character.
908 parse_string (const char *value
, size_t *start
, size_t len
, string
*rtn
)
911 unsigned char c
, x0
, x1
;
913 *rtn
= (string
) empty_vector
;
915 for (; i
< len
; ++i
) {
919 /* End of the string. */
924 /* Start of escape sequence. */
925 if (++i
== len
) goto unexpected_end_of_string
;
928 case 'a': c
= 0x7; break;
929 case 'b': c
= 0x8; break;
930 case 'f': c
= 0xc; break;
931 case 'n': c
= 0xa; break;
932 case 'r': c
= 0xd; break;
933 case 't': c
= 0x9; break;
934 case 'v': c
= 0xb; break;
935 case '\\': case '"': break;
937 if (++i
== len
) goto unexpected_end_of_string
;
939 if (++i
== len
) goto unexpected_end_of_string
;
941 if (!ascii_isxdigit (x0
) || !ascii_isxdigit (x1
)) {
942 nbdkit_error ("data: \\xNN must be followed by exactly "
943 "two hexadecimal characters");
946 c
= hexbyte (x0
, x1
);
948 case '0': case '1': case '2': case '3': case '4':
949 case '5': case '6': case '7': case '8': case '9':
951 nbdkit_error ("data: string numeric and unicode sequences "
952 "are not yet implemented");
957 /* Any other character is added to the string. */
958 if (string_append (rtn
, c
) == -1) {
959 nbdkit_error ("realloc: %m");
965 /* If we reach here then we have run off the end of the data string
966 * without finding the final quote.
968 unexpected_end_of_string
:
969 nbdkit_error ("data parameter: unterminated string");
973 /* Parse le<NN>:WORD be<NN>:WORD expressions. These are parsed into strings. */
975 parse_word (const char *value
, size_t *start
, size_t len
, string
*rtn
)
978 CLEANUP_FREE_STRING string copy
= empty_vector
;
980 enum { little
, big
} endian
;
985 *rtn
= (string
) empty_vector
;
987 /* It's convenient to use the nbdkit_parse* functions below because
988 * they deal already with overflow etc. However these functions
989 * require a \0-terminated string, so we have to copy what we are
990 * parsing to a new string here.
993 if (ascii_isspace (value
[i
]))
998 assert (n
> 0); /* must be because caller parsed 'l'/'b' */
999 if (string_reserve (©
, n
+ 1) == -1) {
1000 nbdkit_error ("realloc: %m");
1003 memcpy (copy
.ptr
, &value
[*start
], n
);
1008 /* Reserve enough space in the return buffer for the longest
1009 * possible bitstring (64 bits / 8 bytes).
1011 if (string_reserve (rtn
, 8) == -1) {
1012 nbdkit_error ("realloc: %m");
1016 /* Parse the rest of {le|be}{16|32|64}: */
1017 if (strncmp (copy
.ptr
, "le16:", 5) == 0) {
1018 endian
= little
; rtn
->len
= 2;
1020 else if (strncmp (copy
.ptr
, "le32:", 5) == 0) {
1021 endian
= little
; rtn
->len
= 4;
1023 else if (strncmp (copy
.ptr
, "le64:", 5) == 0) {
1024 endian
= little
; rtn
->len
= 8;
1026 else if (strncmp (copy
.ptr
, "be16:", 5) == 0) {
1027 endian
= big
; rtn
->len
= 2;
1029 else if (strncmp (copy
.ptr
, "be32:", 5) == 0) {
1030 endian
= big
; rtn
->len
= 4;
1032 else if (strncmp (copy
.ptr
, "be64:", 5) == 0) {
1033 endian
= big
; rtn
->len
= 8;
1036 nbdkit_error ("data parameter: expected \"le16/32/64:\" "
1037 "or \"be16/32/64:\" at offset %zu", i
);
1041 /* Parse the word field into a host-order unsigned int. */
1044 if (nbdkit_parse_uint16_t ("data", ©
.ptr
[5], &u16
) == -1)
1048 if (nbdkit_parse_uint32_t ("data", ©
.ptr
[5], &u32
) == -1)
1052 if (nbdkit_parse_uint64_t ("data", ©
.ptr
[5], &u64
) == -1)
1058 /* Now depending on the endianness and size, convert to the final
1065 *((uint16_t *) rtn
->ptr
) = htole16 (u16
);
1068 *((uint32_t *) rtn
->ptr
) = htole32 (u32
);
1071 *((uint64_t *) rtn
->ptr
) = htole64 (u64
);
1080 *((uint16_t *) rtn
->ptr
) = htobe16 (u16
);
1083 *((uint32_t *) rtn
->ptr
) = htobe32 (u32
);
1086 *((uint64_t *) rtn
->ptr
) = htobe64 (u64
);
1095 /* This simple optimization pass over the AST simplifies some
1098 static bool expr_safe_to_inline (const expr_t
);
1099 static bool list_safe_to_inline (const node_ids
);
1100 static bool expr_is_single_byte (const expr_t
, uint8_t *b
);
1101 static bool exprs_can_combine (expr_t e0
, expr_t e1
, node_id
*id_rtn
);
1104 optimize_ast (node_id root
, node_id
*root_rtn
)
1108 node_ids list
= empty_vector
;
1111 switch (get_node (root
).t
) {
1113 /* For convenience this makes a new list node. */
1115 /* Optimize each element of the list. */
1116 for (i
= 0; i
< get_node (root
).list
.len
; ++i
) {
1117 id
= get_node (root
).list
.ptr
[i
];
1118 if (optimize_ast (id
, &id
) == -1)
1120 switch (get_node (id
).t
) {
1122 /* null elements of a list can be ignored. */
1125 /* Simple lists can be inlined, but be careful with
1126 * assignments, offsets and other expressions which are scoped
1127 * because flattening the list changes the scope.
1129 if (list_safe_to_inline (get_node (id
).list
)) {
1130 for (j
= 0; j
< get_node (id
).list
.len
; ++j
) {
1131 if (node_ids_append (&list
, get_node (id
).list
.ptr
[j
]) == -1)
1132 goto list_append_error
;
1138 if (node_ids_append (&list
, id
) == -1) {
1140 nbdkit_error ("realloc: %m");
1146 /* Combine adjacent pairs of elements if possible. */
1147 for (i
= 1; i
< list
.len
; ++i
) {
1150 id0
= list
.ptr
[i
-1];
1152 if (exprs_can_combine (get_node (id0
), get_node (id1
), &id
)) {
1154 node_ids_remove (&list
, i
);
1159 /* List of length 0 is replaced with null. */
1160 if (list
.len
== 0) {
1162 *root_rtn
= new_node (expr (EXPR_NULL
));
1166 /* List of length 1 is replaced with the first element, but as
1167 * above avoid inlining if it is not a safe expression.
1169 if (list
.len
== 1 && expr_safe_to_inline (get_node (list
.ptr
[0]))) {
1176 *root_rtn
= new_node (expr (EXPR_LIST
, list
));
1180 id
= get_node (root
).a
.id
;
1181 if (optimize_ast (id
, &id
) == -1)
1183 e2
= copy_expr (get_node (root
));
1185 *root_rtn
= new_node (e2
);
1189 /* Repeating zero times can be replaced by null. */
1190 if (get_node (root
).r
.n
== 0) {
1191 *root_rtn
= new_node (expr (EXPR_NULL
));
1194 id
= get_node (root
).r
.id
;
1195 if (optimize_ast (id
, &id
) == -1)
1198 /* expr*1 can be replaced with simply expr.
1199 * null*N can be replaced with null.
1201 if (get_node (root
).r
.n
== 1 || get_node (id
).t
== EXPR_NULL
) {
1205 /* expr*X*Y can be replaced by expr*(X*Y). */
1206 if (get_node (id
).t
== EXPR_REPEAT
) {
1207 *root_rtn
= new_node (expr (EXPR_REPEAT
,
1209 get_node (root
).r
.n
* get_node (id
).r
.n
));
1212 /* fill(b,X)*Y can be replaced by fill(b,X*Y). */
1213 if (get_node (id
).t
== EXPR_FILL
) {
1214 *root_rtn
= new_node (expr (EXPR_FILL
,
1216 get_node (root
).r
.n
* get_node (id
).fl
.n
));
1219 /* For short strings and small values or N, string*N can be
1220 * replaced by N copies of the string.
1222 if (get_node (id
).t
== EXPR_STRING
&&
1223 get_node (root
).r
.n
<= 4 &&
1224 get_node (id
).string
.len
<= 512) {
1225 string s
= empty_vector
;
1226 size_t n
= get_node (root
).r
.n
;
1227 const string sub
= get_node (id
).string
;
1229 for (i
= 0; i
< n
; ++i
) {
1230 for (j
= 0; j
< sub
.len
; ++j
) {
1231 if (string_append (&s
, sub
.ptr
[j
]) == -1) {
1232 nbdkit_error ("realloc: %m");
1238 *root_rtn
= new_node (expr (EXPR_STRING
, s
));
1241 /* Single byte expression * N can be replaced by a fill. */
1245 if (expr_is_single_byte (get_node (id
), &b
)) {
1246 *root_rtn
= new_node (expr (EXPR_FILL
, b
, get_node (root
).r
.n
));
1251 e2
= copy_expr (get_node (root
));
1253 *root_rtn
= new_node (e2
);
1257 uint64_t n
= get_node (root
).sl
.n
;
1258 int64_t m
= get_node (root
).sl
.m
;
1261 /* A zero-length slice can be replaced by null. */
1263 *root_rtn
= new_node (expr (EXPR_NULL
));
1266 id
= get_node (root
).sl
.id
;
1267 if (optimize_ast (id
, &id
) == -1)
1270 /* Some constant expressions can be sliced into something shorter.
1271 * Be conservative. If the slice is invalid then we prefer to do
1272 * nothing here because the whole expression might be optimized
1273 * away and if it isn't then we will give an error later.
1275 switch (get_node (id
).t
) {
1276 case EXPR_NULL
: /* null[:0] or null[0:] => null */
1277 if (m
== 0 || (n
== 0 && m
== -1)) {
1278 *root_rtn
= new_node (expr (EXPR_NULL
));
1282 case EXPR_BYTE
: /* byte[:1] or byte[0:] => byte */
1283 if (m
== 1 || (n
== 0 && m
== -1)) {
1284 *root_rtn
= new_node (expr (EXPR_BYTE
, get_node (id
).b
));
1288 case EXPR_STRING
: /* substring */
1289 len
= get_node (id
).string
.len
;
1290 if (m
>= 0 && n
<= m
&& m
<= len
) {
1292 *root_rtn
= new_node (expr (EXPR_BYTE
, get_node (id
).string
.ptr
[n
]));
1294 string sub
= substring (get_node (id
).string
, n
, m
-n
);
1295 *root_rtn
= new_node (expr (EXPR_STRING
, sub
));
1299 if (m
== -1 && n
<= len
) {
1301 *root_rtn
= new_node (expr (EXPR_BYTE
, get_node (id
).string
.ptr
[n
]));
1303 string sub
= substring (get_node (id
).string
, n
, len
-n
);
1304 *root_rtn
= new_node (expr (EXPR_STRING
, sub
));
1309 case EXPR_FILL
: /* slice of a fill is a shorter fill */
1310 len
= get_node (id
).fl
.n
;
1311 if (m
>= 0 && n
<= m
&& m
<= len
) {
1313 *root_rtn
= new_node (expr (EXPR_BYTE
, get_node (id
).fl
.b
));
1315 *root_rtn
= new_node (expr (EXPR_FILL
, get_node (id
).fl
.b
, m
-n
));
1318 if (m
== -1 && n
<= len
) {
1320 *root_rtn
= new_node (expr (EXPR_BYTE
, get_node (id
).fl
.b
));
1322 *root_rtn
= new_node (expr (EXPR_FILL
, get_node (id
).fl
.b
, len
-n
));
1329 e2
= copy_expr (get_node (root
));
1331 *root_rtn
= new_node (e2
);
1336 /* A zero length string can be replaced with null. */
1337 if (get_node (root
).string
.len
== 0) {
1338 *root_rtn
= new_node (expr (EXPR_NULL
));
1341 /* Strings containing the same character can be replaced by a
1342 * fill. These can be produced by other optimizations.
1344 if (get_node (root
).string
.len
> 1) {
1345 const string s
= get_node (root
).string
;
1346 uint8_t b
= s
.ptr
[0];
1348 for (i
= 1; i
< s
.len
; ++i
)
1353 *root_rtn
= new_node (expr (EXPR_FILL
, b
, (uint64_t) s
.len
));
1361 /* Zero-length fill can be replaced by null. */
1362 if (get_node (root
).fl
.n
== 0) {
1363 *root_rtn
= new_node (expr (EXPR_NULL
));
1371 case EXPR_ABS_OFFSET
:
1372 case EXPR_REL_OFFSET
:
1373 case EXPR_ALIGN_OFFSET
:
1384 /* Test if an expression can be safely inlined in a superior list
1385 * without changing the meaning of any scoped expressions.
1388 expr_safe_to_inline (const expr_t e
)
1391 /* Assignments and named expressions are scoped. */
1396 /* @Offsets are scoped. */
1397 case EXPR_ABS_OFFSET
:
1398 case EXPR_REL_OFFSET
:
1399 case EXPR_ALIGN_OFFSET
:
1402 /* Everything else should be safe. */
1418 /* Test if a list of expressions is safe to inline in a superior list. */
1420 list_safe_to_inline (const node_ids list
)
1424 for (i
= 0; i
< list
.len
; ++i
) {
1425 if (!expr_safe_to_inline (get_node (list
.ptr
[i
])))
1432 /* For some constant expressions which are a length 1 byte, return
1433 * true and the byte.
1436 expr_is_single_byte (const expr_t e
, uint8_t *b
)
1439 case EXPR_BYTE
: /* A single byte. */
1442 case EXPR_LIST
: /* A single element list if it is single byte */
1443 if (e
.list
.len
!= 1)
1445 return expr_is_single_byte (get_node (e
.list
.ptr
[0]), b
);
1446 case EXPR_STRING
: /* A length-1 string. */
1447 if (e
.string
.len
!= 1)
1449 if (b
) *b
= e
.string
.ptr
[0];
1451 case EXPR_FILL
: /* A length-1 fill. */
1456 case EXPR_REPEAT
: /* EXPR*1 if EXPR is single byte */
1459 return expr_is_single_byte (get_node (e
.r
.id
), b
);
1465 /* Test if two adjacent constant expressions can be combined and if so
1466 * return a new expression which is the combination of both. For
1467 * example, two bytes are combined into a string (1 2 => "\x01\x02"),
1468 * or a string and a byte into a longer string ("\x01\x02" 3 =>
1472 exprs_can_combine (expr_t e0
, expr_t e1
, node_id
*id_rtn
)
1474 string s
= empty_vector
;
1481 case EXPR_BYTE
: /* byte byte => fill | string */
1483 *id_rtn
= new_node (expr (EXPR_FILL
, e0
.b
, UINT64_C (2)));
1486 if (string_append (&s
, e0
.b
) == -1 ||
1487 string_append (&s
, e1
.b
) == -1)
1489 *id_rtn
= new_node (expr (EXPR_STRING
, s
));
1492 case EXPR_STRING
: /* byte string => string */
1493 len
= e1
.string
.len
;
1494 if (string_reserve (&s
, len
+1) == -1)
1498 memcpy (&s
.ptr
[1], e1
.string
.ptr
, len
);
1499 *id_rtn
= new_node (expr (EXPR_STRING
, s
));
1501 case EXPR_FILL
: /* byte fill => fill, if the same */
1502 if (e0
.b
!= e1
.fl
.b
)
1504 e2
= copy_expr (e1
);
1506 *id_rtn
= new_node (e2
);
1514 case EXPR_BYTE
: /* string byte => string */
1515 len
= e0
.string
.len
;
1516 if (string_reserve (&s
, len
+1) == -1)
1519 memcpy (s
.ptr
, e0
.string
.ptr
, len
);
1521 *id_rtn
= new_node (expr (EXPR_STRING
, s
));
1523 case EXPR_STRING
: /* string string => string */
1524 len
= e0
.string
.len
;
1525 len1
= e1
.string
.len
;
1526 if (string_reserve (&s
, len
+len1
) == -1)
1529 memcpy (s
.ptr
, e0
.string
.ptr
, len
);
1530 memcpy (&s
.ptr
[len
], e1
.string
.ptr
, len1
);
1531 *id_rtn
= new_node (expr (EXPR_STRING
, s
));
1539 case EXPR_BYTE
: /* fill byte => fill, if the same */
1540 if (e0
.fl
.b
!= e1
.b
)
1542 e2
= copy_expr (e0
);
1544 *id_rtn
= new_node (e2
);
1546 case EXPR_FILL
: /* fill fill => fill, if the same */
1547 if (e0
.fl
.b
!= e1
.fl
.b
)
1549 e2
= copy_expr (e0
);
1551 *id_rtn
= new_node (e2
);
1562 nbdkit_error ("realloc: %m");
1563 exit (EXIT_FAILURE
);
1566 static int store_file (struct allocator
*a
,
1567 const char *filename
, uint64_t *offset
);
1568 static int store_file_slice (struct allocator
*a
,
1569 const char *filename
,
1570 uint64_t skip
, int64_t end
, uint64_t *offset
);
1571 static int store_script (struct allocator
*a
,
1572 const char *script
, uint64_t *offset
);
1573 static int store_script_len (struct allocator
*a
,
1575 int64_t len
, uint64_t *offset
);
1577 /* This is the evaluator. It takes the root (node_id) of the parsed
1578 * abstract syntax tree and evaulates it into the allocator.
1581 evaluate (const dict_t
*dict
, node_id root
,
1582 struct allocator
*a
, uint64_t *offset
, uint64_t *size
)
1584 /* 'd' is the local dictionary for this function. Assignments are
1585 * added to the dictionary in this scope and passed to nested
1586 * scopes. This is leaked on error paths, but we're going to call
1589 dict_t
*d
= (dict_t
*) dict
;
1593 /* Extract the list from the current node. If the current node is
1594 * not EXPR_LIST then make one for convenience below.
1596 if (get_node (root
).t
== EXPR_LIST
) {
1597 list
= get_node (root
).list
;
1604 for (i
= 0; i
< list
.len
; ++i
) {
1605 const expr_t e
= get_node (list
.ptr
[i
]);
1608 case EXPR_NULL
: /* does nothing */ break;
1611 /* Store the byte. */
1612 if (a
->f
->write (a
, &e
.b
, 1, *offset
) == -1)
1617 case EXPR_ABS_OFFSET
:
1618 /* XXX Check it does not overflow 63 bits. */
1622 case EXPR_REL_OFFSET
:
1623 if (e
.i
< 0 && -e
.i
> *offset
) {
1624 nbdkit_error ("data parameter @-%" PRIi64
" "
1625 "must not be larger than the current offset %" PRIu64
,
1629 /* XXX Check it does not overflow 63 bits. */
1633 case EXPR_ALIGN_OFFSET
:
1634 *offset
= ROUND_UP (*offset
, e
.ui
);
1638 if (store_file (a
, e
.filename
, offset
) == -1)
1643 if (store_script (a
, e
.script
, offset
) == -1)
1648 /* Copy the string into the allocator. */
1649 if (a
->f
->write (a
, e
.string
.ptr
, e
.string
.len
, *offset
) == -1)
1651 *offset
+= e
.string
.len
;
1655 if (a
->f
->fill (a
, e
.fl
.b
, e
.fl
.n
, *offset
) == -1)
1663 d
= malloc (sizeof *d
);
1665 nbdkit_error ("malloc: %m");
1675 CLEANUP_FREE_ALLOCATOR
struct allocator
*a2
= NULL
;
1676 uint64_t offset2
= 0, size2
= 0;
1679 /* Look up the expression in the current dictionary. */
1680 for (t
= d
; t
!= NULL
; t
= t
->next
)
1681 if (strcmp (t
->name
, e
.name
) == 0)
1684 nbdkit_error ("\\%s not defined", e
.name
);
1688 /* Evaluate and then substitute the expression. */
1689 a2
= create_allocator ("sparse", false);
1691 nbdkit_error ("malloc: %m");
1694 /* NB: We pass the environment at the time that the assignment was
1695 * made (t->next) not the current environment. This is deliberate.
1697 if (evaluate (t
->next
, t
->id
, a2
, &offset2
, &size2
) == -1)
1700 if (a
->f
->blit (a2
, a
, size2
, 0, *offset
) == -1)
1709 /* Optimize some cases so we don't always have to create a
1713 /* <FILE[N:M] can be optimized by not reading in the whole file.
1714 * For files like /dev/urandom which are infinite this stops an
1717 if (e
.t
== EXPR_SLICE
&& get_node (e
.sl
.id
).t
== EXPR_FILE
) {
1718 if (store_file_slice (a
, get_node (e
.sl
.id
).filename
,
1719 e
.sl
.n
, e
.sl
.m
, offset
) == -1)
1723 /* <(SCRIPT)[:LEN] must be optimized by truncating the
1724 * output of the script.
1726 else if (e
.t
== EXPR_SLICE
&& e
.sl
.n
== 0 &&
1727 get_node (e
.sl
.id
).t
== EXPR_SCRIPT
) {
1728 if (store_script_len (a
, get_node (e
.sl
.id
).script
, e
.sl
.m
,
1734 /* This is the non-optimized case.
1736 * Nesting creates a new context where there is a new allocator
1737 * and the offset is reset to 0.
1739 CLEANUP_FREE_ALLOCATOR
struct allocator
*a2
= NULL
;
1740 uint64_t offset2
= 0, size2
= 0, m
;
1744 case EXPR_LIST
: id
= list
.ptr
[i
]; break;
1745 case EXPR_REPEAT
: id
= e
.r
.id
; break;
1746 case EXPR_SLICE
: id
= e
.sl
.id
; break;
1750 a2
= create_allocator ("sparse", false);
1752 nbdkit_error ("malloc: %m");
1755 if (evaluate (d
, id
, a2
, &offset2
, &size2
) == -1)
1760 if (a
->f
->blit (a2
, a
, size2
, 0, *offset
) == -1)
1765 /* Duplicate the allocator a2 N times. */
1766 for (j
= 0; j
< e
.r
.n
; ++j
) {
1767 if (a
->f
->blit (a2
, a
, size2
, 0, *offset
) == -1)
1774 m
= e
.sl
.m
< 0 ? size2
: e
.sl
.m
;
1775 if (e
.sl
.n
< 0 || m
< 0 ||
1776 e
.sl
.n
> size2
|| m
> size2
||
1778 nbdkit_error ("[N:M] does not describe a valid slice");
1781 /* Take a slice from the allocator. */
1782 if (a
->f
->blit (a2
, a
, m
-e
.sl
.n
, e
.sl
.n
, *offset
) == -1)
1784 *offset
+= m
-e
.sl
.n
;
1793 /* Adjust the size if the offset is now larger. */
1794 if (*size
< *offset
)
1798 /* Free assignments in the local dictionary. */
1808 /* Store file at current offset in the allocator, updating the offset. */
1810 store_file (struct allocator
*a
,
1811 const char *filename
, uint64_t *offset
)
1817 fp
= fopen (filename
, "r");
1819 nbdkit_error ("%s: %m", filename
);
1823 while (!feof (fp
)) {
1824 n
= fread (buf
, 1, BUFSIZ
, fp
);
1826 if (a
->f
->write (a
, buf
, n
, *offset
) == -1) {
1832 nbdkit_error ("fread: %s: %m", filename
);
1839 if (fclose (fp
) == EOF
) {
1840 nbdkit_error ("fclose: %s: %m", filename
);
1849 store_file_slice (struct allocator
*a
,
1850 const char *filename
,
1851 uint64_t skip
, int64_t end
, uint64_t *offset
)
1858 if ((end
>= 0 && skip
> end
) || end
< -2) {
1859 nbdkit_error ("<FILE[N:M] does not describe a valid slice");
1866 fp
= fopen (filename
, "r");
1868 nbdkit_error ("%s: %m", filename
);
1872 if (fseek (fp
, skip
, SEEK_SET
) == -1) {
1873 nbdkit_error ("%s: fseek: %m", filename
);
1877 while (!feof (fp
) && (end
== -1 || len
> 0)) {
1878 n
= fread (buf
, 1, end
== -1 ? BUFSIZ
: MIN (len
, BUFSIZ
), fp
);
1880 if (a
->f
->write (a
, buf
, n
, *offset
) == -1) {
1886 nbdkit_error ("fread: %s: %m", filename
);
1894 if (fclose (fp
) == EOF
) {
1895 nbdkit_error ("fclose: %s: %m", filename
);
1904 /* Run the script and store the output in the allocator from offset. */
1906 store_script (struct allocator
*a
,
1907 const char *script
, uint64_t *offset
)
1913 pp
= popen (script
, "r");
1915 nbdkit_error ("popen: %m");
1919 while (!feof (pp
)) {
1920 n
= fread (buf
, 1, BUFSIZ
, pp
);
1922 if (a
->f
->write (a
, buf
, n
, *offset
) == -1) {
1928 nbdkit_error ("fread: %m");
1935 if (pclose (pp
) != 0) {
1936 nbdkit_error ("pclose: external command failed");
1943 /* Run the script and store up to len bytes of the output in the
1944 * allocator from offset.
1947 store_script_len (struct allocator
*a
,
1949 int64_t len
, uint64_t *offset
)
1955 /* Restore SIGPIPE back to SIG_DFL, since shell can't undo SIG_IGN */
1956 signal (SIGPIPE
, SIG_DFL
);
1958 pp
= popen (script
, "r");
1960 nbdkit_error ("popen: %m");
1964 while (!feof (pp
) && len
> 0) {
1965 n
= fread (buf
, 1, MIN (len
, BUFSIZ
), pp
);
1967 if (a
->f
->write (a
, buf
, n
, *offset
) == -1) {
1973 nbdkit_error ("fread: %m");
1981 /* Note for historical reasons we do not fail here if the external
1982 * script returns an error. That is because of existing documented
1983 * usage of scripts that generate infinite output which is
1984 * truncated. These scripts will almost always generate an error
1985 * (some kind of pipe fail), and because we didn't check that before
1986 * we cannot start to fail those scripts now.
1988 if (pclose (pp
) == -1) {
1989 nbdkit_error ("pclose: %m");
1999 store_script (struct allocator
*a
,
2000 const char *script
, uint64_t *offset
)
2002 NOT_IMPLEMENTED_ON_WINDOWS ("<(SCRIPT)");
2006 store_script_len (struct allocator
*a
,
2008 int64_t len
, uint64_t *offset
)
2010 NOT_IMPLEMENTED_ON_WINDOWS ("<(SCRIPT)");