plugins/data/format.c

   1 /* nbdkit
   2  * Copyright Red Hat
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions are
   6  * met:
   7  *
   8  * * Redistributions of source code must retain the above copyright
   9  * notice, this list of conditions and the following disclaimer.
  10  *
  11  * * Redistributions in binary form must reproduce the above copyright
  12  * notice, this list of conditions and the following disclaimer in the
  13  * documentation and/or other materials provided with the distribution.
  14  *
  15  * * Neither the name of Red Hat nor the names of its contributors may be
  16  * used to endorse or promote products derived from this software without
  17  * specific prior written permission.
  18  *
  19  * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
  20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
  22  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
  23  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  26  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  27  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  30  * SUCH DAMAGE.
  31  */
  32
  33 #include <config.h>
  34
  35 #include <stdio.h>
  36 #include <stdlib.h>
  37 #include <stdbool.h>
  38 #include <stdint.h>
  39 #include <inttypes.h>
  40 #include <stdarg.h>
  41 #include <string.h>
  42 #include <assert.h>
  43 #include <signal.h>
  44
  45 #define NBDKIT_API_VERSION 2
  46 #include <nbdkit-plugin.h>
  47
  48 #include "allocator.h"
  49 #include "ascii-ctype.h"
  50 #include "byte-swapping.h"
  51 #include "cleanup.h"
  52 #include "hexdigit.h"
  53 #include "ispowerof2.h"
  54 #include "minmax.h"
  55 #include "nbdkit-string.h"
  56 #include "rounding.h"
  57 #include "strndup.h"
  58 #include "vector.h"
  59 #include "windows-compat.h"
  60
  61 #include "data.h"
  62 #include "format.h"
  63
  64 /* To print the AST, use -D data.AST=1 */
  65 NBDKIT_DLL_PUBLIC int data_debug_AST = 0;
  66
  67 /* The abstract syntax tree. */
  68 typedef struct expr expr_t;
  69
  70 static string
  71 substring (string s, size_t offset, size_t len)
  72 {
  73   size_t i;
  74   string r = empty_vector;
  75
  76   for (i = 0; i < len; ++i) {
  77     assert (offset+i < s.len);
  78     if (string_append (&r, s.ptr[offset+i]) == -1) {
  79       nbdkit_error ("realloc: %m");
  80       exit (EXIT_FAILURE);
  81     }
  82   }
  83
  84   return r;
  85 }
  86
  87 typedef size_t node_id;         /* references a node in expr_table below */
  88 DEFINE_VECTOR_TYPE (node_ids, node_id);
  89
  90 enum expr_type {
  91   EXPR_NULL = 0,              /* null expression, no effect */
  92   EXPR_LIST,                  /* list     - list of node IDs / nested expr */
  93   EXPR_BYTE,                  /* b        - single byte */
  94   EXPR_ABS_OFFSET,            /* ui       - absolute offset (@OFFSET) */
  95   EXPR_REL_OFFSET,            /* i        - relative offset (@+N or @-N) */
  96   EXPR_ALIGN_OFFSET,          /* ui       - align offset (@^ALIGNMENT) */
  97   EXPR_FILE,                  /* filename - read a file */
  98   EXPR_SCRIPT,                /* script   - run script */
  99   EXPR_STRING,                /* string   - string + length */
 100   EXPR_FILL,                  /* fl.n, fl.b - fill same byte b, N times */
 101   EXPR_NAME,                  /* name     - insert a named expression */
 102   EXPR_ASSIGN,                /* a.name, a.id - assign name to expr */
 103   EXPR_REPEAT,                /* r.id, r.n - expr * N */
 104   EXPR_SLICE,                 /* sl.id, sl.n, sl.m - expr[N:M] */
 105 };
 106
 107 struct expr {
 108   enum expr_type t;
 109   union {
 110     node_ids list;
 111     uint8_t b;
 112     int64_t i;
 113     uint64_t ui;
 114     node_id id;
 115     char *filename;
 116     char *script;
 117     string string;
 118     struct {
 119       uint64_t n;
 120       uint8_t b;
 121     } fl;
 122     char *name;
 123     struct {
 124       char *name;
 125       node_id id;
 126     } a;
 127     struct {
 128       node_id id;
 129       uint64_t n;
 130     } r;
 131     struct {
 132       node_id id;
 133       uint64_t n;
 134       int64_t m;
 135     } sl;
 136   };
 137 };
 138
 139 /* We store a list of expressions (expr_t) in a global table.
 140  *
 141  * When referencing one expression from another (eg. for EXPR_REPEAT)
 142  * we refer to the index into this table (node_id) instead of pointing
 143  * to the expr_t directly.
 144  *
 145  * This allows us to have nodes which reference each other or are
 146  * shared or removed without having to worry about reference counting.
 147  * The whole table is freed when the plugin is unloaded.
 148  *
 149  * As an optimization, the zeroth element (node_id == 0) is a common
 150  * EXPR_NULL.  (This is optimized automatically, don't use node_id 0
 151  * explicitly, in particular because if the table hasn't yet been
 152  * allocated then there is no zeroth element).
 153  */
 154 DEFINE_VECTOR_TYPE (expr_list, expr_t);
 155 static expr_list expr_table;
 156
 157 /* Add the expression to the table, returning the node_id. */
 158 static node_id
 159 new_node (const expr_t e)
 160 {
 161   if (expr_table.len == 0) {
 162     static const expr_t enull = { .t = EXPR_NULL };
 163     if (expr_list_append (&expr_table, enull) == -1)
 164       goto out_of_memory;
 165   }
 166   if (e.t == EXPR_NULL)
 167     return 0;
 168   if (expr_list_append (&expr_table, e) == -1) {
 169   out_of_memory:
 170     nbdkit_error ("realloc");
 171     exit (EXIT_FAILURE);
 172   }
 173   return expr_table.len-1;
 174 }
 175
 176 /* Get an expression by node_id. */
 177 static expr_t
 178 get_node (node_id id)
 179 {
 180   assert (id < expr_table.len);
 181   return expr_table.ptr[id];
 182 }
 183
 184 static void
 185 free_expr_table (void)
 186 {
 187   size_t i;
 188   expr_t e;
 189
 190   for (i = 0; i < expr_table.len; ++i) {
 191     e = get_node (i);
 192     switch (e.t) {
 193     case EXPR_LIST:   free (e.list.ptr); break;
 194     case EXPR_FILE:   free (e.filename); break;
 195     case EXPR_SCRIPT: free (e.script); break;
 196     case EXPR_STRING: free (e.string.ptr); break;
 197     case EXPR_NAME:   free (e.name); break;
 198     case EXPR_ASSIGN: free (e.a.name); break;
 199     default: break;
 200     }
 201   }
 202
 203   expr_list_reset (&expr_table);
 204 }
 205
 206 /* Construct an expression. */
 207 static expr_t
 208 expr (enum expr_type t, ...)
 209 {
 210   expr_t e = { .t = t };
 211   va_list args;
 212
 213   /* Note that you cannot pass uint8_t through varargs, so for the
 214    * byte fields we use int here.
 215    */
 216   va_start (args, t);
 217   switch (t) {
 218   case EXPR_NULL: /* nothing */ break;
 219   case EXPR_LIST: e.list = va_arg (args, node_ids); break;
 220   case EXPR_BYTE: e.b = va_arg (args, int); break;
 221   case EXPR_ABS_OFFSET:
 222   case EXPR_ALIGN_OFFSET:
 223     e.ui = va_arg (args, uint64_t);
 224     break;
 225   case EXPR_REL_OFFSET: e.i = va_arg (args, int64_t); break;
 226   case EXPR_FILE: e.filename = va_arg (args, char *); break;
 227   case EXPR_SCRIPT: e.script = va_arg (args, char *); break;
 228   case EXPR_STRING: e.string = va_arg (args, string); break;
 229   case EXPR_FILL:
 230     e.fl.b = va_arg (args, int);
 231     e.fl.n = va_arg (args, uint64_t);
 232     break;
 233   case EXPR_NAME: e.name = va_arg (args, char *); break;
 234   case EXPR_ASSIGN:
 235     e.a.name = va_arg (args, char *);
 236     e.a.id = va_arg (args, node_id);
 237     break;
 238   case EXPR_REPEAT:
 239     e.r.id = va_arg (args, node_id);
 240     e.r.n = va_arg (args, uint64_t);
 241     break;
 242   case EXPR_SLICE:
 243     e.sl.id = va_arg (args, node_id);
 244     e.sl.n = va_arg (args, uint64_t);
 245     e.sl.m = va_arg (args, int64_t);
 246     break;
 247   }
 248   va_end (args);
 249
 250   return e;
 251 }
 252
 253 /* Make a shallow copy of an expression. */
 254 static expr_t
 255 copy_expr (expr_t e)
 256 {
 257   switch (e.t) {
 258     /* These have fields that have to be duplicated. */
 259   case EXPR_LIST:
 260     if (node_ids_duplicate (&e.list, &e.list) == -1) {
 261       nbdkit_error ("malloc");
 262       exit (EXIT_FAILURE);
 263     }
 264     break;
 265   case EXPR_FILE:
 266     e.filename = strdup (e.filename);
 267     if (e.filename == NULL) {
 268       nbdkit_error ("strdup");
 269       exit (EXIT_FAILURE);
 270     };
 271     break;
 272   case EXPR_SCRIPT:
 273     e.script = strdup (e.script);
 274     if (e.script == NULL) {
 275       nbdkit_error ("strdup");
 276       exit (EXIT_FAILURE);
 277     };
 278     break;
 279   case EXPR_STRING:
 280     if (string_duplicate (&e.string, &e.string) == -1) {
 281       nbdkit_error ("malloc");
 282       exit (EXIT_FAILURE);
 283     }
 284     break;
 285   case EXPR_NAME:
 286     e.name = strdup (e.name);
 287     if (e.name == NULL) {
 288       nbdkit_error ("strdup");
 289       exit (EXIT_FAILURE);
 290     };
 291     break;
 292   case EXPR_ASSIGN:
 293     e.a.name = strdup (e.a.name);
 294     if (e.a.name == NULL) {
 295       nbdkit_error ("strdup");
 296       exit (EXIT_FAILURE);
 297     };
 298     break;
 299
 300     /* These don't require anything special. */
 301   case EXPR_NULL:
 302   case EXPR_BYTE:
 303   case EXPR_ABS_OFFSET:
 304   case EXPR_REL_OFFSET:
 305   case EXPR_ALIGN_OFFSET:
 306   case EXPR_FILL:
 307   case EXPR_REPEAT:
 308   case EXPR_SLICE:
 309     break;
 310   }
 311
 312   return e;
 313 }
 314
 315 static const char *
 316 debug_indent (int level)
 317 {
 318   static const char spaces[41] = "                                        ";
 319
 320   if (level >= 10)
 321     return spaces;
 322   else
 323     return &spaces[40-4*level];
 324 }
 325
 326 static void
 327 debug_expr (node_id id, int level)
 328 {
 329   const expr_t e = get_node (id);
 330   size_t i;
 331
 332   switch (e.t) {
 333   case EXPR_NULL:
 334     nbdkit_debug ("%snull", debug_indent (level));
 335     break;
 336   case EXPR_LIST:
 337     nbdkit_debug ("%s(", debug_indent (level));
 338     for (i = 0; i < e.list.len; ++i)
 339       debug_expr (e.list.ptr[i], level+1);
 340     nbdkit_debug ("%s)", debug_indent (level));
 341     break;
 342   case EXPR_BYTE:
 343     nbdkit_debug ("%s%" PRIu8, debug_indent (level), e.b);
 344     break;
 345   case EXPR_ABS_OFFSET:
 346     nbdkit_debug ("%s@%" PRIu64, debug_indent (level), e.ui);
 347     break;
 348   case EXPR_REL_OFFSET:
 349     nbdkit_debug ("%s@%" PRIi64, debug_indent (level), e.i);
 350     break;
 351   case EXPR_ALIGN_OFFSET:
 352     nbdkit_debug ("%s@^%" PRIi64, debug_indent (level), e.ui);
 353     break;
 354   case EXPR_FILE:
 355     nbdkit_debug ("%s<%s", debug_indent (level), e.filename);
 356     break;
 357   case EXPR_SCRIPT:
 358     nbdkit_debug ("%s<(%s)", debug_indent (level), e.script);
 359     break;
 360   case EXPR_STRING: {
 361     CLEANUP_FREE_STRING string s = empty_vector;
 362
 363     for (i = 0; i < e.string.len; ++i) {
 364       char c = e.string.ptr[i];
 365       if (ascii_isprint ((char) c))
 366         string_append (&s, e.string.ptr[i]);
 367       else {
 368         string_append (&s, '\\');
 369         string_append (&s, 'x');
 370         string_append (&s, hexchar (c >> 4));
 371         string_append (&s, hexchar (c));
 372       }
 373     }
 374     string_append (&s, '\0');
 375     nbdkit_debug ("%s\"%s\"", debug_indent (level), s.ptr);
 376     break;
 377   }
 378   case EXPR_FILL:
 379     nbdkit_debug ("%sfill(%" PRIu8 "*%" PRIu64 ")",
 380                   debug_indent (level), e.fl.b, e.fl.n);
 381     break;
 382   case EXPR_NAME:
 383     nbdkit_debug ("%s\\%s", debug_indent (level), e.name);
 384     break;
 385   case EXPR_ASSIGN:
 386     nbdkit_debug ("%s(", debug_indent (level));
 387     debug_expr (e.a.id, level+1);
 388     nbdkit_debug ("%s) -> \\%s", debug_indent (level), e.a.name);
 389     break;
 390   case EXPR_REPEAT:
 391     nbdkit_debug ("%s(", debug_indent (level));
 392     debug_expr (e.r.id, level+1);
 393     nbdkit_debug ("%s) *%" PRIu64, debug_indent (level), e.r.n);
 394     break;
 395   case EXPR_SLICE:
 396     nbdkit_debug ("%s(", debug_indent (level));
 397     debug_expr (e.sl.id, level+1);
 398     nbdkit_debug ("%s)[%" PRIu64 ":%" PRIi64 "]",
 399                   debug_indent (level), e.sl.n, e.sl.m);
 400     break;
 401   }
 402 }
 403
 404 /* Is the expression something which contains data, or something else
 405  * (like an offset).  Just a light check to reject nonsense
 406  * expressions like "@0 * 10".
 407  */
 408 static bool
 409 is_data_expr (const expr_t e)
 410 {
 411   return e.t != EXPR_ABS_OFFSET && e.t != EXPR_REL_OFFSET
 412     && e.t != EXPR_ALIGN_OFFSET;
 413 }
 414
 415 /* Simple dictionary of name -> expression. */
 416 typedef struct dict dict_t;
 417 struct dict {
 418   dict_t *next;
 419   const char *name;             /* Name excluding \ character. */
 420   node_id id;                   /* Associated expression. */
 421 };
 422
 423 static int parser (int level, const char *value, size_t *start, size_t len,
 424                    node_id *root_rtn);
 425 static int optimize_ast (node_id root, node_id *root_rtn);
 426 static int evaluate (const dict_t *dict, node_id root,
 427                      struct allocator *a,
 428                      uint64_t *offset, uint64_t *size);
 429
 430 int
 431 read_data_format (const char *value, struct allocator *a, uint64_t *size_rtn)
 432 {
 433   size_t i = 0;
 434   node_id root;
 435   uint64_t offset = 0;
 436   int r = -1;
 437
 438   assert (expr_table.len == 0);
 439
 440   /* Run the parser across the entire string, returning the top level
 441    * expression.
 442    */
 443   if (parser (0, value, &i, strlen (value), &root) == -1)
 444     goto out;
 445
 446   if (optimize_ast (root, &root) == -1)
 447     goto out;
 448
 449   if (data_debug_AST) {
 450     nbdkit_debug ("BEGIN AST (-D data.AST=1)");
 451     debug_expr (root, 0);
 452     nbdkit_debug ("END AST");
 453   }
 454
 455   /* Evaluate the expression into the allocator. */
 456   r = evaluate (NULL, root, a, &offset, size_rtn);
 457
 458  out:
 459   free_expr_table ();
 460   return r;
 461 }
 462
 463 static int parse_string (const char *value, size_t *start, size_t len,
 464                          string *rtn);
 465 static int parse_word (const char *value, size_t *start, size_t len,
 466                        string *rtn);
 467 static size_t get_name (const char *value, size_t i, size_t len,
 468                         size_t *initial);
 469 static size_t get_var (const char *value, size_t i, size_t len,
 470                        size_t *initial);
 471 static size_t get_script (const char *value, size_t i, size_t len);
 472
 473 /* This is the format parser. */
 474 static int
 475 parser (int level, const char *value, size_t *start, size_t len,
 476         node_id *rtn)
 477 {
 478   size_t i = *start;
 479   /* List of node_ids that we are building up at this level.  This is
 480    * leaked on error paths, but we're going to call exit(1).
 481    */
 482   node_ids list = empty_vector;
 483
 484   while (i < len) {
 485 #define APPEND_EXPR(node_id)                                  \
 486     do {                                                      \
 487       if (node_ids_append (&list, (node_id)) == -1) {         \
 488         nbdkit_error ("realloc: %m");                         \
 489         exit (EXIT_FAILURE);                                  \
 490       }                                                       \
 491     } while (0)
 492     node_id id;
 493     int j, n;
 494     int64_t i64, m;
 495     size_t flen;
 496
 497     switch (value[i]) {
 498     case '#':                   /* # comment */
 499       i++;
 500       while (i < len && value[i] != '\n')
 501         i++;
 502       break;
 503
 504     case '@':                   /* @OFFSET */
 505       if (++i == len) goto parse_error;
 506       switch (value[i]) {
 507       case '+':                 /* @+N */
 508         if (++i == len) goto parse_error;
 509         if (sscanf (&value[i], "%" SCNi64 "%n", &i64, &n) == 1) {
 510           if (i64 < 0) {
 511             nbdkit_error ("data parameter after @+ must not be negative");
 512             return -1;
 513           }
 514           i += n;
 515           APPEND_EXPR (new_node (expr (EXPR_REL_OFFSET, i64)));
 516         }
 517         else
 518           goto parse_error;
 519         break;
 520       case '-':                 /* @-N */
 521         if (++i == len) goto parse_error;
 522         if (sscanf (&value[i], "%" SCNi64 "%n", &i64, &n) == 1) {
 523           if (i64 < 0) {
 524             nbdkit_error ("data parameter after @- must not be negative");
 525             return -1;
 526           }
 527           i += n;
 528           APPEND_EXPR (new_node (expr (EXPR_REL_OFFSET, -i64)));
 529         }
 530         else
 531           goto parse_error;
 532         break;
 533       case '^':                 /* @^ALIGNMENT */
 534         if (++i == len) goto parse_error;
 535         /* We must use %i into i64 in order to parse 0x etc. */
 536         if (sscanf (&value[i], "%" SCNi64 "%n", &i64, &n) == 1) {
 537           if (i64 < 0) {
 538             nbdkit_error ("data parameter after @^ must not be negative");
 539             return -1;
 540           }
 541           /* XXX fix this arbitrary restriction */
 542           if (!is_power_of_2 (i64)) {
 543             nbdkit_error ("data parameter @^%" PRIi64 " must be a power of 2",
 544                           i64);
 545             return -1;
 546           }
 547           i += n;
 548           APPEND_EXPR (new_node (expr (EXPR_ALIGN_OFFSET, (uint64_t) i64)));
 549         }
 550         else
 551           goto parse_error;
 552         break;
 553       case '0': case '1': case '2': case '3': case '4':
 554       case '5': case '6': case '7': case '8': case '9':
 555         /* We must use %i into i64 in order to parse 0x etc. */
 556         if (sscanf (&value[i], "%" SCNi64 "%n", &i64, &n) == 1) {
 557           if (i64 < 0) {
 558             nbdkit_error ("data parameter @OFFSET must not be negative");
 559             return -1;
 560           }
 561           i += n;
 562           APPEND_EXPR (new_node (expr (EXPR_ABS_OFFSET, (uint64_t) i64)));
 563         }
 564         else
 565           goto parse_error;
 566         break;
 567       default:
 568         goto parse_error;
 569       }
 570       break;
 571
 572     case '(':                   /* ( */
 573       i++;
 574
 575       /* Call self recursively. */
 576       if (parser (level+1, value, &i, len, &id) == -1)
 577         return -1;
 578       /* parser() always returns a list. */
 579       assert (get_node (id).t == EXPR_LIST);
 580       APPEND_EXPR (id);
 581       break;
 582
 583     case ')':                   /* ) */
 584       if (level < 1) {
 585         nbdkit_error ("unmatched ')' in data string");
 586         return -1;
 587       }
 588       i++;
 589       goto out;
 590
 591     case '*':                   /* expr*N */
 592       i++;
 593       if (list.len == 0) {
 594         nbdkit_error ("*N must follow an expression");
 595         return -1;
 596       }
 597       if (! is_data_expr (get_node (list.ptr[list.len-1]))) {
 598         nbdkit_error ("*N cannot be applied to this type of expression");
 599         return -1;
 600       }
 601       if (sscanf (&value[i], "%" SCNi64 "%n", &i64, &n) == 1) {
 602         if (i64 < 0) {
 603           nbdkit_error ("data parameter @OFFSET must not be negative");
 604           return -1;
 605         }
 606         i += n;
 607       }
 608       else {
 609         nbdkit_error ("*N not numeric");
 610         return -1;
 611       }
 612       id = list.ptr[list.len-1];
 613       list.len--;
 614       APPEND_EXPR (new_node (expr (EXPR_REPEAT, id, (uint64_t) i64)));
 615       break;
 616
 617     case '[':                 /* expr[k:m] */
 618       i++;
 619       if (list.len == 0) {
 620         nbdkit_error ("[N:M] must follow an expression");
 621         return -1;
 622       }
 623       if (! is_data_expr (get_node (list.ptr[list.len-1]))) {
 624         nbdkit_error ("[N:M] cannot be applied to this type of expression");
 625         return -1;
 626       }
 627       i64 = 0;
 628       m = -1;
 629       if (sscanf (&value[i], "%" SCNi64 ":%" SCNi64 "]%n",
 630                   &i64, &m, &n) == 2 ||
 631           sscanf (&value[i], ":%" SCNi64 "]%n", &m, &n) == 1 ||
 632           sscanf (&value[i], "%" SCNi64 ":]%n", &i64, &n) == 1)
 633         i += n;
 634       else if (strncmp (&value[i], ":]", 2) == 0)
 635         i += 2;
 636       else {
 637         nbdkit_error ("enclosed pattern (...)[N:M] not numeric");
 638         return -1;
 639       }
 640       id = list.ptr[list.len-1];
 641       list.len--;
 642       APPEND_EXPR (new_node (expr (EXPR_SLICE, id, i64, m)));
 643       break;
 644
 645     case '<':
 646       if (i+1 < len && value[i+1] == '(') { /* <(SCRIPT) */
 647         char *script;
 648
 649         i += 2;
 650
 651         flen = get_script (value, i, len);
 652         if (flen == 0) goto parse_error;
 653         script = strndup (&value[i], flen);
 654         if (script == NULL) {
 655           nbdkit_error ("strndup: %m");
 656           return -1;
 657         }
 658         i += flen + 1;          /* +1 for trailing ) */
 659         APPEND_EXPR (new_node (expr (EXPR_SCRIPT, script)));
 660       }
 661       else {                    /* <FILE */
 662         char *filename;
 663
 664         i++;
 665
 666         /* The filename follows next in the string. */
 667         flen = strcspn (&value[i], "*[) \t\n");
 668         if (flen == 0) {
 669           nbdkit_error ("data parameter <FILE not a filename");
 670           return -1;
 671         }
 672         filename = strndup (&value[i], flen);
 673         if (filename == NULL) {
 674           nbdkit_error ("strndup: %m");
 675           return -1;
 676         }
 677         i += flen;
 678         APPEND_EXPR (new_node (expr (EXPR_FILE, filename)));
 679       }
 680       break;
 681
 682     case '"': {                 /* "String" */
 683       string str = empty_vector;
 684
 685       i++;
 686       if (parse_string (value, &i, len, &str) == -1)
 687         return -1;
 688       APPEND_EXPR (new_node (expr (EXPR_STRING, str)));
 689       break;
 690     }
 691
 692     case '\\': {                /* \\NAME */
 693       char *name;
 694
 695       flen = get_name (value, i, len, &i);
 696       if (flen == 0) goto parse_error;
 697       name = strndup (&value[i], flen);
 698       if (name == NULL) {
 699         nbdkit_error ("strndup: %m");
 700         return -1;
 701       }
 702       i += flen;
 703       APPEND_EXPR (new_node (expr (EXPR_NAME, name)));
 704       break;
 705     }
 706
 707     case '-': {                 /* -> \\NAME */
 708       char *name;
 709
 710       i++;
 711       if (value[i] != '>') goto parse_error;
 712       i++;
 713       if (list.len == 0) {
 714         nbdkit_error ("-> must follow an expression");
 715         return -1;
 716       }
 717       if (! is_data_expr (get_node (list.ptr[list.len-1]))) {
 718         nbdkit_error ("-> cannot be applied to this type of expression");
 719         return -1;
 720       }
 721       flen = get_name (value, i, len, &i);
 722       if (flen == 0) goto parse_error;
 723       name = strndup (&value[i], flen);
 724       if (name == NULL) {
 725         nbdkit_error ("strndup: %m");
 726         return -1;
 727       }
 728       id = list.ptr[list.len-1];
 729       i += flen;
 730       list.len--;
 731       APPEND_EXPR (new_node (expr (EXPR_ASSIGN, name, id)));
 732       break;
 733     }
 734
 735     case '$': {                 /* $VAR */
 736       CLEANUP_FREE char *name = NULL;
 737       const char *content;
 738       size_t ci;
 739
 740       flen = get_var (value, i, len, &i);
 741       if (flen == 0) goto parse_error;
 742       name = strndup (&value[i], flen);
 743       if (name == NULL) {
 744         nbdkit_error ("strndup: %m");
 745         return -1;
 746       }
 747       i += flen;
 748
 749       /* Look up the variable. */
 750       content = get_extra_param (name);
 751       if (!content) {
 752         content = getenv (name);
 753         if (!content) {
 754           nbdkit_error ("$%s: variable not found", name);
 755           return -1;
 756         }
 757       }
 758
 759       /* Call self recursively on the variable content. */
 760       ci = 0;
 761       if (parser (0, content, &ci, strlen (content), &id) == -1)
 762         return -1;
 763       /* parser() always returns a list. */
 764       assert (get_node (id).t == EXPR_LIST);
 765       APPEND_EXPR (id);
 766       break;
 767     }
 768
 769     case '0': case '1': case '2': case '3': case '4': /* BYTE */
 770     case '5': case '6': case '7': case '8': case '9':
 771       /* We need to use %i here so it scans 0x etc correctly. */
 772       if (sscanf (&value[i], "%i%n", &j, &n) == 1)
 773         i += n;
 774       else
 775         goto parse_error;
 776       if (j < 0 || j > 255) {
 777         nbdkit_error ("data parameter BYTE must be in the range 0..255");
 778         return -1;
 779       }
 780       APPEND_EXPR (new_node (expr (EXPR_BYTE, j)));
 781       break;
 782
 783     case 'l': case 'b': {       /* le or be + NN: + WORD */
 784       string str = empty_vector;
 785
 786       if (parse_word (value, &i, len, &str) == -1)
 787         return -1;
 788       APPEND_EXPR (new_node (expr (EXPR_STRING, str)));
 789       break;
 790     }
 791
 792     case ' ': case '\t': case '\n': /* Skip whitespace. */
 793     case '\f': case '\r': case '\v':
 794       i++;
 795       break;
 796
 797     default:
 798     parse_error:
 799       nbdkit_error ("data parameter: parsing error at offset %zu", i);
 800       return -1;
 801     } /* switch */
 802   } /* for */
 803
 804   /* If we reach the end of the string and level != 0 that means
 805    * there is an unmatched '(' in the string.
 806    */
 807   if (level > 0) {
 808     nbdkit_error ("unmatched '(' in data string");
 809     return -1;
 810   }
 811
 812  out:
 813   *start = i;
 814
 815   /* Return the node ID. */
 816   *rtn = new_node (expr (EXPR_LIST, list));
 817   return 0;
 818 }
 819
 820 /* Return the next \NAME in the input.  This skips whitespace, setting
 821  * *initial to the index of the start of the NAME (minus backslash).
 822  * It returns the length of NAME (minus backslash), or 0 if not found.
 823  */
 824 static size_t
 825 get_name (const char *value, size_t i, size_t len, size_t *initial)
 826 {
 827   size_t r = 0;
 828
 829   while (i < len) {
 830     if (!ascii_isspace (value[i]))
 831       break;
 832     i++;
 833   }
 834
 835   if (i >= len || value[i] != '\\') return 0;
 836   i++;
 837   if (i >= len) return 0;
 838   *initial = i;
 839
 840   while (i < len &&
 841          (ascii_isalnum (value[i]) || value[i] == '_' || value[i] == '-')) {
 842     i++;
 843     r++;
 844   }
 845
 846   return r;
 847 }
 848
 849 /* Like get_name above, but for $VAR variables.  The accepted variable
 850  * name is /\$[a-z_][a-z0-9_]+/i
 851  */
 852 static size_t
 853 get_var (const char *value, size_t i, size_t len, size_t *initial)
 854 {
 855   size_t r = 0;
 856
 857   while (i < len) {
 858     if (!ascii_isspace (value[i]))
 859       break;
 860     i++;
 861   }
 862
 863   if (i >= len || value[i] != '$') return 0;
 864   i++;
 865   if (i >= len) return 0;
 866   *initial = i;
 867
 868   if (!ascii_isalpha (value[i]) && value[i] != '_')
 869     return 0;
 870
 871   while (i < len && (ascii_isalnum (value[i]) || value[i] == '_')) {
 872     i++;
 873     r++;
 874   }
 875
 876   return r;
 877 }
 878
 879 /* Find end of a <(SCRIPT), ignoring nested (). */
 880 static size_t
 881 get_script (const char *value, size_t i, size_t len)
 882 {
 883   int lvl = 0;
 884   size_t r = 0;
 885
 886   for (; i < len; ++i, ++r) {
 887     if (value[i] == '(')
 888       lvl++;
 889     else if (value[i] == ')') {
 890       if (lvl > 0)
 891         lvl--;
 892       else
 893         break;
 894     }
 895   }
 896
 897   if (i >= len)
 898     return 0;
 899
 900   return r;
 901 }
 902
 903 /* Parse a "String" with C-like escaping, and store it in the string
 904  * vector.  When this is called we have already consumed the initial
 905  * double quote character.
 906  */
 907 static int
 908 parse_string (const char *value, size_t *start, size_t len, string *rtn)
 909 {
 910   size_t i = *start;
 911   unsigned char c, x0, x1;
 912
 913   *rtn = (string) empty_vector;
 914
 915   for (; i < len; ++i) {
 916     c = value[i];
 917     switch (c) {
 918     case '"':
 919       /* End of the string. */
 920       *start = i+1;
 921       return 0;
 922
 923     case '\\':
 924       /* Start of escape sequence. */
 925       if (++i == len) goto unexpected_end_of_string;
 926       c = value[i];
 927       switch (c) {
 928       case 'a': c = 0x7; break;
 929       case 'b': c = 0x8; break;
 930       case 'f': c = 0xc; break;
 931       case 'n': c = 0xa; break;
 932       case 'r': c = 0xd; break;
 933       case 't': c = 0x9; break;
 934       case 'v': c = 0xb; break;
 935       case '\\': case '"': break;
 936       case 'x':
 937         if (++i == len) goto unexpected_end_of_string;
 938         x0 = value[i];
 939         if (++i == len) goto unexpected_end_of_string;
 940         x1 = value[i];
 941         if (!ascii_isxdigit (x0) || !ascii_isxdigit (x1)) {
 942           nbdkit_error ("data: \\xNN must be followed by exactly "
 943                         "two hexadecimal characters");
 944           return -1;
 945         }
 946         c = hexbyte (x0, x1);
 947         break;
 948       case '0': case '1': case '2': case '3': case '4':
 949       case '5': case '6': case '7': case '8': case '9':
 950       case 'u':
 951         nbdkit_error ("data: string numeric and unicode sequences "
 952                       "are not yet implemented");
 953         return -1;
 954       }
 955       /*FALLTHROUGH*/
 956     default:
 957       /* Any other character is added to the string. */
 958       if (string_append (rtn, c) == -1) {
 959         nbdkit_error ("realloc: %m");
 960         return -1;
 961       }
 962     }
 963   }
 964
 965   /* If we reach here then we have run off the end of the data string
 966    * without finding the final quote.
 967    */
 968  unexpected_end_of_string:
 969   nbdkit_error ("data parameter: unterminated string");
 970   return -1;
 971 }
 972
 973 /* Parse le<NN>:WORD be<NN>:WORD expressions.  These are parsed into strings. */
 974 static int
 975 parse_word (const char *value, size_t *start, size_t len, string *rtn)
 976 {
 977   size_t i = *start;
 978   CLEANUP_FREE_STRING string copy = empty_vector;
 979   size_t n;
 980   enum { little, big } endian;
 981   uint16_t u16;
 982   uint32_t u32;
 983   uint64_t u64;
 984
 985   *rtn = (string) empty_vector;
 986
 987   /* It's convenient to use the nbdkit_parse* functions below because
 988    * they deal already with overflow etc.  However these functions
 989    * require a \0-terminated string, so we have to copy what we are
 990    * parsing to a new string here.
 991    */
 992   while (i < len) {
 993     if (ascii_isspace (value[i]))
 994       break;
 995     i++;
 996   }
 997   n = i - *start;
 998   assert (n > 0);          /* must be because caller parsed 'l'/'b' */
 999   if (string_reserve (&copy, n + 1) == -1) {
1000     nbdkit_error ("realloc: %m");
1001     return -1;
1002   }
1003   memcpy (copy.ptr, &value[*start], n);
1004   copy.ptr[n] = '\0';
1005   copy.len = n + 1;
1006   *start = i;
1007
1008   /* Reserve enough space in the return buffer for the longest
1009    * possible bitstring (64 bits / 8 bytes).
1010    */
1011   if (string_reserve (rtn, 8) == -1) {
1012     nbdkit_error ("realloc: %m");
1013     return -1;
1014   }
1015
1016   /* Parse the rest of {le|be}{16|32|64}: */
1017   if (strncmp (copy.ptr, "le16:", 5) == 0) {
1018     endian = little; rtn->len = 2;
1019   }
1020   else if (strncmp (copy.ptr, "le32:", 5) == 0) {
1021     endian = little; rtn->len = 4;
1022   }
1023   else if (strncmp (copy.ptr, "le64:", 5) == 0) {
1024     endian = little; rtn->len = 8;
1025   }
1026   else if (strncmp (copy.ptr, "be16:", 5) == 0) {
1027     endian = big; rtn->len = 2;
1028   }
1029   else if (strncmp (copy.ptr, "be32:", 5) == 0) {
1030     endian = big; rtn->len = 4;
1031   }
1032   else if (strncmp (copy.ptr, "be64:", 5) == 0) {
1033     endian = big; rtn->len = 8;
1034   }
1035   else {
1036     nbdkit_error ("data parameter: expected \"le16/32/64:\" "
1037                   "or \"be16/32/64:\" at offset %zu", i);
1038     return -1;
1039   }
1040
1041   /* Parse the word field into a host-order unsigned int. */
1042   switch (rtn->len) {
1043   case 2:
1044     if (nbdkit_parse_uint16_t ("data", &copy.ptr[5], &u16) == -1)
1045       return -1;
1046     break;
1047   case 4:
1048     if (nbdkit_parse_uint32_t ("data", &copy.ptr[5], &u32) == -1)
1049       return -1;
1050     break;
1051   case 8:
1052     if (nbdkit_parse_uint64_t ("data", &copy.ptr[5], &u64) == -1)
1053       return -1;
1054     break;
1055   default: abort ();
1056   }
1057
1058   /* Now depending on the endianness and size, convert to the final
1059    * bitstring.
1060    */
1061   switch (endian) {
1062   case little:
1063     switch (rtn->len) {
1064     case 2:                     /* le16: */
1065       *((uint16_t *) rtn->ptr) = htole16 (u16);
1066       break;
1067     case 4:                     /* le32: */
1068       *((uint32_t *) rtn->ptr) = htole32 (u32);
1069       break;
1070     case 8:                     /* le64: */
1071       *((uint64_t *) rtn->ptr) = htole64 (u64);
1072       break;
1073     default: abort ();
1074     }
1075     break;
1076
1077   case big:
1078     switch (rtn->len) {
1079     case 2:                     /* be16: */
1080       *((uint16_t *) rtn->ptr) = htobe16 (u16);
1081       break;
1082     case 4:                     /* be32: */
1083       *((uint32_t *) rtn->ptr) = htobe32 (u32);
1084       break;
1085     case 8:                     /* be64: */
1086       *((uint64_t *) rtn->ptr) = htobe64 (u64);
1087       break;
1088     default: abort ();
1089     }
1090   }
1091
1092   return 0;
1093 }
1094
1095 /* This simple optimization pass over the AST simplifies some
1096  * expressions.
1097  */
1098 static bool expr_safe_to_inline (const expr_t);
1099 static bool list_safe_to_inline (const node_ids);
1100 static bool expr_is_single_byte (const expr_t, uint8_t *b);
1101 static bool exprs_can_combine (expr_t e0, expr_t e1, node_id *id_rtn);
1102
1103 static int
1104 optimize_ast (node_id root, node_id *root_rtn)
1105 {
1106   size_t i, j;
1107   node_id id;
1108   node_ids list = empty_vector;
1109   expr_t e2;
1110
1111   switch (get_node (root).t) {
1112   case EXPR_LIST:
1113     /* For convenience this makes a new list node. */
1114
1115     /* Optimize each element of the list. */
1116     for (i = 0; i < get_node (root).list.len; ++i) {
1117       id = get_node (root).list.ptr[i];
1118       if (optimize_ast (id, &id) == -1)
1119         return -1;
1120       switch (get_node (id).t) {
1121       case EXPR_NULL:
1122         /* null elements of a list can be ignored. */
1123         break;
1124       case EXPR_LIST:
1125         /* Simple lists can be inlined, but be careful with
1126          * assignments, offsets and other expressions which are scoped
1127          * because flattening the list changes the scope.
1128          */
1129         if (list_safe_to_inline (get_node (id).list)) {
1130           for (j = 0; j < get_node (id).list.len; ++j) {
1131             if (node_ids_append (&list, get_node (id).list.ptr[j]) == -1)
1132               goto list_append_error;
1133           }
1134           break;
1135         }
1136         /*FALLTHROUGH*/
1137       default:
1138         if (node_ids_append (&list, id) == -1) {
1139         list_append_error:
1140           nbdkit_error ("realloc: %m");
1141           return -1;
1142         }
1143       }
1144     }
1145
1146     /* Combine adjacent pairs of elements if possible. */
1147     for (i = 1; i < list.len; ++i) {
1148       node_id id0, id1;
1149
1150       id0 = list.ptr[i-1];
1151       id1 = list.ptr[i];
1152       if (exprs_can_combine (get_node (id0), get_node (id1), &id)) {
1153         list.ptr[i-1] = id;
1154         node_ids_remove (&list, i);
1155         --i;
1156       }
1157     }
1158
1159     /* List of length 0 is replaced with null. */
1160     if (list.len == 0) {
1161       free (list.ptr);
1162       *root_rtn = new_node (expr (EXPR_NULL));
1163       return 0;
1164     }
1165
1166     /* List of length 1 is replaced with the first element, but as
1167      * above avoid inlining if it is not a safe expression.
1168      */
1169     if (list.len == 1 && expr_safe_to_inline (get_node (list.ptr[0]))) {
1170       id = list.ptr[0];
1171       free (list.ptr);
1172       *root_rtn = id;
1173       return 0;
1174     }
1175
1176     *root_rtn = new_node (expr (EXPR_LIST, list));
1177     return 0;
1178
1179   case EXPR_ASSIGN:
1180     id = get_node (root).a.id;
1181     if (optimize_ast (id, &id) == -1)
1182       return -1;
1183     e2 = copy_expr (get_node (root));
1184     e2.a.id = id;
1185     *root_rtn = new_node (e2);
1186     return 0;
1187
1188   case EXPR_REPEAT:
1189     /* Repeating zero times can be replaced by null. */
1190     if (get_node (root).r.n == 0) {
1191       *root_rtn = new_node (expr (EXPR_NULL));
1192       return 0;
1193     }
1194     id = get_node (root).r.id;
1195     if (optimize_ast (id, &id) == -1)
1196       return -1;
1197
1198     /* expr*1 can be replaced with simply expr.
1199      * null*N can be replaced with null.
1200      */
1201     if (get_node (root).r.n == 1 || get_node (id).t == EXPR_NULL) {
1202       *root_rtn = id;
1203       return 0;
1204     }
1205     /* expr*X*Y can be replaced by expr*(X*Y). */
1206     if (get_node (id).t == EXPR_REPEAT) {
1207       *root_rtn = new_node (expr (EXPR_REPEAT,
1208                                   get_node (id).r.id,
1209                                   get_node (root).r.n * get_node (id).r.n));
1210       return 0;
1211     }
1212     /* fill(b,X)*Y can be replaced by fill(b,X*Y). */
1213     if (get_node (id).t == EXPR_FILL) {
1214       *root_rtn = new_node (expr (EXPR_FILL,
1215                                   get_node (id).fl.b,
1216                                   get_node (root).r.n * get_node (id).fl.n));
1217       return 0;
1218     }
1219     /* For short strings and small values or N, string*N can be
1220      * replaced by N copies of the string.
1221      */
1222     if (get_node (id).t == EXPR_STRING &&
1223         get_node (root).r.n <= 4 &&
1224         get_node (id).string.len <= 512) {
1225       string s = empty_vector;
1226       size_t n = get_node (root).r.n;
1227       const string sub = get_node (id).string;
1228
1229       for (i = 0; i < n; ++i) {
1230         for (j = 0; j < sub.len; ++j) {
1231           if (string_append (&s, sub.ptr[j]) == -1) {
1232             nbdkit_error ("realloc: %m");
1233             return -1;
1234           }
1235         }
1236       }
1237
1238       *root_rtn = new_node (expr (EXPR_STRING, s));
1239       return 0;
1240     }
1241     /* Single byte expression * N can be replaced by a fill. */
1242     {
1243       uint8_t b;
1244
1245       if (expr_is_single_byte (get_node (id), &b)) {
1246         *root_rtn = new_node (expr (EXPR_FILL, b, get_node (root).r.n));
1247         return 0;
1248       }
1249     }
1250
1251     e2 = copy_expr (get_node (root));
1252     e2.r.id = id;
1253     *root_rtn = new_node (e2);
1254     return 0;
1255
1256   case EXPR_SLICE: {
1257     uint64_t n = get_node (root).sl.n;
1258     int64_t m = get_node (root).sl.m;
1259     uint64_t len;
1260
1261     /* A zero-length slice can be replaced by null. */
1262     if (n == m) {
1263       *root_rtn = new_node (expr (EXPR_NULL));
1264       return 0;
1265     }
1266     id = get_node (root).sl.id;
1267     if (optimize_ast (id, &id) == -1)
1268       return -1;
1269
1270     /* Some constant expressions can be sliced into something shorter.
1271      * Be conservative.  If the slice is invalid then we prefer to do
1272      * nothing here because the whole expression might be optimized
1273      * away and if it isn't then we will give an error later.
1274      */
1275     switch (get_node (id).t) {
1276     case EXPR_NULL:             /* null[:0] or null[0:] => null */
1277       if (m == 0 || (n == 0 && m == -1)) {
1278         *root_rtn = new_node (expr (EXPR_NULL));
1279         return 0;
1280       }
1281       break;
1282     case EXPR_BYTE:             /* byte[:1] or byte[0:] => byte */
1283       if (m == 1 || (n == 0 && m == -1)) {
1284         *root_rtn = new_node (expr (EXPR_BYTE, get_node (id).b));
1285         return 0;
1286       }
1287       break;
1288     case EXPR_STRING:           /* substring */
1289       len = get_node (id).string.len;
1290       if (m >= 0 && n <= m && m <= len) {
1291         if (m-n == 1)
1292           *root_rtn = new_node (expr (EXPR_BYTE, get_node (id).string.ptr[n]));
1293         else {
1294           string sub = substring (get_node (id).string, n, m-n);
1295           *root_rtn = new_node (expr (EXPR_STRING, sub));
1296         }
1297         return 0;
1298       }
1299       if (m == -1 && n <= len) {
1300         if (len-n == 1)
1301           *root_rtn = new_node (expr (EXPR_BYTE, get_node (id).string.ptr[n]));
1302         else {
1303           string sub = substring (get_node (id).string, n, len-n);
1304           *root_rtn = new_node (expr (EXPR_STRING, sub));
1305         }
1306         return 0;
1307       }
1308       break;
1309     case EXPR_FILL:             /* slice of a fill is a shorter fill */
1310       len = get_node (id).fl.n;
1311       if (m >= 0 && n <= m && m <= len) {
1312         if (m-n == 1)
1313           *root_rtn = new_node (expr (EXPR_BYTE, get_node (id).fl.b));
1314         else
1315           *root_rtn = new_node (expr (EXPR_FILL, get_node (id).fl.b, m-n));
1316         return 0;
1317       }
1318       if (m == -1 && n <= len) {
1319         if (len-n == 1)
1320           *root_rtn = new_node (expr (EXPR_BYTE, get_node (id).fl.b));
1321         else
1322           *root_rtn = new_node (expr (EXPR_FILL, get_node (id).fl.b, len-n));
1323         return 0;
1324       }
1325       break;
1326     default: ;
1327     }
1328
1329     e2 = copy_expr (get_node (root));
1330     e2.sl.id = id;
1331     *root_rtn = new_node (e2);
1332     return 0;
1333   }
1334
1335   case EXPR_STRING:
1336     /* A zero length string can be replaced with null. */
1337     if (get_node (root).string.len == 0) {
1338       *root_rtn = new_node (expr (EXPR_NULL));
1339       return 0;
1340     }
1341     /* Strings containing the same character can be replaced by a
1342      * fill.  These can be produced by other optimizations.
1343      */
1344     if (get_node (root).string.len > 1) {
1345       const string s = get_node (root).string;
1346       uint8_t b = s.ptr[0];
1347
1348       for (i = 1; i < s.len; ++i)
1349         if (s.ptr[i] != b)
1350           break;
1351
1352       if (i == s.len) {
1353         *root_rtn = new_node (expr (EXPR_FILL, b, (uint64_t) s.len));
1354         return 0;
1355       }
1356     }
1357     *root_rtn = root;
1358     return 0;
1359
1360   case EXPR_FILL:
1361     /* Zero-length fill can be replaced by null. */
1362     if (get_node (root).fl.n == 0) {
1363       *root_rtn = new_node (expr (EXPR_NULL));
1364       return 0;
1365     }
1366     *root_rtn = root;
1367     return 0;
1368
1369   case EXPR_NULL:
1370   case EXPR_BYTE:
1371   case EXPR_ABS_OFFSET:
1372   case EXPR_REL_OFFSET:
1373   case EXPR_ALIGN_OFFSET:
1374   case EXPR_FILE:
1375   case EXPR_SCRIPT:
1376   case EXPR_NAME:
1377     *root_rtn = root;
1378     return 0;
1379   }
1380
1381   abort ();
1382 }
1383
1384 /* Test if an expression can be safely inlined in a superior list
1385  * without changing the meaning of any scoped expressions.
1386  */
1387 static bool
1388 expr_safe_to_inline (const expr_t e)
1389 {
1390   switch (e.t) {
1391     /* Assignments and named expressions are scoped. */
1392   case EXPR_ASSIGN:
1393   case EXPR_NAME:
1394     return false;
1395
1396     /* @Offsets are scoped. */
1397   case EXPR_ABS_OFFSET:
1398   case EXPR_REL_OFFSET:
1399   case EXPR_ALIGN_OFFSET:
1400     return false;
1401
1402     /* Everything else should be safe. */
1403   case EXPR_NULL:
1404   case EXPR_LIST:
1405   case EXPR_BYTE:
1406   case EXPR_FILE:
1407   case EXPR_SCRIPT:
1408   case EXPR_STRING:
1409   case EXPR_FILL:
1410   case EXPR_REPEAT:
1411   case EXPR_SLICE:
1412     ;
1413   }
1414
1415   return true;
1416 }
1417
1418 /* Test if a list of expressions is safe to inline in a superior list. */
1419 static bool
1420 list_safe_to_inline (const node_ids list)
1421 {
1422   size_t i;
1423
1424   for (i = 0; i < list.len; ++i) {
1425     if (!expr_safe_to_inline (get_node (list.ptr[i])))
1426       return false;
1427   }
1428
1429   return true;
1430 }
1431
1432 /* For some constant expressions which are a length 1 byte, return
1433  * true and the byte.
1434  */
1435 static bool
1436 expr_is_single_byte (const expr_t e, uint8_t *b)
1437 {
1438   switch (e.t) {
1439   case EXPR_BYTE:               /* A single byte. */
1440     if (b) *b = e.b;
1441     return true;
1442   case EXPR_LIST:               /* A single element list if it is single byte */
1443     if (e.list.len != 1)
1444       return false;
1445     return expr_is_single_byte (get_node (e.list.ptr[0]), b);
1446   case EXPR_STRING:             /* A length-1 string. */
1447     if (e.string.len != 1)
1448       return false;
1449     if (b) *b = e.string.ptr[0];
1450     return true;
1451   case EXPR_FILL:               /* A length-1 fill. */
1452     if (e.fl.n != 1)
1453       return false;
1454     if (b) *b = e.fl.b;
1455     return true;
1456   case EXPR_REPEAT:             /* EXPR*1 if EXPR is single byte */
1457     if (e.r.n != 1)
1458       return false;
1459     return expr_is_single_byte (get_node (e.r.id), b);
1460   default:
1461     return false;
1462   }
1463 }
1464
1465 /* Test if two adjacent constant expressions can be combined and if so
1466  * return a new expression which is the combination of both.  For
1467  * example, two bytes are combined into a string (1 2 => "\x01\x02"),
1468  * or a string and a byte into a longer string ("\x01\x02" 3 =>
1469  * "\x01\x02\x03").
1470  */
1471 static bool
1472 exprs_can_combine (expr_t e0, expr_t e1, node_id *id_rtn)
1473 {
1474   string s = empty_vector;
1475   size_t len, len1;
1476   expr_t e2;
1477
1478   switch (e0.t) {
1479   case EXPR_BYTE:
1480     switch (e1.t) {
1481     case EXPR_BYTE:             /* byte byte => fill | string */
1482       if (e0.b == e1.b) {
1483         *id_rtn = new_node (expr (EXPR_FILL, e0.b, UINT64_C (2)));
1484       }
1485       else {
1486         if (string_append (&s, e0.b) == -1 ||
1487             string_append (&s, e1.b) == -1)
1488           goto out_of_memory;
1489         *id_rtn = new_node (expr (EXPR_STRING, s));
1490       }
1491       return true;
1492     case EXPR_STRING:           /* byte string => string */
1493       len = e1.string.len;
1494       if (string_reserve (&s, len+1) == -1)
1495         goto out_of_memory;
1496       s.len = len+1;
1497       s.ptr[0] = e0.b;
1498       memcpy (&s.ptr[1], e1.string.ptr, len);
1499       *id_rtn = new_node (expr (EXPR_STRING, s));
1500       return true;
1501     case EXPR_FILL:             /* byte fill => fill, if the same */
1502       if (e0.b != e1.fl.b)
1503         return false;
1504       e2 = copy_expr (e1);
1505       e2.fl.n++;
1506       *id_rtn = new_node (e2);
1507       return true;
1508     default:
1509       return false;
1510     }
1511
1512   case EXPR_STRING:
1513     switch (e1.t) {
1514     case EXPR_BYTE:             /* string byte => string */
1515       len = e0.string.len;
1516       if (string_reserve (&s, len+1) == -1)
1517         goto out_of_memory;
1518       s.len = len+1;
1519       memcpy (s.ptr, e0.string.ptr, len);
1520       s.ptr[len] = e1.b;
1521       *id_rtn = new_node (expr (EXPR_STRING, s));
1522       return true;
1523     case EXPR_STRING:           /* string string => string */
1524       len = e0.string.len;
1525       len1 = e1.string.len;
1526       if (string_reserve (&s, len+len1) == -1)
1527         goto out_of_memory;
1528       s.len = len+len1;
1529       memcpy (s.ptr, e0.string.ptr, len);
1530       memcpy (&s.ptr[len], e1.string.ptr, len1);
1531       *id_rtn = new_node (expr (EXPR_STRING, s));
1532       return true;
1533     default:
1534       return false;
1535     }
1536
1537   case EXPR_FILL:
1538     switch (e1.t) {
1539     case EXPR_BYTE:             /* fill byte => fill, if the same */
1540       if (e0.fl.b != e1.b)
1541         return false;
1542       e2 = copy_expr (e0);
1543       e2.fl.n++;
1544       *id_rtn = new_node (e2);
1545       return true;
1546     case EXPR_FILL:             /* fill fill => fill, if the same */
1547       if (e0.fl.b != e1.fl.b)
1548         return false;
1549       e2 = copy_expr (e0);
1550       e2.fl.n += e1.fl.n;
1551       *id_rtn = new_node (e2);
1552       return true;
1553     default:
1554       return false;
1555     }
1556
1557   default:
1558     return false;
1559   }
1560
1561  out_of_memory:
1562   nbdkit_error ("realloc: %m");
1563   exit (EXIT_FAILURE);
1564 }
1565
1566 static int store_file (struct allocator *a,
1567                        const char *filename, uint64_t *offset);
1568 static int store_file_slice (struct allocator *a,
1569                              const char *filename,
1570                              uint64_t skip, int64_t end, uint64_t *offset);
1571 static int store_script (struct allocator *a,
1572                          const char *script, uint64_t *offset);
1573 static int store_script_len (struct allocator *a,
1574                              const char *script,
1575                              int64_t len, uint64_t *offset);
1576
1577 /* This is the evaluator.  It takes the root (node_id) of the parsed
1578  * abstract syntax tree and evaulates it into the allocator.
1579  */
1580 static int
1581 evaluate (const dict_t *dict, node_id root,
1582           struct allocator *a, uint64_t *offset, uint64_t *size)
1583 {
1584   /* 'd' is the local dictionary for this function.  Assignments are
1585    * added to the dictionary in this scope and passed to nested
1586    * scopes.  This is leaked on error paths, but we're going to call
1587    * exit(1).
1588    */
1589   dict_t *d = (dict_t *) dict;
1590   size_t i, j;
1591   node_ids list;
1592
1593   /* Extract the list from the current node.  If the current node is
1594    * not EXPR_LIST then make one for convenience below.
1595    */
1596   if (get_node (root).t == EXPR_LIST) {
1597     list = get_node (root).list;
1598   }
1599   else {
1600     list.len = 1;
1601     list.ptr = &root;
1602   }
1603
1604   for (i = 0; i < list.len; ++i) {
1605     const expr_t e = get_node (list.ptr[i]);
1606
1607     switch (e.t) {
1608     case EXPR_NULL: /* does nothing */ break;
1609
1610     case EXPR_BYTE:
1611       /* Store the byte. */
1612       if (a->f->write (a, &e.b, 1, *offset) == -1)
1613         return -1;
1614       (*offset)++;
1615       break;
1616
1617     case EXPR_ABS_OFFSET:
1618       /* XXX Check it does not overflow 63 bits. */
1619       *offset = e.ui;
1620       break;
1621
1622     case EXPR_REL_OFFSET:
1623       if (e.i < 0 && -e.i > *offset) {
1624         nbdkit_error ("data parameter @-%" PRIi64 " "
1625                       "must not be larger than the current offset %" PRIu64,
1626                       -e.i, *offset);
1627         return -1;
1628       }
1629       /* XXX Check it does not overflow 63 bits. */
1630       *offset += e.i;
1631       break;
1632
1633     case EXPR_ALIGN_OFFSET:
1634       *offset = ROUND_UP (*offset, e.ui);
1635       break;
1636
1637     case EXPR_FILE:
1638       if (store_file (a, e.filename, offset) == -1)
1639         return -1;
1640       break;
1641
1642     case EXPR_SCRIPT:
1643       if (store_script (a, e.script, offset) == -1)
1644         return -1;
1645       break;
1646
1647     case EXPR_STRING:
1648       /* Copy the string into the allocator. */
1649       if (a->f->write (a, e.string.ptr, e.string.len, *offset) == -1)
1650         return -1;
1651       *offset += e.string.len;
1652       break;
1653
1654     case EXPR_FILL:
1655       if (a->f->fill (a, e.fl.b, e.fl.n, *offset) == -1)
1656         return -1;
1657       *offset += e.fl.n;
1658       break;
1659
1660     case EXPR_ASSIGN: {
1661       dict_t *d_next = d;
1662
1663       d = malloc (sizeof *d);
1664       if (d == NULL) {
1665         nbdkit_error ("malloc: %m");
1666         return -1;
1667       }
1668       d->next = d_next;
1669       d->name = e.a.name;
1670       d->id = e.a.id;
1671       break;
1672     }
1673
1674     case EXPR_NAME: {
1675       CLEANUP_FREE_ALLOCATOR struct allocator *a2 = NULL;
1676       uint64_t offset2 = 0, size2 = 0;
1677       dict_t *t;
1678
1679       /* Look up the expression in the current dictionary. */
1680       for (t = d; t != NULL; t = t->next)
1681         if (strcmp (t->name, e.name) == 0)
1682           break;
1683       if (t == NULL) {
1684         nbdkit_error ("\\%s not defined", e.name);
1685         return -1;
1686       }
1687
1688       /* Evaluate and then substitute the expression. */
1689       a2 = create_allocator ("sparse", false);
1690       if (a2 == NULL) {
1691         nbdkit_error ("malloc: %m");
1692         return -1;
1693       }
1694       /* NB: We pass the environment at the time that the assignment was
1695        * made (t->next) not the current environment.  This is deliberate.
1696        */
1697       if (evaluate (t->next, t->id, a2, &offset2, &size2) == -1)
1698         return -1;
1699
1700       if (a->f->blit (a2, a, size2, 0, *offset) == -1)
1701         return -1;
1702       *offset += size2;
1703       break;
1704     }
1705
1706     case EXPR_LIST:
1707     case EXPR_REPEAT:
1708     case EXPR_SLICE:
1709       /* Optimize some cases so we don't always have to create a
1710        * new allocator.
1711        */
1712
1713       /* <FILE[N:M] can be optimized by not reading in the whole file.
1714        * For files like /dev/urandom which are infinite this stops an
1715        * infinite loop.
1716        */
1717       if (e.t == EXPR_SLICE && get_node (e.sl.id).t == EXPR_FILE) {
1718         if (store_file_slice (a, get_node (e.sl.id).filename,
1719                               e.sl.n, e.sl.m, offset) == -1)
1720           return -1;
1721       }
1722
1723       /* <(SCRIPT)[:LEN] must be optimized by truncating the
1724        * output of the script.
1725        */
1726       else if (e.t == EXPR_SLICE && e.sl.n == 0 &&
1727                get_node (e.sl.id).t == EXPR_SCRIPT) {
1728         if (store_script_len (a, get_node (e.sl.id).script, e.sl.m,
1729                               offset) == -1)
1730           return -1;
1731       }
1732
1733       else {
1734         /* This is the non-optimized case.
1735          *
1736          * Nesting creates a new context where there is a new allocator
1737          * and the offset is reset to 0.
1738          */
1739         CLEANUP_FREE_ALLOCATOR struct allocator *a2 = NULL;
1740         uint64_t offset2 = 0, size2 = 0, m;
1741         node_id id;
1742
1743         switch (e.t) {
1744         case EXPR_LIST:   id = list.ptr[i]; break;
1745         case EXPR_REPEAT: id = e.r.id; break;
1746         case EXPR_SLICE:  id = e.sl.id; break;
1747         default: abort ();
1748         }
1749
1750         a2 = create_allocator ("sparse", false);
1751         if (a2 == NULL) {
1752           nbdkit_error ("malloc: %m");
1753           return -1;
1754         }
1755         if (evaluate (d, id, a2, &offset2, &size2) == -1)
1756           return -1;
1757
1758         switch (e.t) {
1759         case EXPR_LIST:
1760           if (a->f->blit (a2, a, size2, 0, *offset) == -1)
1761             return -1;
1762           *offset += size2;
1763           break;
1764         case EXPR_REPEAT:
1765           /* Duplicate the allocator a2 N times. */
1766           for (j = 0; j < e.r.n; ++j) {
1767             if (a->f->blit (a2, a, size2, 0, *offset) == -1)
1768               return -1;
1769             *offset += size2;
1770           }
1771           break;
1772         case EXPR_SLICE:
1773           /* Slice [N:M] */
1774           m = e.sl.m < 0 ? size2 : e.sl.m;
1775           if (e.sl.n < 0 || m < 0 ||
1776               e.sl.n > size2 || m > size2 ||
1777               e.sl.n > m ) {
1778             nbdkit_error ("[N:M] does not describe a valid slice");
1779             return -1;
1780           }
1781           /* Take a slice from the allocator. */
1782           if (a->f->blit (a2, a, m-e.sl.n, e.sl.n, *offset) == -1)
1783             return -1;
1784           *offset += m-e.sl.n;
1785           break;
1786         default:
1787           abort ();
1788         }
1789       }
1790       break;
1791     }
1792
1793     /* Adjust the size if the offset is now larger. */
1794     if (*size < *offset)
1795       *size = *offset;
1796   } /* for */
1797
1798   /* Free assignments in the local dictionary. */
1799   while (d != dict) {
1800     dict_t *t = d;
1801     d = d->next;
1802     free (t);
1803   }
1804
1805   return 0;
1806 }
1807
1808 /* Store file at current offset in the allocator, updating the offset. */
1809 static int
1810 store_file (struct allocator *a,
1811             const char *filename, uint64_t *offset)
1812 {
1813   FILE *fp;
1814   char buf[BUFSIZ];
1815   size_t n;
1816
1817   fp = fopen (filename, "r");
1818   if (fp == NULL) {
1819     nbdkit_error ("%s: %m", filename);
1820     return -1;
1821   }
1822
1823   while (!feof (fp)) {
1824     n = fread (buf, 1, BUFSIZ, fp);
1825     if (n > 0) {
1826       if (a->f->write (a, buf, n, *offset) == -1) {
1827         fclose (fp);
1828         return -1;
1829       }
1830     }
1831     if (ferror (fp)) {
1832       nbdkit_error ("fread: %s: %m", filename);
1833       fclose (fp);
1834       return -1;
1835     }
1836     (*offset) += n;
1837   }
1838
1839   if (fclose (fp) == EOF) {
1840     nbdkit_error ("fclose: %s: %m", filename);
1841     return -1;
1842   }
1843
1844   return 0;
1845 }
1846
1847 /* <FILE[N:M] */
1848 static int
1849 store_file_slice (struct allocator *a,
1850                   const char *filename,
1851                   uint64_t skip, int64_t end, uint64_t *offset)
1852 {
1853   FILE *fp;
1854   char buf[BUFSIZ];
1855   size_t n;
1856   uint64_t len = 0;
1857
1858   if ((end >= 0 && skip > end) || end < -2) {
1859     nbdkit_error ("<FILE[N:M] does not describe a valid slice");
1860     return -1;
1861   }
1862
1863   if (end >= 0)
1864     len = end - skip;
1865
1866   fp = fopen (filename, "r");
1867   if (fp == NULL) {
1868     nbdkit_error ("%s: %m", filename);
1869     return -1;
1870   }
1871
1872   if (fseek (fp, skip, SEEK_SET) == -1) {
1873     nbdkit_error ("%s: fseek: %m", filename);
1874     return -1;
1875   }
1876
1877   while (!feof (fp) && (end == -1 || len > 0)) {
1878     n = fread (buf, 1, end == -1 ? BUFSIZ : MIN (len, BUFSIZ), fp);
1879     if (n > 0) {
1880       if (a->f->write (a, buf, n, *offset) == -1) {
1881         fclose (fp);
1882         return -1;
1883       }
1884     }
1885     if (ferror (fp)) {
1886       nbdkit_error ("fread: %s: %m", filename);
1887       fclose (fp);
1888       return -1;
1889     }
1890     (*offset) += n;
1891     len -= n;
1892   }
1893
1894   if (fclose (fp) == EOF) {
1895     nbdkit_error ("fclose: %s: %m", filename);
1896     return -1;
1897   }
1898
1899   return 0;
1900 }
1901
1902 #ifndef WIN32
1903
1904 /* Run the script and store the output in the allocator from offset. */
1905 static int
1906 store_script (struct allocator *a,
1907               const char *script, uint64_t *offset)
1908 {
1909   FILE *pp;
1910   char buf[BUFSIZ];
1911   size_t n;
1912
1913   pp = popen (script, "r");
1914   if (pp == NULL) {
1915     nbdkit_error ("popen: %m");
1916     return -1;
1917   }
1918
1919   while (!feof (pp)) {
1920     n = fread (buf, 1, BUFSIZ, pp);
1921     if (n > 0) {
1922       if (a->f->write (a, buf, n, *offset) == -1) {
1923         pclose (pp);
1924         return -1;
1925       }
1926     }
1927     if (ferror (pp)) {
1928       nbdkit_error ("fread: %m");
1929       pclose (pp);
1930       return -1;
1931     }
1932     (*offset) += n;
1933   }
1934
1935   if (pclose (pp) != 0) {
1936     nbdkit_error ("pclose: external command failed");
1937     return -1;
1938   }
1939
1940   return 0;
1941 }
1942
1943 /* Run the script and store up to len bytes of the output in the
1944  * allocator from offset.
1945  */
1946 static int
1947 store_script_len (struct allocator *a,
1948                   const char *script,
1949                   int64_t len, uint64_t *offset)
1950 {
1951   FILE *pp;
1952   char buf[BUFSIZ];
1953   size_t n;
1954
1955   /* Restore SIGPIPE back to SIG_DFL, since shell can't undo SIG_IGN */
1956   signal (SIGPIPE, SIG_DFL);
1957
1958   pp = popen (script, "r");
1959   if (pp == NULL) {
1960     nbdkit_error ("popen: %m");
1961     return -1;
1962   }
1963
1964   while (!feof (pp) && len > 0) {
1965     n = fread (buf, 1, MIN (len, BUFSIZ), pp);
1966     if (n > 0) {
1967       if (a->f->write (a, buf, n, *offset) == -1) {
1968         pclose (pp);
1969         return -1;
1970       }
1971     }
1972     if (ferror (pp)) {
1973       nbdkit_error ("fread: %m");
1974       pclose (pp);
1975       return -1;
1976     }
1977     (*offset) += n;
1978     len -= n;
1979   }
1980
1981   /* Note for historical reasons we do not fail here if the external
1982    * script returns an error.  That is because of existing documented
1983    * usage of scripts that generate infinite output which is
1984    * truncated.  These scripts will almost always generate an error
1985    * (some kind of pipe fail), and because we didn't check that before
1986    * we cannot start to fail those scripts now.
1987    */
1988   if (pclose (pp) == -1) {
1989     nbdkit_error ("pclose: %m");
1990     return -1;
1991   }
1992
1993   return 0;
1994 }
1995
1996 #else /* WIN32 */
1997
1998 static int
1999 store_script (struct allocator *a,
2000               const char *script, uint64_t *offset)
2001 {
2002   NOT_IMPLEMENTED_ON_WINDOWS ("<(SCRIPT)");
2003 }
2004
2005 static int
2006 store_script_len (struct allocator *a,
2007                   const char *script,
2008                   int64_t len, uint64_t *offset)
2009 {
2010   NOT_IMPLEMENTED_ON_WINDOWS ("<(SCRIPT)");
2011 }
2012
2013 #endif /* WIN32 */