usr.bin/hexdump/parse.c

   1 /*
   2  * Copyright (c) 1989, 1993
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 3. Neither the name of the University nor the names of its contributors
  14  *    may be used to endorse or promote products derived from this software
  15  *    without specific prior written permission.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27  * SUCH DAMAGE.
  28  *
  29  * @(#)parse.c  8.1 (Berkeley) 6/6/93
  30  * $FreeBSD: src/usr.bin/hexdump/parse.c,v 1.4.2.1 2002/07/23 14:27:06 tjr Exp $
  31  * $DragonFly: src/usr.bin/hexdump/parse.c,v 1.6 2005/04/10 20:55:38 drhodus Exp $
  32  */
  33
  34 #include <sys/types.h>
  35
  36 #include <err.h>
  37 #include <fcntl.h>
  38 #include <stdio.h>
  39 #include <stdlib.h>
  40 #include <ctype.h>
  41 #include <string.h>
  42 #include "hexdump.h"
  43
   44 FU *endfu;                                      /* format unit used at end-of-data; set by rewrite() when it sees a %_A conversion */
  45
  46 void
  47 addfile(char *name)
  48 {
  49         unsigned char *p;
  50         FILE *fp;
  51         int ch;
  52         char buf[2048 + 1];
  53
  54         if ((fp = fopen(name, "r")) == NULL)
  55                 err(1, "%s", name);
  56         while (fgets(buf, sizeof(buf), fp)) {
  57                 if (!(p = strchr(buf, '\n'))) {
  58                         warnx("line too long");
  59                         while ((ch = getchar()) != '\n' && ch != EOF);
  60                         continue;
  61                 }
  62                 *p = '\0';
  63                 for (p = buf; *p && isspace(*p); ++p);
  64                 if (!*p || *p == '#')
  65                         continue;
  66                 add(p);
  67         }
  68         (void)fclose(fp);
  69 }
  70
  71 void
  72 add(const char *fmt)
  73 {
  74         unsigned const char *p, *savep;
  75         static FS **nextfs;
  76         FS *tfs;
  77         FU *tfu, **nextfu;
  78
  79         /* start new linked list of format units */
  80         if ((tfs = calloc(1, sizeof(FS))) == NULL)
  81                 err(1, NULL);
  82         if (!fshead)
  83                 fshead = tfs;
  84         else
  85                 *nextfs = tfs;
  86         nextfs = &tfs->nextfs;
  87         nextfu = &tfs->nextfu;
  88
  89         /* take the format string and break it up into format units */
  90         for (p = fmt;;) {
  91                 /* skip leading white space */
  92                 for (; isspace(*p); ++p);
  93                 if (!*p)
  94                         break;
  95
  96                 /* allocate a new format unit and link it in */
  97                 if ((tfu = calloc(1, sizeof(FU))) == NULL)
  98                         err(1, NULL);
  99                 *nextfu = tfu;
 100                 nextfu = &tfu->nextfu;
 101                 tfu->reps = 1;
 102
 103                 /* if leading digit, repetition count */
 104                 if (isdigit(*p)) {
 105                         for (savep = p; isdigit(*p); ++p);
 106                         if (!isspace(*p) && *p != '/')
 107                                 badfmt(fmt);
 108                         /* may overwrite either white space or slash */
 109                         tfu->reps = atoi(savep);
 110                         tfu->flags = F_SETREP;
 111                         /* skip trailing white space */
 112                         for (++p; isspace(*p); ++p);
 113                 }
 114
 115                 /* skip slash and trailing white space */
 116                 if (*p == '/')
 117                         while (isspace(*++p));
 118
 119                 /* byte count */
 120                 if (isdigit(*p)) {
 121                         for (savep = p; isdigit(*p); ++p);
 122                         if (!isspace(*p))
 123                                 badfmt(fmt);
 124                         tfu->bcnt = atoi(savep);
 125                         /* skip trailing white space */
 126                         for (++p; isspace(*p); ++p);
 127                 }
 128
 129                 /* format */
 130                 if (*p != '"')
 131                         badfmt(fmt);
 132                 for (savep = ++p; *p != '"';)
 133                         if (*p++ == 0)
 134                                 badfmt(fmt);
 135                 if (!(tfu->fmt = malloc(p - savep + 1)))
 136                         err(1, NULL);
 137                 (void) strncpy(tfu->fmt, savep, p - savep);
 138                 tfu->fmt[p - savep] = '\0';
 139                 escape(tfu->fmt);
 140                 p++;
 141         }
 142 }
 143
  144 static const char *spec = ".#-+ 0123456789";	/* printf flag, width and precision characters; spec + 1 leaves out '.' so a scan stops at the precision */
 145
 146 int
 147 size(FS *fs)
 148 {
 149         FU *fu;
 150         int bcnt, cursize;
 151         unsigned char *fmt;
 152         int prec;
 153
 154         /* figure out the data block size needed for each format unit */
 155         for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
 156                 if (fu->bcnt) {
 157                         cursize += fu->bcnt * fu->reps;
 158                         continue;
 159                 }
 160                 for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
 161                         if (*fmt != '%')
 162                                 continue;
 163                         /*
 164                          * skip any special chars -- save precision in
 165                          * case it's a %s format.
 166                          */
 167                         while (strchr(spec + 1, *++fmt));
 168                         if (*fmt == '.' && isdigit(*++fmt)) {
 169                                 prec = atoi(fmt);
 170                                 while (isdigit(*++fmt));
 171                         }
 172                         switch(*fmt) {
 173                         case 'c':
 174                                 bcnt += 1;
 175                                 break;
 176                         case 'd': case 'i': case 'o': case 'u':
 177                         case 'x': case 'X':
 178                                 bcnt += 4;
 179                                 break;
 180                         case 'e': case 'E': case 'f': case 'g': case 'G':
 181                                 bcnt += 8;
 182                                 break;
 183                         case 's':
 184                                 bcnt += prec;
 185                                 break;
 186                         case '_':
 187                                 switch(*++fmt) {
 188                                 case 'c': case 'p': case 'u':
 189                                         bcnt += 1;
 190                                         break;
 191                                 }
 192                         }
 193                 }
 194                 cursize += bcnt * fu->reps;
 195         }
 196         return (cursize);
 197 }
 198
 199 void
 200 rewrite(FS *fs)
 201 {
 202         enum { NOTOKAY, USEBCNT, USEPREC } sokay;
 203         PR *pr, **nextpr = NULL;
 204         FU *fu;
 205         unsigned char *p1, *p2, *fmtp;
 206         char savech, cs[3];
 207         int nconv, prec = 0;
 208
 209         for (fu = fs->nextfu; fu; fu = fu->nextfu) {
 210                 /*
 211                  * Break each format unit into print units; each conversion
 212                  * character gets its own.
 213                  */
 214                 for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
 215                         if ((pr = calloc(1, sizeof(PR))) == NULL)
 216                                 err(1, NULL);
 217                         if (!fu->nextpr)
 218                                 fu->nextpr = pr;
 219                         else
 220                                 *nextpr = pr;
 221
 222                         /* Skip preceding text and up to the next % sign. */
 223                         for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
 224
 225                         /* Only text in the string. */
 226                         if (!*p1) {
 227                                 pr->fmt = fmtp;
 228                                 pr->flags = F_TEXT;
 229                                 break;
 230                         }
 231
 232                         /*
 233                          * Get precision for %s -- if have a byte count, don't
 234                          * need it.
 235                          */
 236                         if (fu->bcnt) {
 237                                 sokay = USEBCNT;
 238                                 /* Skip to conversion character. */
 239                                 for (++p1; strchr(spec, *p1); ++p1);
 240                         } else {
 241                                 /* Skip any special chars, field width. */
 242                                 while (strchr(spec + 1, *++p1));
 243                                 if (*p1 == '.' && isdigit(*++p1)) {
 244                                         sokay = USEPREC;
 245                                         prec = atoi(p1);
 246                                         while (isdigit(*++p1));
 247                                 } else
 248                                         sokay = NOTOKAY;
 249                         }
 250
 251                         p2 = p1 + 1;            /* Set end pointer. */
 252                         cs[0] = *p1;            /* Set conversion string. */
 253                         cs[1] = '\0';
 254
 255                         /*
 256                          * Figure out the byte count for each conversion;
 257                          * rewrite the format as necessary, set up blank-
 258                          * padding for end of data.
 259                          */
 260                         switch(cs[0]) {
 261                         case 'c':
 262                                 pr->flags = F_CHAR;
 263                                 switch(fu->bcnt) {
 264                                 case 0: case 1:
 265                                         pr->bcnt = 1;
 266                                         break;
 267                                 default:
 268                                         p1[1] = '\0';
 269                                         badcnt(p1);
 270                                 }
 271                                 break;
 272                         case 'd': case 'i':
 273                                 pr->flags = F_INT;
 274                                 goto isint;
 275                         case 'o': case 'u': case 'x': case 'X':
 276                                 pr->flags = F_UINT;
 277 isint:                          cs[2] = '\0';
 278                                 cs[1] = cs[0];
 279                                 cs[0] = 'q';
 280                                 switch(fu->bcnt) {
 281                                 case 0: case 4:
 282                                         pr->bcnt = 4;
 283                                         break;
 284                                 case 1:
 285                                         pr->bcnt = 1;
 286                                         break;
 287                                 case 2:
 288                                         pr->bcnt = 2;
 289                                         break;
 290                                 default:
 291                                         p1[1] = '\0';
 292                                         badcnt(p1);
 293                                 }
 294                                 break;
 295                         case 'e': case 'E': case 'f': case 'g': case 'G':
 296                                 pr->flags = F_DBL;
 297                                 switch(fu->bcnt) {
 298                                 case 0: case 8:
 299                                         pr->bcnt = 8;
 300                                         break;
 301                                 case 4:
 302                                         pr->bcnt = 4;
 303                                         break;
 304                                 default:
 305                                         if (fu->bcnt == sizeof(long double)) {
 306                                                 cs[2] = '\0';
 307                                                 cs[1] = cs[0];
 308                                                 cs[0] = 'L';
 309                                                 pr->bcnt = sizeof(long double);
 310                                         } else {
 311                                                 p1[1] = '\0';
 312                                                 badcnt(p1);
 313                                         }
 314                                 }
 315                                 break;
 316                         case 's':
 317                                 pr->flags = F_STR;
 318                                 switch(sokay) {
 319                                 case NOTOKAY:
 320                                         badsfmt();
 321                                 case USEBCNT:
 322                                         pr->bcnt = fu->bcnt;
 323                                         break;
 324                                 case USEPREC:
 325                                         pr->bcnt = prec;
 326                                         break;
 327                                 }
 328                                 break;
 329                         case '_':
 330                                 ++p2;
 331                                 switch(p1[1]) {
 332                                 case 'A':
 333                                         endfu = fu;
 334                                         fu->flags |= F_IGNORE;
 335                                         /* FALLTHROUGH */
 336                                 case 'a':
 337                                         pr->flags = F_ADDRESS;
 338                                         ++p2;
 339                                         switch(p1[2]) {
 340                                         case 'd': case 'o': case'x':
 341                                                 cs[0] = 'q';
 342                                                 cs[1] = p1[2];
 343                                                 cs[2] = '\0';
 344                                                 break;
 345                                         default:
 346                                                 p1[3] = '\0';
 347                                                 badconv(p1);
 348                                         }
 349                                         break;
 350                                 case 'c':
 351                                         pr->flags = F_C;
 352                                         /* cs[0] = 'c'; set in conv_c */
 353                                         goto isint2;
 354                                 case 'p':
 355                                         pr->flags = F_P;
 356                                         cs[0] = 'c';
 357                                         goto isint2;
 358                                 case 'u':
 359                                         pr->flags = F_U;
 360                                         /* cs[0] = 'c'; set in conv_u */
 361 isint2:                                 switch(fu->bcnt) {
 362                                         case 0: case 1:
 363                                                 pr->bcnt = 1;
 364                                                 break;
 365                                         default:
 366                                                 p1[2] = '\0';
 367                                                 badcnt(p1);
 368                                         }
 369                                         break;
 370                                 default:
 371                                         p1[2] = '\0';
 372                                         badconv(p1);
 373                                 }
 374                                 break;
 375                         default:
 376                                 p1[1] = '\0';
 377                                 badconv(p1);
 378                         }
 379
 380                         /*
 381                          * Copy to PR format string, set conversion character
 382                          * pointer, update original.
 383                          */
 384                         savech = *p2;
 385                         p1[0] = '\0';
 386                         if ((pr->fmt = calloc(1, strlen(fmtp) + 2)) == NULL)
 387                                 err(1, NULL);
 388                         (void)strcpy(pr->fmt, fmtp);
 389                         (void)strcat(pr->fmt, cs);
 390                         *p2 = savech;
 391                         pr->cchar = pr->fmt + (p1 - fmtp);
 392                         fmtp = p2;
 393
 394                         /* Only one conversion character if byte count. */
 395                         if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++)
 396             errx(1, "byte count with multiple conversion characters");
 397                 }
 398                 /*
 399                  * If format unit byte count not specified, figure it out
 400                  * so can adjust rep count later.
 401                  */
 402                 if (!fu->bcnt)
 403                         for (pr = fu->nextpr; pr; pr = pr->nextpr)
 404                                 fu->bcnt += pr->bcnt;
 405         }
 406         /*
 407          * If the format string interprets any data at all, and it's
 408          * not the same as the blocksize, and its last format unit
 409          * interprets any data at all, and has no iteration count,
 410          * repeat it as necessary.
 411          *
 412          * If, rep count is greater than 1, no trailing whitespace
 413          * gets output from the last iteration of the format unit.
 414          */
 415         for (fu = fs->nextfu; fu; fu = fu->nextfu) {
 416                 if (!fu->nextfu && fs->bcnt < blocksize &&
 417                     !(fu->flags&F_SETREP) && fu->bcnt)
 418                         fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
 419                 if (fu->reps > 1) {
 420                         for (pr = fu->nextpr;; pr = pr->nextpr)
 421                                 if (!pr->nextpr)
 422                                         break;
 423                         for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
 424                                 p2 = isspace(*p1) ? p1 : NULL;
 425                         if (p2)
 426                                 pr->nospace = p2;
 427                 }
 428         }
 429 #ifdef DEBUG
 430         for (fu = fs->nextfu; fu; fu = fu->nextfu) {
 431                 (void)printf("fmt:");
 432                 for (pr = fu->nextpr; pr; pr = pr->nextpr)
 433                         (void)printf(" {%s}", pr->fmt);
 434                 (void)printf("\n");
 435         }
 436 #endif
 437 }
 438
 439 void
 440 escape(char *p1)
 441 {
 442         char *p2;
 443
 444         /* alphabetic escape sequences have to be done in place */
 445         for (p2 = p1;; ++p1, ++p2) {
 446                 if (!*p1) {
 447                         *p2 = *p1;
 448                         break;
 449                 }
 450                 if (*p1 == '\\')
 451                         switch(*++p1) {
 452                         case 'a':
 453                              /* *p2 = '\a'; */
 454                                 *p2 = '\007';
 455                                 break;
 456                         case 'b':
 457                                 *p2 = '\b';
 458                                 break;
 459                         case 'f':
 460                                 *p2 = '\f';
 461                                 break;
 462                         case 'n':
 463                                 *p2 = '\n';
 464                                 break;
 465                         case 'r':
 466                                 *p2 = '\r';
 467                                 break;
 468                         case 't':
 469                                 *p2 = '\t';
 470                                 break;
 471                         case 'v':
 472                                 *p2 = '\v';
 473                                 break;
 474                         default:
 475                                 *p2 = *p1;
 476                                 break;
 477                         }
 478         }
 479 }
 480
 481 void
 482 badcnt(char *s)
 483 {
 484         errx(1, "%s: bad byte count", s);
 485 }
 486
 487 void
 488 badsfmt(void)
 489 {
 490         errx(1, "%%s: requires a precision or a byte count");
 491 }
 492
 493 void
 494 badfmt(const char *fmt)
 495 {
 496         errx(1, "\"%s\": bad format", fmt);
 497 }
 498
 499 void
 500 badconv(char *ch)
 501 {
 502         errx(1, "%%%s: bad conversion character", ch);
 503 }