usr.bin/hexdump/parse.c

   1 /*
   2  * Copyright (c) 1989, 1993
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 3. All advertising materials mentioning features or use of this software
  14  *    must display the following acknowledgement:
  15  *      This product includes software developed by the University of
  16  *      California, Berkeley and its contributors.
  17  * 4. Neither the name of the University nor the names of its contributors
  18  *    may be used to endorse or promote products derived from this software
  19  *    without specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  31  * SUCH DAMAGE.
  32  *
  33  * @(#)parse.c  8.1 (Berkeley) 6/6/93
  34  * $FreeBSD: src/usr.bin/hexdump/parse.c,v 1.4.2.1 2002/07/23 14:27:06 tjr Exp $
  35  * $DragonFly: src/usr.bin/hexdump/parse.c,v 1.6 2005/04/10 20:55:38 drhodus Exp $
  36  */
  37
  38 #include <sys/types.h>
  39
  40 #include <err.h>
  41 #include <fcntl.h>
  42 #include <stdio.h>
  43 #include <stdlib.h>
  44 #include <ctype.h>
  45 #include <string.h>
  46 #include "hexdump.h"
  47
  48 FU *endfu;                                      /* format at end-of-data */
  49
  50 void
  51 addfile(char *name)
  52 {
  53         unsigned char *p;
  54         FILE *fp;
  55         int ch;
  56         char buf[2048 + 1];
  57
  58         if ((fp = fopen(name, "r")) == NULL)
  59                 err(1, "%s", name);
  60         while (fgets(buf, sizeof(buf), fp)) {
  61                 if (!(p = strchr(buf, '\n'))) {
  62                         warnx("line too long");
  63                         while ((ch = getchar()) != '\n' && ch != EOF);
  64                         continue;
  65                 }
  66                 *p = '\0';
  67                 for (p = buf; *p && isspace(*p); ++p);
  68                 if (!*p || *p == '#')
  69                         continue;
  70                 add(p);
  71         }
  72         (void)fclose(fp);
  73 }
  74
  75 void
  76 add(const char *fmt)
  77 {
  78         unsigned const char *p, *savep;
  79         static FS **nextfs;
  80         FS *tfs;
  81         FU *tfu, **nextfu;
  82
  83         /* start new linked list of format units */
  84         if ((tfs = calloc(1, sizeof(FS))) == NULL)
  85                 err(1, NULL);
  86         if (!fshead)
  87                 fshead = tfs;
  88         else
  89                 *nextfs = tfs;
  90         nextfs = &tfs->nextfs;
  91         nextfu = &tfs->nextfu;
  92
  93         /* take the format string and break it up into format units */
  94         for (p = fmt;;) {
  95                 /* skip leading white space */
  96                 for (; isspace(*p); ++p);
  97                 if (!*p)
  98                         break;
  99
 100                 /* allocate a new format unit and link it in */
 101                 if ((tfu = calloc(1, sizeof(FU))) == NULL)
 102                         err(1, NULL);
 103                 *nextfu = tfu;
 104                 nextfu = &tfu->nextfu;
 105                 tfu->reps = 1;
 106
 107                 /* if leading digit, repetition count */
 108                 if (isdigit(*p)) {
 109                         for (savep = p; isdigit(*p); ++p);
 110                         if (!isspace(*p) && *p != '/')
 111                                 badfmt(fmt);
 112                         /* may overwrite either white space or slash */
 113                         tfu->reps = atoi(savep);
 114                         tfu->flags = F_SETREP;
 115                         /* skip trailing white space */
 116                         for (++p; isspace(*p); ++p);
 117                 }
 118
 119                 /* skip slash and trailing white space */
 120                 if (*p == '/')
 121                         while (isspace(*++p));
 122
 123                 /* byte count */
 124                 if (isdigit(*p)) {
 125                         for (savep = p; isdigit(*p); ++p);
 126                         if (!isspace(*p))
 127                                 badfmt(fmt);
 128                         tfu->bcnt = atoi(savep);
 129                         /* skip trailing white space */
 130                         for (++p; isspace(*p); ++p);
 131                 }
 132
 133                 /* format */
 134                 if (*p != '"')
 135                         badfmt(fmt);
 136                 for (savep = ++p; *p != '"';)
 137                         if (*p++ == 0)
 138                                 badfmt(fmt);
 139                 if (!(tfu->fmt = malloc(p - savep + 1)))
 140                         err(1, NULL);
 141                 (void) strncpy(tfu->fmt, savep, p - savep);
 142                 tfu->fmt[p - savep] = '\0';
 143                 escape(tfu->fmt);
 144                 p++;
 145         }
 146 }
 147
 148 static const char *spec = ".#-+ 0123456789";
 149
 150 int
 151 size(FS *fs)
 152 {
 153         FU *fu;
 154         int bcnt, cursize;
 155         unsigned char *fmt;
 156         int prec;
 157
 158         /* figure out the data block size needed for each format unit */
 159         for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
 160                 if (fu->bcnt) {
 161                         cursize += fu->bcnt * fu->reps;
 162                         continue;
 163                 }
 164                 for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
 165                         if (*fmt != '%')
 166                                 continue;
 167                         /*
 168                          * skip any special chars -- save precision in
 169                          * case it's a %s format.
 170                          */
 171                         while (strchr(spec + 1, *++fmt));
 172                         if (*fmt == '.' && isdigit(*++fmt)) {
 173                                 prec = atoi(fmt);
 174                                 while (isdigit(*++fmt));
 175                         }
 176                         switch(*fmt) {
 177                         case 'c':
 178                                 bcnt += 1;
 179                                 break;
 180                         case 'd': case 'i': case 'o': case 'u':
 181                         case 'x': case 'X':
 182                                 bcnt += 4;
 183                                 break;
 184                         case 'e': case 'E': case 'f': case 'g': case 'G':
 185                                 bcnt += 8;
 186                                 break;
 187                         case 's':
 188                                 bcnt += prec;
 189                                 break;
 190                         case '_':
 191                                 switch(*++fmt) {
 192                                 case 'c': case 'p': case 'u':
 193                                         bcnt += 1;
 194                                         break;
 195                                 }
 196                         }
 197                 }
 198                 cursize += bcnt * fu->reps;
 199         }
 200         return (cursize);
 201 }
 202
 203 void
 204 rewrite(FS *fs)
 205 {
 206         enum { NOTOKAY, USEBCNT, USEPREC } sokay;
 207         PR *pr, **nextpr = NULL;
 208         FU *fu;
 209         unsigned char *p1, *p2, *fmtp;
 210         char savech, cs[3];
 211         int nconv, prec = 0;
 212
 213         for (fu = fs->nextfu; fu; fu = fu->nextfu) {
 214                 /*
 215                  * Break each format unit into print units; each conversion
 216                  * character gets its own.
 217                  */
 218                 for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
 219                         if ((pr = calloc(1, sizeof(PR))) == NULL)
 220                                 err(1, NULL);
 221                         if (!fu->nextpr)
 222                                 fu->nextpr = pr;
 223                         else
 224                                 *nextpr = pr;
 225
 226                         /* Skip preceding text and up to the next % sign. */
 227                         for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
 228
 229                         /* Only text in the string. */
 230                         if (!*p1) {
 231                                 pr->fmt = fmtp;
 232                                 pr->flags = F_TEXT;
 233                                 break;
 234                         }
 235
 236                         /*
 237                          * Get precision for %s -- if have a byte count, don't
 238                          * need it.
 239                          */
 240                         if (fu->bcnt) {
 241                                 sokay = USEBCNT;
 242                                 /* Skip to conversion character. */
 243                                 for (++p1; strchr(spec, *p1); ++p1);
 244                         } else {
 245                                 /* Skip any special chars, field width. */
 246                                 while (strchr(spec + 1, *++p1));
 247                                 if (*p1 == '.' && isdigit(*++p1)) {
 248                                         sokay = USEPREC;
 249                                         prec = atoi(p1);
 250                                         while (isdigit(*++p1));
 251                                 } else
 252                                         sokay = NOTOKAY;
 253                         }
 254
 255                         p2 = p1 + 1;            /* Set end pointer. */
 256                         cs[0] = *p1;            /* Set conversion string. */
 257                         cs[1] = '\0';
 258
 259                         /*
 260                          * Figure out the byte count for each conversion;
 261                          * rewrite the format as necessary, set up blank-
 262                          * padding for end of data.
 263                          */
 264                         switch(cs[0]) {
 265                         case 'c':
 266                                 pr->flags = F_CHAR;
 267                                 switch(fu->bcnt) {
 268                                 case 0: case 1:
 269                                         pr->bcnt = 1;
 270                                         break;
 271                                 default:
 272                                         p1[1] = '\0';
 273                                         badcnt(p1);
 274                                 }
 275                                 break;
 276                         case 'd': case 'i':
 277                                 pr->flags = F_INT;
 278                                 goto isint;
 279                         case 'o': case 'u': case 'x': case 'X':
 280                                 pr->flags = F_UINT;
 281 isint:                          cs[2] = '\0';
 282                                 cs[1] = cs[0];
 283                                 cs[0] = 'q';
 284                                 switch(fu->bcnt) {
 285                                 case 0: case 4:
 286                                         pr->bcnt = 4;
 287                                         break;
 288                                 case 1:
 289                                         pr->bcnt = 1;
 290                                         break;
 291                                 case 2:
 292                                         pr->bcnt = 2;
 293                                         break;
 294                                 default:
 295                                         p1[1] = '\0';
 296                                         badcnt(p1);
 297                                 }
 298                                 break;
 299                         case 'e': case 'E': case 'f': case 'g': case 'G':
 300                                 pr->flags = F_DBL;
 301                                 switch(fu->bcnt) {
 302                                 case 0: case 8:
 303                                         pr->bcnt = 8;
 304                                         break;
 305                                 case 4:
 306                                         pr->bcnt = 4;
 307                                         break;
 308                                 default:
 309                                         if (fu->bcnt == sizeof(long double)) {
 310                                                 cs[2] = '\0';
 311                                                 cs[1] = cs[0];
 312                                                 cs[0] = 'L';
 313                                                 pr->bcnt = sizeof(long double);
 314                                         } else {
 315                                                 p1[1] = '\0';
 316                                                 badcnt(p1);
 317                                         }
 318                                 }
 319                                 break;
 320                         case 's':
 321                                 pr->flags = F_STR;
 322                                 switch(sokay) {
 323                                 case NOTOKAY:
 324                                         badsfmt();
 325                                 case USEBCNT:
 326                                         pr->bcnt = fu->bcnt;
 327                                         break;
 328                                 case USEPREC:
 329                                         pr->bcnt = prec;
 330                                         break;
 331                                 }
 332                                 break;
 333                         case '_':
 334                                 ++p2;
 335                                 switch(p1[1]) {
 336                                 case 'A':
 337                                         endfu = fu;
 338                                         fu->flags |= F_IGNORE;
 339                                         /* FALLTHROUGH */
 340                                 case 'a':
 341                                         pr->flags = F_ADDRESS;
 342                                         ++p2;
 343                                         switch(p1[2]) {
 344                                         case 'd': case 'o': case'x':
 345                                                 cs[0] = 'q';
 346                                                 cs[1] = p1[2];
 347                                                 cs[2] = '\0';
 348                                                 break;
 349                                         default:
 350                                                 p1[3] = '\0';
 351                                                 badconv(p1);
 352                                         }
 353                                         break;
 354                                 case 'c':
 355                                         pr->flags = F_C;
 356                                         /* cs[0] = 'c'; set in conv_c */
 357                                         goto isint2;
 358                                 case 'p':
 359                                         pr->flags = F_P;
 360                                         cs[0] = 'c';
 361                                         goto isint2;
 362                                 case 'u':
 363                                         pr->flags = F_U;
 364                                         /* cs[0] = 'c'; set in conv_u */
 365 isint2:                                 switch(fu->bcnt) {
 366                                         case 0: case 1:
 367                                                 pr->bcnt = 1;
 368                                                 break;
 369                                         default:
 370                                                 p1[2] = '\0';
 371                                                 badcnt(p1);
 372                                         }
 373                                         break;
 374                                 default:
 375                                         p1[2] = '\0';
 376                                         badconv(p1);
 377                                 }
 378                                 break;
 379                         default:
 380                                 p1[1] = '\0';
 381                                 badconv(p1);
 382                         }
 383
 384                         /*
 385                          * Copy to PR format string, set conversion character
 386                          * pointer, update original.
 387                          */
 388                         savech = *p2;
 389                         p1[0] = '\0';
 390                         if ((pr->fmt = calloc(1, strlen(fmtp) + 2)) == NULL)
 391                                 err(1, NULL);
 392                         (void)strcpy(pr->fmt, fmtp);
 393                         (void)strcat(pr->fmt, cs);
 394                         *p2 = savech;
 395                         pr->cchar = pr->fmt + (p1 - fmtp);
 396                         fmtp = p2;
 397
 398                         /* Only one conversion character if byte count. */
 399                         if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++)
 400             errx(1, "byte count with multiple conversion characters");
 401                 }
 402                 /*
 403                  * If format unit byte count not specified, figure it out
 404                  * so can adjust rep count later.
 405                  */
 406                 if (!fu->bcnt)
 407                         for (pr = fu->nextpr; pr; pr = pr->nextpr)
 408                                 fu->bcnt += pr->bcnt;
 409         }
 410         /*
 411          * If the format string interprets any data at all, and it's
 412          * not the same as the blocksize, and its last format unit
 413          * interprets any data at all, and has no iteration count,
 414          * repeat it as necessary.
 415          *
 416          * If, rep count is greater than 1, no trailing whitespace
 417          * gets output from the last iteration of the format unit.
 418          */
 419         for (fu = fs->nextfu; fu; fu = fu->nextfu) {
 420                 if (!fu->nextfu && fs->bcnt < blocksize &&
 421                     !(fu->flags&F_SETREP) && fu->bcnt)
 422                         fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
 423                 if (fu->reps > 1) {
 424                         for (pr = fu->nextpr;; pr = pr->nextpr)
 425                                 if (!pr->nextpr)
 426                                         break;
 427                         for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
 428                                 p2 = isspace(*p1) ? p1 : NULL;
 429                         if (p2)
 430                                 pr->nospace = p2;
 431                 }
 432         }
 433 #ifdef DEBUG
 434         for (fu = fs->nextfu; fu; fu = fu->nextfu) {
 435                 (void)printf("fmt:");
 436                 for (pr = fu->nextpr; pr; pr = pr->nextpr)
 437                         (void)printf(" {%s}", pr->fmt);
 438                 (void)printf("\n");
 439         }
 440 #endif
 441 }
 442
 443 void
 444 escape(char *p1)
 445 {
 446         char *p2;
 447
 448         /* alphabetic escape sequences have to be done in place */
 449         for (p2 = p1;; ++p1, ++p2) {
 450                 if (!*p1) {
 451                         *p2 = *p1;
 452                         break;
 453                 }
 454                 if (*p1 == '\\')
 455                         switch(*++p1) {
 456                         case 'a':
 457                              /* *p2 = '\a'; */
 458                                 *p2 = '\007';
 459                                 break;
 460                         case 'b':
 461                                 *p2 = '\b';
 462                                 break;
 463                         case 'f':
 464                                 *p2 = '\f';
 465                                 break;
 466                         case 'n':
 467                                 *p2 = '\n';
 468                                 break;
 469                         case 'r':
 470                                 *p2 = '\r';
 471                                 break;
 472                         case 't':
 473                                 *p2 = '\t';
 474                                 break;
 475                         case 'v':
 476                                 *p2 = '\v';
 477                                 break;
 478                         default:
 479                                 *p2 = *p1;
 480                                 break;
 481                         }
 482         }
 483 }
 484
 485 void
 486 badcnt(char *s)
 487 {
 488         errx(1, "%s: bad byte count", s);
 489 }
 490
 491 void
 492 badsfmt(void)
 493 {
 494         errx(1, "%%s: requires a precision or a byte count");
 495 }
 496
 497 void
 498 badfmt(const char *fmt)
 499 {
 500         errx(1, "\"%s\": bad format", fmt);
 501 }
 502
 503 void
 504 badconv(char *ch)
 505 {
 506         errx(1, "%%%s: bad conversion character", ch);
 507 }