stdscan.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2009 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 #include "compiler.h"
  35
  36 #include <stdio.h>
  37 #include <stdlib.h>
  38 #include <string.h>
  39 #include <ctype.h>
  40 #include <inttypes.h>
  41
  42 #include "nasm.h"
  43 #include "nasmlib.h"
  44 #include "quote.h"
  45 #include "stdscan.h"
  46 #include "insns.h"
  47
  48 /*
  49  * Standard scanner routine used by parser.c and some output
  50  * formats. It keeps a succession of temporary-storage strings in
  51  * stdscan_tempstorage, which can be cleared using stdscan_reset.
  52  */
  53 static char *stdscan_bufptr = NULL;
  54 static char **stdscan_tempstorage = NULL;
  55 static int stdscan_tempsize = 0, stdscan_templen = 0;
  56 static int brace = 0;               /* nested brace counter */
  57 static bool brace_opened = false;   /* if brace is just opened */
  58 #define STDSCAN_TEMP_DELTA 256
  59
  60 void stdscan_set(char *str)
  61 {
  62         stdscan_bufptr = str;
  63 }
  64
  65 char *stdscan_get(void)
  66 {
  67         return stdscan_bufptr;
  68 }
  69
  70 static void stdscan_pop(void)
  71 {
  72     nasm_free(stdscan_tempstorage[--stdscan_templen]);
  73 }
  74
  75 void stdscan_reset(void)
  76 {
  77     while (stdscan_templen > 0)
  78         stdscan_pop();
  79 }
  80
  81 /*
  82  * Unimportant cleanup is done to avoid confusing people who are trying
  83  * to debug real memory leaks
  84  */
  85 void stdscan_cleanup(void)
  86 {
  87     stdscan_reset();
  88     nasm_free(stdscan_tempstorage);
  89 }
  90
  91 static char *stdscan_copy(char *p, int len)
  92 {
  93     char *text;
  94
  95     text = nasm_malloc(len + 1);
  96     memcpy(text, p, len);
  97     text[len] = '\0';
  98
  99     if (stdscan_templen >= stdscan_tempsize) {
 100         stdscan_tempsize += STDSCAN_TEMP_DELTA;
 101         stdscan_tempstorage = nasm_realloc(stdscan_tempstorage,
 102                                            stdscan_tempsize *
 103                                            sizeof(char *));
 104     }
 105     stdscan_tempstorage[stdscan_templen++] = text;
 106
 107     return text;
 108 }
 109
 110 /*
 111  * a token is enclosed with braces. proper token type will be assigned
 112  * accordingly with the token flag.
 113  * a closing brace is treated as an ending character of corresponding token.
 114  */
 115 static int stdscan_handle_brace(struct tokenval *tv)
 116 {
 117     if (!(tv->t_flag & TFLAG_BRC_ANY)) {
 118         /* invalid token is put inside braces */
 119         nasm_error(ERR_NONFATAL,
 120                     "%s is not a valid decorator with braces", tv->t_charptr);
 121         tv->t_type = TOKEN_INVALID;
 122     } else if (tv->t_flag & TFLAG_BRC_OPT) {
 123         if (is_reg_class(OPMASKREG, tv->t_integer)) {
 124             /* within braces, opmask register is now used as a mask */
 125             tv->t_type = TOKEN_OPMASK;
 126         }
 127     }
 128
 129     stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);
 130
 131     if (stdscan_bufptr[0] == '}') {
 132         stdscan_bufptr ++;      /* skip the closing brace */
 133         brace --;
 134     } else if (stdscan_bufptr[0] != ',') {
 135         /* treat {foo,bar} as {foo}{bar}
 136          * by regarding ',' as a mere separator between decorators
 137          */
 138         nasm_error(ERR_NONFATAL, "closing brace expected");
 139         tv->t_type = TOKEN_INVALID;
 140     }
 141     return tv->t_type;
 142 }
 143
 144 int stdscan(void *private_data, struct tokenval *tv)
 145 {
 146     char ourcopy[MAX_KEYWORD + 1], *r, *s;
 147
 148     (void)private_data;         /* Don't warn that this parameter is unused */
 149
 150     stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);
 151     if (!*stdscan_bufptr) {
 152         /* nested brace shouldn't affect following lines */
 153         brace = 0;
 154         return tv->t_type = TOKEN_EOS;
 155     }
 156
 157     /* we have a token; either an id, a number or a char */
 158     if (isidstart(*stdscan_bufptr) ||
 159         (*stdscan_bufptr == '$' && isidstart(stdscan_bufptr[1])) ||
 160         (brace && isidchar(*stdscan_bufptr))) {     /* because of {1to8} */
 161         /* now we've got an identifier */
 162         bool is_sym = false;
 163         int token_type;
 164
 165         /* opening brace is followed by any letter */
 166         brace_opened = false;
 167
 168         if (*stdscan_bufptr == '$') {
 169             is_sym = true;
 170             stdscan_bufptr++;
 171         }
 172
 173         r = stdscan_bufptr++;
 174         /* read the entire buffer to advance the buffer pointer but... */
 175         /* {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae} contain '-' in tokens. */
 176         while (isidchar(*stdscan_bufptr) || (brace && *stdscan_bufptr == '-'))
 177             stdscan_bufptr++;
 178
 179         /* ... copy only up to IDLEN_MAX-1 characters */
 180         tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r < IDLEN_MAX ?
 181                                      stdscan_bufptr - r : IDLEN_MAX - 1);
 182
 183         if (is_sym || stdscan_bufptr - r > MAX_KEYWORD)
 184             return tv->t_type = TOKEN_ID;       /* bypass all other checks */
 185
 186         for (s = tv->t_charptr, r = ourcopy; *s; s++)
 187             *r++ = nasm_tolower(*s);
 188         *r = '\0';
 189         /* right, so we have an identifier sitting in temp storage. now,
 190          * is it actually a register or instruction name, or what? */
 191         token_type = nasm_token_hash(ourcopy, tv);
 192
 193         if (likely(!brace)) {
 194             if (likely(!(tv->t_flag & TFLAG_BRC))) {
 195                 /* most of the tokens fall into this case */
 196                 return token_type;
 197             } else {
 198                 return tv->t_type = TOKEN_ID;
 199             }
 200         } else {
 201             /* handle tokens inside braces */
 202             return stdscan_handle_brace(tv);
 203         }
 204     } else if (*stdscan_bufptr == '$' && !isnumchar(stdscan_bufptr[1])) {
 205         /*
 206          * It's a $ sign with no following hex number; this must
 207          * mean it's a Here token ($), evaluating to the current
 208          * assembly location, or a Base token ($$), evaluating to
 209          * the base of the current segment.
 210          */
 211         stdscan_bufptr++;
 212         if (*stdscan_bufptr == '$') {
 213             stdscan_bufptr++;
 214             return tv->t_type = TOKEN_BASE;
 215         }
 216         return tv->t_type = TOKEN_HERE;
 217     } else if (isnumstart(*stdscan_bufptr)) {   /* now we've got a number */
 218         bool rn_error;
 219         bool is_hex = false;
 220         bool is_float = false;
 221         bool has_e = false;
 222         char c;
 223
 224         r = stdscan_bufptr;
 225
 226         if (*stdscan_bufptr == '$') {
 227             stdscan_bufptr++;
 228             is_hex = true;
 229         }
 230
 231         for (;;) {
 232             c = *stdscan_bufptr++;
 233
 234             if (!is_hex && (c == 'e' || c == 'E')) {
 235                 has_e = true;
 236                 if (*stdscan_bufptr == '+' || *stdscan_bufptr == '-') {
 237                     /*
 238                      * e can only be followed by +/- if it is either a
 239                      * prefixed hex number or a floating-point number
 240                      */
 241                     is_float = true;
 242                     stdscan_bufptr++;
 243                 }
 244             } else if (c == 'H' || c == 'h' || c == 'X' || c == 'x') {
 245                 is_hex = true;
 246             } else if (c == 'P' || c == 'p') {
 247                 is_float = true;
 248                 if (*stdscan_bufptr == '+' || *stdscan_bufptr == '-')
 249                     stdscan_bufptr++;
 250             } else if (isnumchar(c) || c == '_')
 251                 ; /* just advance */
 252             else if (c == '.')
 253                 is_float = true;
 254             else
 255                 break;
 256         }
 257         stdscan_bufptr--;       /* Point to first character beyond number */
 258
 259         if (has_e && !is_hex) {
 260             /* 1e13 is floating-point, but 1e13h is not */
 261             is_float = true;
 262         }
 263
 264         if (is_float) {
 265             tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r);
 266             return tv->t_type = TOKEN_FLOAT;
 267         } else {
 268             r = stdscan_copy(r, stdscan_bufptr - r);
 269             tv->t_integer = readnum(r, &rn_error);
 270             stdscan_pop();
 271             if (rn_error) {
 272                 /* some malformation occurred */
 273                 return tv->t_type = TOKEN_ERRNUM;
 274             }
 275             tv->t_charptr = NULL;
 276             return tv->t_type = TOKEN_NUM;
 277         }
 278     } else if (*stdscan_bufptr == '\'' || *stdscan_bufptr == '"' ||
 279                *stdscan_bufptr == '`') {
 280         /* a quoted string */
 281         char start_quote = *stdscan_bufptr;
 282         tv->t_charptr = stdscan_bufptr;
 283         tv->t_inttwo = nasm_unquote(tv->t_charptr, &stdscan_bufptr);
 284         if (*stdscan_bufptr != start_quote)
 285             return tv->t_type = TOKEN_ERRSTR;
 286         stdscan_bufptr++;       /* Skip final quote */
 287         return tv->t_type = TOKEN_STR;
 288     } else if (*stdscan_bufptr == ';') {
 289         /* a comment has happened - stay */
 290         return tv->t_type = TOKEN_EOS;
 291     } else if (stdscan_bufptr[0] == '>' && stdscan_bufptr[1] == '>') {
 292         stdscan_bufptr += 2;
 293         return tv->t_type = TOKEN_SHR;
 294     } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '<') {
 295         stdscan_bufptr += 2;
 296         return tv->t_type = TOKEN_SHL;
 297     } else if (stdscan_bufptr[0] == '/' && stdscan_bufptr[1] == '/') {
 298         stdscan_bufptr += 2;
 299         return tv->t_type = TOKEN_SDIV;
 300     } else if (stdscan_bufptr[0] == '%' && stdscan_bufptr[1] == '%') {
 301         stdscan_bufptr += 2;
 302         return tv->t_type = TOKEN_SMOD;
 303     } else if (stdscan_bufptr[0] == '=' && stdscan_bufptr[1] == '=') {
 304         stdscan_bufptr += 2;
 305         return tv->t_type = TOKEN_EQ;
 306     } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '>') {
 307         stdscan_bufptr += 2;
 308         return tv->t_type = TOKEN_NE;
 309     } else if (stdscan_bufptr[0] == '!' && stdscan_bufptr[1] == '=') {
 310         stdscan_bufptr += 2;
 311         return tv->t_type = TOKEN_NE;
 312     } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '=') {
 313         stdscan_bufptr += 2;
 314         return tv->t_type = TOKEN_LE;
 315     } else if (stdscan_bufptr[0] == '>' && stdscan_bufptr[1] == '=') {
 316         stdscan_bufptr += 2;
 317         return tv->t_type = TOKEN_GE;
 318     } else if (stdscan_bufptr[0] == '&' && stdscan_bufptr[1] == '&') {
 319         stdscan_bufptr += 2;
 320         return tv->t_type = TOKEN_DBL_AND;
 321     } else if (stdscan_bufptr[0] == '^' && stdscan_bufptr[1] == '^') {
 322         stdscan_bufptr += 2;
 323         return tv->t_type = TOKEN_DBL_XOR;
 324     } else if (stdscan_bufptr[0] == '|' && stdscan_bufptr[1] == '|') {
 325         stdscan_bufptr += 2;
 326         return tv->t_type = TOKEN_DBL_OR;
 327     } else if (stdscan_bufptr[0] == '{') {
 328         stdscan_bufptr ++;      /* skip the opening brace */
 329         brace ++;               /* in case of nested braces */
 330         brace_opened = true;    /* brace is just opened */
 331         return stdscan(private_data, tv);
 332     } else if (stdscan_bufptr[0] == ',' && brace) {
 333         /*
 334          * a comma inside braces should be treated just as a separator.
 335          * this is almost same as an opening brace except increasing counter.
 336          */
 337         stdscan_bufptr ++;
 338         brace_opened = true;    /* brace is just opened */
 339         return stdscan(private_data, tv);
 340     } else if (stdscan_bufptr[0] == '}') {
 341         stdscan_bufptr ++;      /* skip the closing brace */
 342         if (brace) {
 343             /* unhandled nested closing brace */
 344             brace --;
 345             /* if brace is closed without any content in it */
 346             if (brace_opened) {
 347                 brace_opened = false;
 348                 nasm_error(ERR_NONFATAL, "nothing inside braces");
 349             }
 350             return stdscan(private_data, tv);
 351         } else {
 352             /* redundant closing brace */
 353             return tv->t_type = TOKEN_INVALID;
 354         }
 355         return stdscan(private_data, tv);
 356     } else                      /* just an ordinary char */
 357         return tv->t_type = (uint8_t)(*stdscan_bufptr++);
 358 }