1 /* Parse C expressions for CCCP.
2 Copyright (C) 1987, 1992, 1994, 1995 Free Software Foundation.
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 2, or (at your option) any
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, 59 Temple Place - Suite 330,
17 Boston, MA 02111-1307, USA.
19 In other words, you are welcome to use, share and improve this program.
20 You are forbidden to forbid anyone else to use, share and improve
21 what you give them. Help stamp out software-hoarding!
23 Adapted from expread.y of GDB by Paul Rubin, July 1986. */
25 /* Parse a C expression from text in a string */
30 /* #define YYDEBUG 1 */
32 #ifdef MULTIBYTE_CHARS
/* Unsigned character type.  In this file it is the pointee type of the
   `address' member of struct name and of the `name' strings stored in
   struct arglist nodes.  */
39 typedef
unsigned char U_CHAR
;
41 /* This is used for communicating lists of keywords with cccp.c. */
49 /* Define a generic NULL if one hasn't already been defined. */
56 #if defined (USE_PROTOTYPES) ? USE_PROTOTYPES : defined (__STDC__)
57 #define GENERIC_PTR void *
59 #define GENERIC_PTR char *
63 /* Find the largest host integer type and set its size and type. */
65 #ifndef HOST_BITS_PER_WIDE_INT
67 #if HOST_BITS_PER_LONG > HOST_BITS_PER_INT
68 #define HOST_BITS_PER_WIDE_INT HOST_BITS_PER_LONG
69 #define HOST_WIDE_INT long
71 #define HOST_BITS_PER_WIDE_INT HOST_BITS_PER_INT
72 #define HOST_WIDE_INT int
/* Null pointer of type GENERIC_PTR.  This file passes it to mbtowc when
   resetting the conversion state before decoding a multibyte character.  */
78 #define NULL_PTR ((GENERIC_PTR)0)
/* Result of the most recent parse: a grammar action stores $1.value here
   and parse_c_expression returns it.  */
83 HOST_WIDE_INT expression_value
;
/* Jump buffer used to abandon a parse.  parse_c_expression arms it with
   setjmp; longjmp (parse_return_error, 1) is issued elsewhere in this file.  */
85 static jmp_buf parse_return_error
;
87 /* Nonzero means count most punctuation as part of a name. */
/* A grammar action sets this to 1.  While it is nonzero the lexer skips the
   two-character operator lookup and the number parser, and records quoted
   text in yylval.name instead of treating it as a constant.  */
88 static int keyword_parsing
= 0;
90 /* Nonzero means do not evaluate this expression.
91 This is a count, since unevaluated expressions can nest. */
/* The &&, || and ?: actions adjust this count around their operands
   according to the truth value of the first operand.  The division-by-zero
   error and the integer-overflow pedwarn are only issued while it is zero.  */
92 static int skip_evaluation
;
94 /* some external tables of character types */
/* Each table is indexed by a character value.  is_idstart[c] is tested for
   the first character of a name, is_idchar[c] for the characters that
   follow, and is_hor_space[c] for horizontal white space.  */
95 extern
unsigned char is_idstart
[], is_idchar
[], is_hor_space
[];
/* Allocator defined outside this file (old-style declaration, no
   prototype).  The grammar actions use it to allocate struct arglist nodes.  */
97 extern
char *xmalloc
();
99 /* Flag for -pedantic. */
102 /* Flag for -traditional. */
/* When nonzero, the "multi-character character constant" warning is not
   issued.  */
103 extern
int traditional
;
105 #ifndef CHAR_TYPE_SIZE
106 #define CHAR_TYPE_SIZE BITS_PER_UNIT
109 #ifndef INT_TYPE_SIZE
110 #define INT_TYPE_SIZE BITS_PER_WORD
113 #ifndef LONG_TYPE_SIZE
114 #define LONG_TYPE_SIZE BITS_PER_WORD
117 #ifndef WCHAR_TYPE_SIZE
118 #define WCHAR_TYPE_SIZE INT_TYPE_SIZE
121 #ifndef MAX_CHAR_TYPE_SIZE
122 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
125 #ifndef MAX_INT_TYPE_SIZE
126 #define MAX_INT_TYPE_SIZE INT_TYPE_SIZE
129 #ifndef MAX_LONG_TYPE_SIZE
130 #define MAX_LONG_TYPE_SIZE LONG_TYPE_SIZE
133 #ifndef MAX_WCHAR_TYPE_SIZE
134 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
137 /* Yield nonzero if adding two numbers with A's and B's signs can yield a
138 number with SUM's sign, where A, B, and SUM are all C integers. */
/* The test looks at the sign bit of ((a ^ b) | ~(a ^ sum)): it is set when
   A and B differ in sign, or when SUM has the same sign as A.  The `+' and
   `-' actions call integer_overflow when this yields zero.  Note that A is
   evaluated twice.  */
139 #define possible_sum_sign(a, b, sum) ((((a) ^ (b)) | ~ ((a) ^ (sum))) < 0)
/* Forward declarations, in old style without parameter lists, of helpers
   used by the grammar actions.  integer_overflow is called with no
   arguments when an arithmetic result cannot be represented.  left_shift
   and right_shift are called as f (&$1, count) by the `<<' and `>>'
   actions, which swap the two when the count is negative and signed.  */
141 static void integer_overflow
();
142 static long left_shift
();
143 static long right_shift
();
147 struct constant
{long value
; int unsignedp
;} integer
;
148 struct name
{U_CHAR
*address
; int length
;} name
;
149 struct arglist
*keywords
;
152 %type
<integer
> exp exp1 start
153 %type
<keywords
> keywords
154 %token
<integer
> INT CHAR
156 %token
<integer
> ERROR
166 %left
'<' '>' LEQ GEQ
177 { expression_value
= $1.value
; }
180 /* Expressions, including the comma operator. */
184 pedwarn
("comma operator in operand of `#if'");
188 /* Expressions, not including the comma operator. */
189 exp
: '-' exp %prec UNARY
190 { $$.value
= - $2.value
;
191 if
(($$.value
& $2.value
) < 0 && ! $2.unsignedp
)
193 $$.unsignedp
= $2.unsignedp
; }
194 |
'!' exp %prec UNARY
195 { $$.value
= ! $2.value
;
197 |
'+' exp %prec UNARY
199 |
'~' exp %prec UNARY
200 { $$.value
= ~
$2.value
;
201 $$.unsignedp
= $2.unsignedp
; }
203 { $$.value
= check_assertion
($2.address
, $2.length
,
207 { keyword_parsing
= 1; }
209 { $$.value
= check_assertion
($2.address
, $2.length
,
217 /* Binary operators in order of decreasing precedence. */
219 { $$.unsignedp
= $1.unsignedp ||
$3.unsignedp
;
221 $$.value
= (unsigned long) $1.value
* $3.value
;
224 $$.value
= $1.value
* $3.value
;
226 && ($$.value
/ $1.value
!= $3.value
227 ||
($$.value
& $1.value
& $3.value
) < 0))
233 if
(!skip_evaluation
)
234 error ("division by zero in #if");
237 $$.unsignedp
= $1.unsignedp ||
$3.unsignedp
;
239 $$.value
= (unsigned long) $1.value
/ $3.value
;
242 $$.value
= $1.value
/ $3.value
;
243 if
(($$.value
& $1.value
& $3.value
) < 0)
249 if
(!skip_evaluation
)
250 error ("division by zero in #if");
253 $$.unsignedp
= $1.unsignedp ||
$3.unsignedp
;
255 $$.value
= (unsigned long) $1.value %
$3.value
;
257 $$.value
= $1.value %
$3.value
; }
259 { $$.value
= $1.value
+ $3.value
;
260 $$.unsignedp
= $1.unsignedp ||
$3.unsignedp
;
262 && ! possible_sum_sign
($1.value
, $3.value
,
264 integer_overflow
(); }
266 { $$.value
= $1.value
- $3.value
;
267 $$.unsignedp
= $1.unsignedp ||
$3.unsignedp
;
269 && ! possible_sum_sign
($$.value
, $3.value
,
271 integer_overflow
(); }
273 { $$.unsignedp
= $1.unsignedp
;
274 if
($3.value
< 0 && ! $3.unsignedp
)
275 $$.value
= right_shift
(&$1, -$3.value
);
277 $$.value
= left_shift
(&$1, $3.value
); }
279 { $$.unsignedp
= $1.unsignedp
;
280 if
($3.value
< 0 && ! $3.unsignedp
)
281 $$.value
= left_shift
(&$1, -$3.value
);
283 $$.value
= right_shift
(&$1, $3.value
); }
285 { $$.value
= ($1.value
== $3.value
);
288 { $$.value
= ($1.value
!= $3.value
);
292 if
($1.unsignedp ||
$3.unsignedp
)
293 $$.value
= (unsigned long) $1.value
<= $3.value
;
295 $$.value
= $1.value
<= $3.value
; }
298 if
($1.unsignedp ||
$3.unsignedp
)
299 $$.value
= (unsigned long) $1.value
>= $3.value
;
301 $$.value
= $1.value
>= $3.value
; }
304 if
($1.unsignedp ||
$3.unsignedp
)
305 $$.value
= (unsigned long) $1.value
< $3.value
;
307 $$.value
= $1.value
< $3.value
; }
310 if
($1.unsignedp ||
$3.unsignedp
)
311 $$.value
= (unsigned long) $1.value
> $3.value
;
313 $$.value
= $1.value
> $3.value
; }
315 { $$.value
= $1.value
& $3.value
;
316 $$.unsignedp
= $1.unsignedp ||
$3.unsignedp
; }
318 { $$.value
= $1.value ^
$3.value
;
319 $$.unsignedp
= $1.unsignedp ||
$3.unsignedp
; }
321 { $$.value
= $1.value |
$3.value
;
322 $$.unsignedp
= $1.unsignedp ||
$3.unsignedp
; }
324 { skip_evaluation
+= !$1.value
; }
326 { skip_evaluation
-= !$1.value
;
327 $$.value
= ($1.value
&& $4.value
);
330 { skip_evaluation
+= !!$1.value
; }
332 { skip_evaluation
-= !!$1.value
;
333 $$.value
= ($1.value ||
$4.value
);
336 { skip_evaluation
+= !$1.value
; }
338 { skip_evaluation
+= !!$1.value
- !$1.value
; }
340 { skip_evaluation
-= !!$1.value
;
341 $$.value
= $1.value ?
$4.value
: $7.value
;
342 $$.unsignedp
= $4.unsignedp ||
$7.unsignedp
; }
344 { $$
= yylval.integer
; }
346 { $$
= yylval.integer
; }
354 |
'(' keywords
')' keywords
355 { struct arglist
*temp
;
356 $$
= (struct arglist
*) xmalloc
(sizeof
(struct arglist
));
358 $$
->name
= (U_CHAR
*) "(";
361 while
(temp
!= 0 && temp
->next
!= 0)
363 temp
->next
= (struct arglist
*) xmalloc
(sizeof
(struct arglist
));
364 temp
->next
->next
= $4;
365 temp
->next
->name
= (U_CHAR
*) ")";
366 temp
->next
->length
= 1; }
368 { $$
= (struct arglist
*) xmalloc
(sizeof
(struct arglist
));
369 $$
->name
= $1.address
;
370 $$
->length
= $1.length
;
375 /* During parsing of a C expression, the pointer to the next character
376 is in this variable. */
380 /* Take care of parsing a number (anything that starts with a digit).
381 Set yylval and return the token type; update lexptr.
382 LEN is the number of characters in it. */
384 /* maybe needs to actually deal with floating point numbers */
390 register
char *p
= lexptr
;
392 register
unsigned long n
= 0, nd
, ULONG_MAX_over_base
;
393 register
int base
= 10;
394 register
int len
= olen
;
395 register
int overflow
= 0;
396 register
int digit
, largest_digit
= 0;
399 for
(c
= 0; c
< len
; c
++)
401 /* It's a float since it contains a point. */
402 yyerror ("floating point numbers not allowed in #if expressions");
406 yylval.integer.unsignedp
= 0;
408 if
(len
>= 3 && (!strncmp
(p
, "0x", 2) ||
!strncmp
(p
, "0X", 2))) {
416 ULONG_MAX_over_base
= (unsigned long) -1 / base
;
418 for
(; len
> 0; len
--) {
421 if
(c
>= '0' && c
<= '9')
423 else if
(base
== 16 && c
>= 'a' && c
<= 'f')
424 digit
= c
- 'a' + 10;
425 else if
(base
== 16 && c
>= 'A' && c
<= 'F')
426 digit
= c
- 'A' + 10;
428 /* `l' means long, and `u' means unsigned. */
430 if
(c
== 'l' || c
== 'L')
433 yyerror ("two `l's in integer constant");
436 else if
(c
== 'u' || c
== 'U')
438 if
(yylval.integer.unsignedp
)
439 yyerror ("two `u's in integer constant");
440 yylval.integer.unsignedp
= 1;
449 /* Don't look for any more digits after the suffixes. */
452 if
(largest_digit
< digit
)
453 largest_digit
= digit
;
454 nd
= n
* base
+ digit
;
455 overflow |
= ULONG_MAX_over_base
< n | nd
< n
;
460 yyerror ("Invalid number in #if expression");
464 if
(base
<= largest_digit
)
465 warning
("integer constant contains digits beyond the radix");
468 warning
("integer constant out of range");
470 /* If too big to be signed, consider it unsigned. */
471 if
((long) n
< 0 && ! yylval.integer.unsignedp
)
474 warning
("integer constant is so large that it is unsigned");
475 yylval.integer.unsignedp
= 1;
479 yylval.integer.value
= n
;
488 static struct token tokentab2
[] = {
502 /* Read one token, getting characters through lexptr. */
508 register
int namelen
;
509 register
unsigned char *tokstart
;
510 register
struct token
*toktab
;
515 tokstart
= (unsigned char *) lexptr
;
517 /* See if it is a special token of length 2. */
518 if
(! keyword_parsing
)
519 for
(toktab
= tokentab2
; toktab
->operator
!= NULL
; toktab
++)
520 if
(c
== *toktab
->operator
&& tokstart
[1] == toktab
->operator
[1]) {
522 if
(toktab
->token
== ERROR
)
524 char *buf
= (char *) alloca
(40);
525 sprintf
(buf
, "`%s' not allowed in operand of `#if'", toktab
->operator
);
528 return toktab
->token
;
543 /* Capital L may start a wide-string or wide-character constant. */
544 if
(lexptr
[1] == '\'')
550 if
(lexptr
[1] == '"')
554 goto string_constant
;
562 if
(keyword_parsing
) {
563 char *start_ptr
= lexptr
- 1;
567 c
= parse_escape
(&lexptr
);
571 yylval.name.address
= tokstart
;
572 yylval.name.length
= lexptr
- start_ptr
;
576 /* This code for reading a character constant
577 handles multicharacter constants and wide characters.
578 It is mostly copied from c-lex.c. */
580 register
int result
= 0;
581 register num_chars
= 0;
582 unsigned width
= MAX_CHAR_TYPE_SIZE
;
588 width
= MAX_WCHAR_TYPE_SIZE
;
589 #ifdef MULTIBYTE_CHARS
590 max_chars
= MB_CUR_MAX
;
596 max_chars
= MAX_LONG_TYPE_SIZE
/ width
;
598 token_buffer
= (char *) alloca
(max_chars
+ 1);
604 if
(c
== '\'' || c
== EOF
)
609 c
= parse_escape
(&lexptr
);
610 if
(width
< HOST_BITS_PER_INT
611 && (unsigned) c
>= (1 << width
))
612 pedwarn
("escape sequence out of range for character");
617 /* Merge character into result; ignore excess chars. */
618 if
(num_chars
< max_chars
+ 1)
620 if
(width
< HOST_BITS_PER_INT
)
621 result
= (result
<< width
) |
(c
& ((1 << width
) - 1));
624 token_buffer
[num_chars
- 1] = c
;
628 token_buffer
[num_chars
] = 0;
631 error ("malformatted character constant");
632 else if
(num_chars
== 0)
633 error ("empty character constant");
634 else if
(num_chars
> max_chars
)
636 num_chars
= max_chars
;
637 error ("character constant too long");
639 else if
(num_chars
!= 1 && ! traditional
)
640 warning
("multi-character character constant");
642 /* If char type is signed, sign-extend the constant. */
645 int num_bits
= num_chars
* width
;
647 if
(lookup
("__CHAR_UNSIGNED__", sizeof
("__CHAR_UNSIGNED__")-1, -1)
648 ||
((result
>> (num_bits
- 1)) & 1) == 0)
650 = result
& ((unsigned long) ~
0 >> (HOST_BITS_PER_LONG
- num_bits
));
653 = result | ~
((unsigned long) ~
0 >> (HOST_BITS_PER_LONG
- num_bits
));
657 #ifdef MULTIBYTE_CHARS
658 /* Set the initial shift state and convert the next sequence. */
660 /* In all locales L'\0' is zero and mbtowc will return zero,
663 ||
(num_chars
== 1 && token_buffer
[0] != '\0'))
666 (void) mbtowc
(NULL_PTR
, NULL_PTR
, 0);
667 if
(mbtowc
(& wc
, token_buffer
, num_chars
) == num_chars
)
670 warning
("Ignoring invalid multibyte character");
673 yylval.integer.value
= result
;
677 /* This is always a signed type. */
678 yylval.integer.unsignedp
= 0;
682 /* some of these chars are invalid in constant expressions;
683 maybe do something about them later */
716 if
(keyword_parsing
) {
717 char *start_ptr
= lexptr
;
722 c
= parse_escape
(&lexptr
);
726 yylval.name.address
= tokstart
;
727 yylval.name.length
= lexptr
- start_ptr
;
730 yyerror ("string constants not allowed in #if expressions");
734 if
(c
>= '0' && c
<= '9' && !keyword_parsing
) {
737 c
= tokstart
[namelen
], is_idchar
[c
] || c
== '.';
740 return parse_number
(namelen
);
743 /* It is a name. See how long it is. */
745 if
(keyword_parsing
) {
746 for
(namelen
= 0;; namelen
++) {
747 if
(is_hor_space
[tokstart
[namelen
]])
749 if
(tokstart
[namelen
] == '(' || tokstart
[namelen
] == ')')
751 if
(tokstart
[namelen
] == '"' || tokstart
[namelen
] == '\'')
755 if
(!is_idstart
[c
]) {
756 yyerror ("Invalid token in expression");
760 for
(namelen
= 0; is_idchar
[tokstart
[namelen
]]; namelen
++)
765 yylval.name.address
= tokstart
;
766 yylval.name.length
= namelen
;
771 /* Parse a C escape sequence. STRING_PTR points to a variable
772 containing a pointer to the string to parse. That pointer
773 is updated past the characters we use. The value of the
774 escape sequence is returned.
776 A negative value means the sequence \ newline was seen,
777 which is supposed to be equivalent to nothing at all.
779 If \ is followed by a null character, we return a negative
780 value and leave the string pointer pointing at the null character.
782 If \ is followed by 000, we return 0 and leave the string pointer
783 after the zeros. A value of 0 does not mean end of string. */
786 parse_escape
(string_ptr
)
789 register
int c
= *(*string_ptr
)++;
799 pedwarn
("non-ANSI-standard escape sequence, `\\%c'", c
);
804 return TARGET_NEWLINE
;
826 register
int i
= c
- '0';
827 register
int count
= 0;
830 c
= *(*string_ptr
)++;
831 if
(c
>= '0' && c
<= '7')
832 i
= (i
<< 3) + c
- '0';
839 if
((i
& ~
((1 << MAX_CHAR_TYPE_SIZE
) - 1)) != 0)
841 i
&= (1 << MAX_CHAR_TYPE_SIZE
) - 1;
842 warning
("octal character constant does not fit in a byte");
848 register
unsigned i
= 0, overflow
= 0, digits_found
= 0, digit
;
851 c
= *(*string_ptr
)++;
852 if
(c
>= '0' && c
<= '9')
854 else if
(c
>= 'a' && c
<= 'f')
855 digit
= c
- 'a' + 10;
856 else if
(c
>= 'A' && c
<= 'F')
857 digit
= c
- 'A' + 10;
863 overflow |
= i ^
(i
<< 4 >> 4);
864 i
= (i
<< 4) + digit
;
868 yyerror ("\\x used with no following hex digits");
869 if
(overflow |
(i
& ~
((1 << BITS_PER_UNIT
) - 1)))
871 i
&= (1 << BITS_PER_UNIT
) - 1;
872 warning
("hex character constant does not fit in a byte");
887 longjmp
(parse_return_error
, 1);
893 if
(!skip_evaluation
&& pedantic
)
894 pedwarn
("integer overflow in preprocessor expression");
902 /* It's unclear from the C standard whether shifts can overflow.
903 The following code ignores overflow; perhaps a C standard
904 interpretation ruling is needed. */
905 if
(b
>= HOST_BITS_PER_LONG
)
907 else if
(a
->unsignedp
)
908 return
(unsigned long) a
->value
<< b
;
910 return a
->value
<< b
;
918 if
(b
>= HOST_BITS_PER_LONG
)
919 return a
->unsignedp ?
0 : a
->value
>> (HOST_BITS_PER_LONG
- 1);
920 else if
(a
->unsignedp
)
921 return
(unsigned long) a
->value
>> b
;
923 return a
->value
>> b
;
926 /* This page contains the entry point to this file. */
928 /* Parse STRING as an expression, and complain if this fails
929 to use up all of the contents of STRING. */
930 /* We do not support C comments. They should be removed before
931 this function is called. */
934 parse_c_expression
(string)
939 if
(lexptr
== 0 ||
*lexptr
== 0) {
940 error ("empty #if expression");
941 return
0; /* don't include the #if group */
944 /* if there is some sort of scanning error, just return 0 and assume
945 the parsing routine has printed an error message somewhere.
946 there is surely a better thing to do than this. */
947 if
(setjmp
(parse_return_error
))
951 return
0; /* actually this is never reached
952 the way things stand. */
954 error ("Junk after end of expression.");
956 return expression_value
; /* set by yyparse () */
959 #ifdef TEST_EXP_READER
962 /* Main program for testing purposes. */
972 initialize_random_junk
();
975 printf
("enter expression: ");
977 while
((buf
[n
] = getchar
()) != '\n' && buf
[n
] != EOF
)
982 printf
("parser returned %ld\n", parse_c_expression
(buf
));
/* Definitions of the character-class tables for the stand-alone test
   build.  All three are zero-initialized here and filled in by
   initialize_random_junk.  */

/* Table telling whether a character can be part of a C identifier.  */
unsigned char is_idchar[256];

/* Table telling whether a character can be the first character of a
   C identifier.  */
unsigned char is_idstart[256];

/* Table telling whether a character is horizontal space.  isspace ()
   thinks that newline is space; this is not a good idea for this program.

   This must be `unsigned char', not plain `char': the file declares
   `extern unsigned char is_idstart[], is_idchar[], is_hor_space[];'
   near its top, and a definition with a different element type is a
   conflicting declaration of the same object.  */
unsigned char is_hor_space[256];
997 * initialize random junk in the hash table and maybe other places
999 initialize_random_junk
()
1004 * Set up is_idchar and is_idstart tables. These should be
1005 * faster than saying (is_alpha (c) || c == '_'), etc.
1006 * These things must be set up before calling any routines that
1009 for
(i
= 'a'; i
<= 'z'; i
++) {
1010 ++is_idchar
[i
- 'a' + 'A'];
1012 ++is_idstart
[i
- 'a' + 'A'];
1015 for
(i
= '0'; i
<= '9'; i
++)
1019 #if DOLLARS_IN_IDENTIFIERS
1024 /* horizontal space table */
1025 ++is_hor_space
[' '];
1026 ++is_hor_space
['\t'];
1031 printf
("error: %s\n", msg
);
1036 printf
("warning: %s\n", msg
);
1040 lookup
(name
, len
, hash
)
1045 return
(DEFAULT_SIGNED_CHAR
) ?
0 : ((struct hashnode
*) -1);