1 /* Parse C expressions for CCCP.
2 Copyright (C) 1987, 1992, 1994 Free Software Foundation.
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 2, or (at your option) any
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
18 In other words, you are welcome to use, share and improve this program.
19 You are forbidden to forbid anyone else to use, share and improve
20 what you give them. Help stamp out software-hoarding!
22 Adapted from expread.y of GDB by Paul Rubin, July 1986. */
24 /* Parse a C expression from text in a string */
29 /* #define YYDEBUG 1 */
31 #ifdef MULTIBYTE_CHARS
38 typedef
unsigned char U_CHAR
;
40 /* This is used for communicating lists of keywords with cccp.c. */
48 /* Define a generic NULL if one hasn't already been defined. */
55 #if defined (USE_PROTOTYPES) ? USE_PROTOTYPES : defined (__STDC__)
56 #define GENERIC_PTR void *
58 #define GENERIC_PTR char *
62 /* Find the largest host integer type and set its size and type. */
64 #ifndef HOST_BITS_PER_WIDE_INT
66 #if HOST_BITS_PER_LONG > HOST_BITS_PER_INT
67 #define HOST_BITS_PER_WIDE_INT HOST_BITS_PER_LONG
68 #define HOST_WIDE_INT long
70 #define HOST_BITS_PER_WIDE_INT HOST_BITS_PER_INT
71 #define HOST_WIDE_INT int
77 #define NULL_PTR ((GENERIC_PTR)0)
/* Value of the expression most recently parsed.  A grammar action
   stores the value of the parsed expression here, and this is the value
   handed back once parsing has finished (see the `return
   expression_value' near the end of this file).  */
82 HOST_WIDE_INT expression_value
;
/* Jump buffer for abandoning a parse.  It is armed with setjmp before
   parsing begins and is the target of a longjmp (with value 1) elsewhere
   in this file, so control returns to the setjmp site instead of
   unwinding normally.  */
84 static jmp_buf parse_return_error
;
86 /* Nonzero means count most punctuation as part of a name. */
87 static int keyword_parsing
= 0;
89 /* some external tables of character types */
90 extern
unsigned char is_idstart
[], is_idchar
[], is_hor_space
[];
92 extern
char *xmalloc
();
94 /* Flag for -pedantic. */
97 /* Flag for -traditional. */
98 extern
int traditional
;
100 #ifndef CHAR_TYPE_SIZE
101 #define CHAR_TYPE_SIZE BITS_PER_UNIT
104 #ifndef INT_TYPE_SIZE
105 #define INT_TYPE_SIZE BITS_PER_WORD
108 #ifndef LONG_TYPE_SIZE
109 #define LONG_TYPE_SIZE BITS_PER_WORD
112 #ifndef WCHAR_TYPE_SIZE
113 #define WCHAR_TYPE_SIZE INT_TYPE_SIZE
116 #ifndef MAX_CHAR_TYPE_SIZE
117 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
120 #ifndef MAX_INT_TYPE_SIZE
121 #define MAX_INT_TYPE_SIZE INT_TYPE_SIZE
124 #ifndef MAX_LONG_TYPE_SIZE
125 #define MAX_LONG_TYPE_SIZE LONG_TYPE_SIZE
128 #ifndef MAX_WCHAR_TYPE_SIZE
129 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
132 /* Yield nonzero if adding two numbers with A's and B's signs can yield a
133 number with SUM's sign, where A, B, and SUM are all C integers. */
134 #define possible_sum_sign(a, b, sum) ((((a) ^ (b)) | ~ ((a) ^ (sum))) < 0)
136 static void integer_overflow
();
137 static long left_shift
();
138 static long right_shift
();
142 struct constant
{long value
; int unsignedp
;} integer
;
143 struct name
{U_CHAR
*address
; int length
;} name
;
144 struct arglist
*keywords
;
149 %type
<integer
> exp exp1 start
150 %type
<keywords
> keywords
151 %token
<integer
> INT CHAR
153 %token
<integer
> ERROR
163 %left
'<' '>' LEQ GEQ
174 { expression_value
= $1.value
; }
177 /* Expressions, including the comma operator. */
181 pedwarn
("comma operator in operand of `#if'");
185 /* Expressions, not including the comma operator. */
186 exp
: '-' exp %prec UNARY
187 { $$.value
= - $2.value
;
188 if
(($$.value
& $2.value
) < 0 && ! $2.unsignedp
)
190 $$.unsignedp
= $2.unsignedp
; }
191 |
'!' exp %prec UNARY
192 { $$.value
= ! $2.value
;
194 |
'+' exp %prec UNARY
196 |
'~' exp %prec UNARY
197 { $$.value
= ~
$2.value
;
198 $$.unsignedp
= $2.unsignedp
; }
200 { $$.value
= check_assertion
($2.address
, $2.length
,
204 { keyword_parsing
= 1; }
206 { $$.value
= check_assertion
($2.address
, $2.length
,
214 /* Binary operators in order of decreasing precedence. */
216 { $$.unsignedp
= $1.unsignedp ||
$3.unsignedp
;
218 $$.value
= (unsigned long) $1.value
* $3.value
;
221 $$.value
= $1.value
* $3.value
;
223 && ($$.value
/ $1.value
!= $3.value
224 ||
($$.value
& $1.value
& $3.value
) < 0))
230 error ("division by zero in #if");
233 $$.unsignedp
= $1.unsignedp ||
$3.unsignedp
;
235 $$.value
= (unsigned long) $1.value
/ $3.value
;
238 $$.value
= $1.value
/ $3.value
;
239 if
(($$.value
& $1.value
& $3.value
) < 0)
245 error ("division by zero in #if");
248 $$.unsignedp
= $1.unsignedp ||
$3.unsignedp
;
250 $$.value
= (unsigned long) $1.value %
$3.value
;
252 $$.value
= $1.value %
$3.value
; }
254 { $$.value
= $1.value
+ $3.value
;
255 $$.unsignedp
= $1.unsignedp ||
$3.unsignedp
;
257 && ! possible_sum_sign
($1.value
, $3.value
,
259 integer_overflow
(); }
261 { $$.value
= $1.value
- $3.value
;
262 $$.unsignedp
= $1.unsignedp ||
$3.unsignedp
;
264 && ! possible_sum_sign
($$.value
, $3.value
,
266 integer_overflow
(); }
268 { $$.unsignedp
= $1.unsignedp
;
269 if
($3.value
< 0 && ! $3.unsignedp
)
270 $$.value
= right_shift
(&$1, -$3.value
);
272 $$.value
= left_shift
(&$1, $3.value
); }
274 { $$.unsignedp
= $1.unsignedp
;
275 if
($3.value
< 0 && ! $3.unsignedp
)
276 $$.value
= left_shift
(&$1, -$3.value
);
278 $$.value
= right_shift
(&$1, $3.value
); }
280 { $$.value
= ($1.value
== $3.value
);
283 { $$.value
= ($1.value
!= $3.value
);
287 if
($1.unsignedp ||
$3.unsignedp
)
288 $$.value
= (unsigned long) $1.value
<= $3.value
;
290 $$.value
= $1.value
<= $3.value
; }
293 if
($1.unsignedp ||
$3.unsignedp
)
294 $$.value
= (unsigned long) $1.value
>= $3.value
;
296 $$.value
= $1.value
>= $3.value
; }
299 if
($1.unsignedp ||
$3.unsignedp
)
300 $$.value
= (unsigned long) $1.value
< $3.value
;
302 $$.value
= $1.value
< $3.value
; }
305 if
($1.unsignedp ||
$3.unsignedp
)
306 $$.value
= (unsigned long) $1.value
> $3.value
;
308 $$.value
= $1.value
> $3.value
; }
310 { $$.value
= $1.value
& $3.value
;
311 $$.unsignedp
= $1.unsignedp ||
$3.unsignedp
; }
313 { $$.value
= $1.value ^
$3.value
;
314 $$.unsignedp
= $1.unsignedp ||
$3.unsignedp
; }
316 { $$.value
= $1.value |
$3.value
;
317 $$.unsignedp
= $1.unsignedp ||
$3.unsignedp
; }
319 { $$.value
= ($1.value
&& $3.value
);
322 { $$.value
= ($1.value ||
$3.value
);
324 | exp
'?' exp
':' exp
325 { $$.value
= $1.value ?
$3.value
: $5.value
;
326 $$.unsignedp
= $3.unsignedp ||
$5.unsignedp
; }
328 { $$
= yylval.integer
; }
330 { $$
= yylval.integer
; }
338 |
'(' keywords
')' keywords
339 { struct arglist
*temp
;
340 $$
= (struct arglist
*) xmalloc
(sizeof
(struct arglist
));
342 $$
->name
= (U_CHAR
*) "(";
345 while
(temp
!= 0 && temp
->next
!= 0)
347 temp
->next
= (struct arglist
*) xmalloc
(sizeof
(struct arglist
));
348 temp
->next
->next
= $4;
349 temp
->next
->name
= (U_CHAR
*) ")";
350 temp
->next
->length
= 1; }
352 { $$
= (struct arglist
*) xmalloc
(sizeof
(struct arglist
));
353 $$
->name
= $1.address
;
354 $$
->length
= $1.length
;
359 /* During parsing of a C expression, the pointer to the next character
360 is in this variable. */
364 /* Take care of parsing a number (anything that starts with a digit).
365 Set yylval and return the token type; update lexptr.
366 LEN is the number of characters in it. */
368 /* maybe needs to actually deal with floating point numbers */
374 register
char *p
= lexptr
;
376 register
unsigned long n
= 0, nd
, ULONG_MAX_over_base
;
377 register
int base
= 10;
378 register
int len
= olen
;
379 register
int overflow
= 0;
380 register
int digit
, largest_digit
= 0;
383 for
(c
= 0; c
< len
; c
++)
385 /* It's a float since it contains a point. */
386 yyerror ("floating point numbers not allowed in #if expressions");
390 yylval.integer.unsignedp
= 0;
392 if
(len
>= 3 && (!strncmp
(p
, "0x", 2) ||
!strncmp
(p
, "0X", 2))) {
400 ULONG_MAX_over_base
= (unsigned long) -1 / base
;
402 for
(; len
> 0; len
--) {
405 if
(c
>= '0' && c
<= '9')
407 else if
(base
== 16 && c
>= 'a' && c
<= 'f')
408 digit
= c
- 'a' + 10;
409 else if
(base
== 16 && c
>= 'A' && c
<= 'F')
410 digit
= c
- 'A' + 10;
412 /* `l' means long, and `u' means unsigned. */
414 if
(c
== 'l' || c
== 'L')
417 yyerror ("two `l's in integer constant");
420 else if
(c
== 'u' || c
== 'U')
422 if
(yylval.integer.unsignedp
)
423 yyerror ("two `u's in integer constant");
424 yylval.integer.unsignedp
= 1;
433 /* Don't look for any more digits after the suffixes. */
436 if
(largest_digit
< digit
)
437 largest_digit
= digit
;
438 nd
= n
* base
+ digit
;
439 overflow |
= ULONG_MAX_over_base
< n | nd
< n
;
444 yyerror ("Invalid number in #if expression");
448 if
(base
<= largest_digit
)
449 warning
("integer constant contains digits beyond the radix");
452 warning
("integer constant out of range");
454 /* If too big to be signed, consider it unsigned. */
455 if
((long) n
< 0 && ! yylval.integer.unsignedp
)
458 warning
("integer constant is so large that it is unsigned");
459 yylval.integer.unsignedp
= 1;
463 yylval.integer.value
= n
;
472 static struct token tokentab2
[] = {
486 /* Read one token, getting characters through lexptr. */
492 register
int namelen
;
493 register
unsigned char *tokstart
;
494 register
struct token
*toktab
;
499 tokstart
= (unsigned char *) lexptr
;
501 /* See if it is a special token of length 2. */
502 if
(! keyword_parsing
)
503 for
(toktab
= tokentab2
; toktab
->operator
!= NULL
; toktab
++)
504 if
(c
== *toktab
->operator
&& tokstart
[1] == toktab
->operator
[1]) {
506 if
(toktab
->token
== ERROR
)
508 char *buf
= (char *) alloca
(40);
509 sprintf
(buf
, "`%s' not allowed in operand of `#if'", toktab
->operator
);
512 return toktab
->token
;
527 /* Capital L may start a wide-string or wide-character constant. */
528 if
(lexptr
[1] == '\'')
534 if
(lexptr
[1] == '"')
538 goto string_constant
;
546 if
(keyword_parsing
) {
547 char *start_ptr
= lexptr
- 1;
551 c
= parse_escape
(&lexptr
);
555 yylval.name.address
= tokstart
;
556 yylval.name.length
= lexptr
- start_ptr
;
560 /* This code for reading a character constant
561 handles multicharacter constants and wide characters.
562 It is mostly copied from c-lex.c. */
564 register
int result
= 0;
565 register num_chars
= 0;
566 unsigned width
= MAX_CHAR_TYPE_SIZE
;
572 width
= MAX_WCHAR_TYPE_SIZE
;
573 #ifdef MULTIBYTE_CHARS
574 max_chars
= MB_CUR_MAX
;
580 max_chars
= MAX_LONG_TYPE_SIZE
/ width
;
582 token_buffer
= (char *) alloca
(max_chars
+ 1);
588 if
(c
== '\'' || c
== EOF
)
593 c
= parse_escape
(&lexptr
);
594 if
(width
< HOST_BITS_PER_INT
595 && (unsigned) c
>= (1 << width
))
596 pedwarn
("escape sequence out of range for character");
601 /* Merge character into result; ignore excess chars. */
602 if
(num_chars
< max_chars
+ 1)
604 if
(width
< HOST_BITS_PER_INT
)
605 result
= (result
<< width
) |
(c
& ((1 << width
) - 1));
608 token_buffer
[num_chars
- 1] = c
;
612 token_buffer
[num_chars
] = 0;
615 error ("malformatted character constant");
616 else if
(num_chars
== 0)
617 error ("empty character constant");
618 else if
(num_chars
> max_chars
)
620 num_chars
= max_chars
;
621 error ("character constant too long");
623 else if
(num_chars
!= 1 && ! traditional
)
624 warning
("multi-character character constant");
626 /* If char type is signed, sign-extend the constant. */
629 int num_bits
= num_chars
* width
;
631 if
(lookup
("__CHAR_UNSIGNED__", sizeof
("__CHAR_UNSIGNED__")-1, -1)
632 ||
((result
>> (num_bits
- 1)) & 1) == 0)
634 = result
& ((unsigned long) ~
0 >> (HOST_BITS_PER_LONG
- num_bits
));
637 = result | ~
((unsigned long) ~
0 >> (HOST_BITS_PER_LONG
- num_bits
));
641 #ifdef MULTIBYTE_CHARS
642 /* Set the initial shift state and convert the next sequence. */
644 /* In all locales L'\0' is zero and mbtowc will return zero,
647 ||
(num_chars
== 1 && token_buffer
[0] != '\0'))
650 (void) mbtowc
(NULL_PTR
, NULL_PTR
, 0);
651 if
(mbtowc
(& wc
, token_buffer
, num_chars
) == num_chars
)
654 warning
("Ignoring invalid multibyte character");
657 yylval.integer.value
= result
;
661 /* This is always a signed type. */
662 yylval.integer.unsignedp
= 0;
666 /* some of these chars are invalid in constant expressions;
667 maybe do something about them later */
700 if
(keyword_parsing
) {
701 char *start_ptr
= lexptr
;
706 c
= parse_escape
(&lexptr
);
710 yylval.name.address
= tokstart
;
711 yylval.name.length
= lexptr
- start_ptr
;
714 yyerror ("string constants not allowed in #if expressions");
718 if
(c
>= '0' && c
<= '9' && !keyword_parsing
) {
721 c
= tokstart
[namelen
], is_idchar
[c
] || c
== '.';
724 return parse_number
(namelen
);
727 /* It is a name. See how long it is. */
729 if
(keyword_parsing
) {
730 for
(namelen
= 0;; namelen
++) {
731 if
(is_hor_space
[tokstart
[namelen
]])
733 if
(tokstart
[namelen
] == '(' || tokstart
[namelen
] == ')')
735 if
(tokstart
[namelen
] == '"' || tokstart
[namelen
] == '\'')
739 if
(!is_idstart
[c
]) {
740 yyerror ("Invalid token in expression");
744 for
(namelen
= 0; is_idchar
[tokstart
[namelen
]]; namelen
++)
749 yylval.name.address
= tokstart
;
750 yylval.name.length
= namelen
;
755 /* Parse a C escape sequence. STRING_PTR points to a variable
756 containing a pointer to the string to parse. That pointer
757 is updated past the characters we use. The value of the
758 escape sequence is returned.
760 A negative value means the sequence \ newline was seen,
761 which is supposed to be equivalent to nothing at all.
763 If \ is followed by a null character, we return a negative
764 value and leave the string pointer pointing at the null character.
766 If \ is followed by 000, we return 0 and leave the string pointer
767 after the zeros. A value of 0 does not mean end of string. */
770 parse_escape
(string_ptr
)
773 register
int c
= *(*string_ptr
)++;
783 pedwarn
("non-ANSI-standard escape sequence, `\\%c'", c
);
788 return TARGET_NEWLINE
;
810 register
int i
= c
- '0';
811 register
int count
= 0;
814 c
= *(*string_ptr
)++;
815 if
(c
>= '0' && c
<= '7')
816 i
= (i
<< 3) + c
- '0';
823 if
((i
& ~
((1 << MAX_CHAR_TYPE_SIZE
) - 1)) != 0)
825 i
&= (1 << MAX_CHAR_TYPE_SIZE
) - 1;
826 warning
("octal character constant does not fit in a byte");
832 register
unsigned i
= 0, overflow
= 0, digits_found
= 0, digit
;
835 c
= *(*string_ptr
)++;
836 if
(c
>= '0' && c
<= '9')
838 else if
(c
>= 'a' && c
<= 'f')
839 digit
= c
- 'a' + 10;
840 else if
(c
>= 'A' && c
<= 'F')
841 digit
= c
- 'A' + 10;
847 overflow |
= i ^
(i
<< 4 >> 4);
848 i
= (i
<< 4) + digit
;
852 yyerror ("\\x used with no following hex digits");
853 if
(overflow |
(i
& ~
((1 << BITS_PER_UNIT
) - 1)))
855 i
&= (1 << BITS_PER_UNIT
) - 1;
856 warning
("hex character constant does not fit in a byte");
870 longjmp
(parse_return_error
, 1);
877 pedwarn
("integer overflow in preprocessor expression");
885 if
(b
>= HOST_BITS_PER_LONG
)
887 if
(! a
->unsignedp
&& a
->value
!= 0)
891 else if
(a
->unsignedp
)
892 return
(unsigned long) a
->value
<< b
;
895 long l
= a
->value
<< b
;
896 if
(l
>> b
!= a
->value
)
907 if
(b
>= HOST_BITS_PER_LONG
)
908 return a
->unsignedp ?
0 : a
->value
>> (HOST_BITS_PER_LONG
- 1);
909 else if
(a
->unsignedp
)
910 return
(unsigned long) a
->value
>> b
;
912 return a
->value
>> b
;
915 /* This page contains the entry point to this file. */
917 /* Parse STRING as an expression, and complain if this fails
918 to use up all of the contents of STRING. */
919 /* We do not support C comments. They should be removed before
920 this function is called. */
923 parse_c_expression
(string)
928 if
(lexptr
== 0 ||
*lexptr
== 0) {
929 error ("empty #if expression");
930 return
0; /* don't include the #if group */
933 /* If there is some sort of scanning error, just return 0 and assume
934 the parsing routine has printed an error message somewhere.
935 There is surely a better thing to do than this. */
936 if
(setjmp
(parse_return_error
))
940 return
0; /* actually this is never reached
941 the way things stand. */
943 error ("Junk after end of expression.");
945 return expression_value
; /* set by yyparse () */
948 #ifdef TEST_EXP_READER
951 /* Main program for testing purposes. */
961 initialize_random_junk
();
964 printf
("enter expression: ");
966 while
((buf
[n
] = getchar
()) != '\n' && buf
[n
] != EOF
)
971 printf
("parser returned %ld\n", parse_c_expression
(buf
));
977 /* table to tell if char can be part of a C identifier. */
978 unsigned char is_idchar
[256];
979 /* table to tell if char can be first char of a C identifier. */
980 unsigned char is_idstart
[256];
/* Table telling whether a character is horizontal space.  isspace ()
   thinks that newline is space; this is not a good idea for this
   program, so only the characters marked here count.

   The element type is unsigned char so that this definition agrees with
   the `extern unsigned char ... is_hor_space[]' declaration earlier in
   this file; a plain `char' definition conflicts with it.  */
unsigned char is_hor_space[256];
986 * initialize random junk in the hash table and maybe other places
988 initialize_random_junk
()
993 * Set up is_idchar and is_idstart tables. These should be
994 * faster than saying (is_alpha (c) || c == '_'), etc.
995 * Must set up these things before calling any routines that
998 for
(i
= 'a'; i
<= 'z'; i
++) {
999 ++is_idchar
[i
- 'a' + 'A'];
1001 ++is_idstart
[i
- 'a' + 'A'];
1004 for
(i
= '0'; i
<= '9'; i
++)
1008 #if DOLLARS_IN_IDENTIFIERS
1013 /* horizontal space table */
1014 ++is_hor_space
[' '];
1015 ++is_hor_space
['\t'];
1020 printf
("error: %s\n", msg
);
1025 printf
("warning: %s\n", msg
);
1029 lookup
(name
, len
, hash
)
1034 return
(DEFAULT_SIGNED_CHAR
) ?
0 : ((struct hashnode
*) -1);