1 /* Copyright (C) 1996 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If
17 not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
32 #include "linereader.h"
34 #include "stringtrans.h"
/* Allocation helpers used throughout this file.  Only the prototypes are
   given here; the definitions live elsewhere.
   NOTE(review): no visible caller checks the returned pointer, so these
   presumably never return NULL -- confirm against their definitions.  */
37 void *xmalloc (size_t __n
);
38 void *xrealloc (void *__p
, size_t __n
);
39 char *xstrdup (const char *__str
);
/* Forward declarations of the file-local scanners that lr_token hands
   control to.  Each one returns a pointer to a token.  get_string is the
   only one that also receives the character set, which it passes to
   charset_find_value when resolving symbol references inside a string.  */
42 static struct token
*get_toplvl_escape (struct linereader
*lr
);
43 static struct token
*get_symname (struct linereader
*lr
);
44 static struct token
*get_ident (struct linereader
*lr
);
45 static struct token
*get_string (struct linereader
*lr
,
46 const struct charset_t
*charset
);
/* Create a line reader for the file FNAME and remember HF as its keyword
   lookup function.  A null FNAME, a single dash and /dev/stdin are tested
   for together before the fopen call; the branch taken for them is not
   visible in this copy (presumably standard input is used -- confirm).
   The reader name falls back to <stdin> when FNAME is null.  */
50 lr_open (const char *fname
, kw_hash_fct_t hf
)
53 struct linereader
*result
;
56 if (fname
== NULL
|| strcmp (fname
, "-") == 0
57 || strcmp (fname
, "/dev/stdin") == 0)
/* Open the named file read-only.  */
61 fp
= fopen (fname
, "r");
66 result
= (struct linereader
*) xmalloc (sizeof (*result
));
/* The reader keeps its own copy of the name.  */
69 result
->fname
= xstrdup (fname
? : "<stdin>");
/* Initial syntax settings: hash mark starts a comment, backslash is the
   escape character, and strings are translated.  */
74 result
->comment_char
= '#';
75 result
->escape_char
= '\\';
76 result
->translate_strings
= 1;
/* Read the first line into the reader buffer.  */
78 n
= getdelim (&result
->buf
, &result
->bufsize
, '\n', result
->fp
);
/* Detect a first line that ends in backslash followed by newline.  The
   backslash is hard-coded here; it equals the escape_char set above.  */
89 if (n
> 1 && result
->buf
[n
- 2] == '\\' && result
->buf
[n
- 1] == '\n')
/* Terminate the buffer contents.  */
92 result
->buf
[n
] = '\0';
94 result
->hash_fct
= hf
;
/* Return nonzero when LR->bufact is zero, and zero otherwise.  LR is not
   modified.  This has to be a comparison: an assignment in its place would
   always yield 0 and would overwrite bufact as a side effect.
   NOTE(review): bufact is presumably the number of valid characters in the
   line buffer -- confirm against the struct definition.  */
101 lr_eof (struct linereader
*lr
)
103 return lr
->bufact
== 0;
/* Takes the reader LR.  Only the signature is visible in this copy.
   NOTE(review): presumably releases what lr_open acquired (the stream, the
   name copy and the line buffer) -- confirm against the full body.  */
108 lr_close (struct linereader
*lr
)
/* Read the next line from LR->fp into LR->buf with getdelim, which may
   grow the buffer and update LR->bufsize.  A line whose last two
   characters are the current escape character and a newline is treated as
   a continuation line.  */
118 lr_next (struct linereader
*lr
)
/* n is the getdelim result, used below as the count of characters read.  */
122 n
= getdelim (&lr
->buf
, &lr
->bufsize
, '\n', lr
->fp
);
/* Unlike lr_open, this test uses the configurable escape character.  */
128 if (n
> 1 && lr
->buf
[n
- 2] == lr
->escape_char
&& lr
->buf
[n
- 1] == '\n')
130 /* An escaped newline character is substituted with a single <SP>. */
132 lr
->buf
[n
- 1] = ' ';
/* External state shared with the error reporting code.  Both objects are
   only declared here.
   NOTE(review): neither is referenced in the visible part of this file;
   presumably the lr_error reporting helper uses them -- confirm.  */
143 /* Defined in error.c. */
144 /* This variable is incremented each time `error' is called. */
145 extern unsigned int error_message_count
;
147 /* The calling program should define program_name and set it to the
148 name of the executing program. */
149 extern char *program_name
;
/* Scan the next token from LR.  The result is either LR->token filled in
   here or whatever one of the get_* scanners returns.  CHARSET is not
   examined here; it is only handed on to get_string.  Whitespace is
   skipped, a line that starts with the comment character is discarded up
   to its end, and everything else is decided by the first character.  */
153 lr_token (struct linereader
*lr
, const struct charset_t
*charset
)
165 lr
->token
.tok
= tok_eol
;
/* End of the loop that skips whitespace.  */
169 while (isspace (ch
));
173 lr
->token
.tok
= tok_eof
;
/* Anything but the comment character is a real token start.  */
177 if (ch
!= lr
->comment_char
)
180 /* Ignore rest of line. */
181 lr_ignore_rest (lr
, 0);
182 lr
->token
.tok
= tok_eol
;
186 /* Match escape sequences. */
187 if (ch
== lr
->escape_char
)
188 return get_toplvl_escape (lr
);
190 /* Match ellipsis. */
/* The first dot is already consumed, so two more must follow at idx.  */
191 if (ch
== '.' && strncmp (&lr
->buf
[lr
->idx
], "..", 2) == 0)
195 lr
->token
.tok
= tok_ellipsis
;
202 return get_symname (lr
);
/* Decimal number: start from the first digit and accumulate the rest.  */
205 lr
->token
.tok
= tok_number
;
206 lr
->token
.val
.num
= ch
- '0';
208 while (isdigit (ch
= lr_getc (lr
)))
210 lr
->token
.val
.num
*= 10;
211 lr
->token
.val
.num
+= ch
- '0';
214 lr_error (lr
, _("garbage at end of digit"));
/* Single-character punctuation tokens.  */
220 lr
->token
.tok
= tok_semicolon
;
224 lr
->token
.tok
= tok_comma
;
228 lr
->token
.tok
= tok_open_brace
;
232 lr
->token
.tok
= tok_close_brace
;
236 return get_string (lr
, charset
);
242 lr
->token
.tok
= tok_minus1
;
/* Fallback: everything else is scanned as an identifier.  */
249 return get_ident (lr
);
/* Scan a numeric character code introduced by the escape character at top
   level.  Digits are read in base 8 by default; in base 16 any hexadecimal
   digit is accepted.  The code may consist of several bytes, each one
   introduced by another escape character, up to four bytes in total.  On
   success LR->token becomes tok_charcode carrying the accumulated value
   and the byte count.  A malformed code yields tok_error whose string
   value covers the offending text starting at the escape character.  */
253 static struct token
*
254 get_toplvl_escape (struct linereader
*lr
)
256 /* This is supposed to be a numeric value. We return the
257 numerical value and the number of bytes. */
/* idx already points past the escape character, hence the minus one.  */
258 size_t start_idx
= lr
->idx
- 1;
259 unsigned int value
= 0;
265 unsigned int byte
= 0;
266 unsigned int base
= 8;
/* The first character after the optional base selector must be a digit
   that is valid in the chosen base.  */
281 if ((base
== 16 && !isxdigit (ch
))
282 || (base
!= 16 && (ch
< '0' || ch
>= '0' + base
)))
285 lr
->token
.val
.str
.start
= &lr
->buf
[start_idx
];
/* Skip the rest of the malformed word.  The loop has to stop at end of
   input as well as at whitespace, so both tests are joined with a logical
   AND.  Joined with a logical OR the condition is true for every possible
   character, EOF included, and the loop could never finish.  */
287 while (ch
!= EOF
&& !isspace (ch
))
289 lr
->token
.val
.str
.len
= lr
->idx
- start_idx
;
291 lr
->token
.tok
= tok_error
;
/* Hexadecimal letter digit: map a..f (either case) to 10..15.  */
298 byte
= tolower (ch
) - 'a' + 10;
/* The second digit is validated the same way as the first one.  */
301 if ((base
== 16 && !isxdigit (ch
))
302 || (base
!= 16 && (ch
< '0' || ch
>= '0' + base
)))
309 byte
+= tolower (ch
) - 'a' + 10;
312 if (base
!= 16 && isdigit (ch
))
/* Another escape character starts the next byte; at most four bytes.  */
325 while (ch
== lr
->escape_char
&& nbytes
< 4);
328 lr_error (lr
, _("garbage at end of character code specification"));
332 lr
->token
.tok
= tok_charcode
;
333 lr
->token
.val
.charcode
.val
= value
;
334 lr
->token
.val
.charcode
.nbytes
= nbytes
;
/* Fragment of a multi-line macro that appends ch to the growing buffer
   buf: once bufact has reached bufmax the buffer is reallocated to bufmax
   bytes, then ch is stored and bufact is advanced.
   NOTE(review): presumably this is the body of ADDC, which get_ident
   invokes; the define line and the statement that enlarges bufmax are
   not visible in this copy -- confirm.  */
343 if (bufact == bufmax) \
346 buf = xrealloc (buf, bufmax); \
348 buf[bufact++] = (ch); \
/* Scan a symbol written in angle brackets; the opening bracket has already
   been consumed by lr_token.  Characters are collected up to and including
   the closing bracket, so the text itself is bufact - 1 characters long.
   A newline before the closing bracket is reported as an unterminated
   symbolic name.  The result is tok_ucs2 or tok_ucs4 for an ISO 10646
   position value, the keyword token for a reserved word whose
   symname_or_ident is 1, and otherwise tok_bsymbol with the collected text
   attached.  */
353 static struct token
*
354 get_symname (struct linereader
*lr
)
356 /* Symbol in brackets. We must distinguish three kinds:
   1. reserved words
358 2. ISO 10646 position values
   3. all other symbolic names. */
363 const struct keyword_t
*kw
;
366 buf
= (char *) xmalloc (bufmax
);
/* An escape character takes the following character literally.  */
371 if (ch
== lr
->escape_char
)
373 int c2
= lr_getc (lr
);
382 while (ch
!= '>' && ch
!= '\n');
385 lr_error (lr
, _("unterminated symbolic name"));
387 /* Test for ISO 10646 position value. */
/* The letter U plus four or eight digits plus the closing bracket give a
   buffer length of 6 or 10.  */
388 if (buf
[0] == 'U' && (bufact
== 6 || bufact
== 10))
391 while (cp
< &buf
[bufact
- 1] && isxdigit (*cp
))
/* Every character between the U and the closing bracket is a hex digit.  */
394 if (cp
== &buf
[bufact
- 1])
397 lr
->token
.tok
= bufact
== 6 ? tok_ucs2
: tok_ucs4
;
/* Convert the digits only.  The conversion has to start after the leading
   U: started at buf itself, strtoul finds no digit and always returns 0.
   It stops by itself at the closing bracket.  */
398 lr
->token
.val
.charcode
.val
= strtoul (buf
+ 1, NULL
, 16);
399 lr
->token
.val
.charcode
.nbytes
= lr
->token
.tok
== tok_ucs2
? 2 : 4;
405 /* It is a symbolic name. Test for reserved words. */
/* The lookup length excludes the closing bracket.  */
406 kw
= lr
->hash_fct (buf
, bufact
- 1);
408 if (kw
!= NULL
&& kw
->symname_or_ident
== 1)
410 lr
->token
.tok
= kw
->token
;
415 lr
->token
.tok
= tok_bsymbol
;
/* Resize the buffer to the collected text plus one byte before handing
   it over to the token.  */
418 buf
= xrealloc (buf
, bufact
+ 1);
420 lr
->token
.val
.str
.start
= buf
;
421 lr
->token
.val
.str
.len
= bufact
- 1;
/* Scan an identifier.  Its first character is the one lr_token already
   consumed, found at LR->buf[LR->idx - 1].  Further characters are read
   until whitespace, a double quote, a semicolon, a less-than sign or a
   comma.  The collected text is looked up with LR->hash_fct: a keyword
   whose symname_or_ident is 0 supplies its own token code.  The visible
   code otherwise sets tok_ident and attaches the text, resized to its
   length plus one byte, as the string value of the token.
   NOTE(review): the loop condition has no explicit end-of-input test;
   whether lr_getc can return EOF repeatedly here is not visible.  */
428 static struct token
*
429 get_ident (struct linereader
*lr
)
434 const struct keyword_t
*kw
;
437 buf
= xmalloc (bufmax
);
440 ADDC (lr
->buf
[lr
->idx
- 1]);
442 while (!isspace ((ch
= lr_getc (lr
))) && ch
!= '"' && ch
!= ';'
443 && ch
!= '<' && ch
!= ',')
444 /* XXX Handle escape sequences? */
449 kw
= lr
->hash_fct (buf
, bufact
);
451 if (kw
!= NULL
&& kw
->symname_or_ident
== 0)
453 lr
->token
.tok
= kw
->token
;
458 lr
->token
.tok
= tok_ident
;
461 buf
= xrealloc (buf
, bufact
+ 1);
463 lr
->token
.val
.str
.start
= buf
;
464 lr
->token
.val
.str
.len
= bufact
;
/* Scan a string; characters are read until the closing double quote, a
   newline or end of input.  Ending on a newline or end of input is
   reported as an unterminated string, and a lone trailing escape character
   is reported as an illegal escape sequence.  When CHARSET is non-null, a
   less-than sign starts a symbol reference that runs to the matching
   greater-than sign.  With LR->translate_strings set, the symbol is looked
   up with charset_find_value and the value is appended through
   encode_char; the buffer is reallocated whenever fewer than 8 bytes
   remain free.  The result is tok_string.  Its string value is either the
   collected buffer with its length, or a null start with length 0.
   NOTE(review): the null result presumably corresponds to illegal_string
   being set -- the controlling test is not visible in this copy.  */
471 static struct token
*
472 get_string (struct linereader
*lr
, const struct charset_t
*charset
)
474 int illegal_string
= 0;
480 buf
= xmalloc (bufmax
);
483 while ((ch
= lr_getc (lr
)) != '"' && ch
!= '\n' && ch
!= EOF
)
484 if (ch
!= '<' || charset
== NULL
)
486 if (ch
== lr
->escape_char
)
489 if (ch
== '\n' || ch
== EOF
)
496 /* We have to get the value of the symbol. */
498 size_t startidx
= bufact
;
500 if (!lr
->translate_strings
)
503 while ((ch
= lr_getc (lr
)) != '>' && ch
!= '\n' && ch
!= EOF
)
505 if (ch
== lr
->escape_char
)
508 if (ch
== '\n' || ch
== EOF
)
514 if (ch
== '\n' || ch
== EOF
)
515 lr_error (lr
, _("unterminated string"));
517 if (!lr
->translate_strings
)
520 if (lr
->translate_strings
)
522 value
= charset_find_value (charset
, &buf
[startidx
],
524 if (value
== ILLEGAL_CHAR_VALUE
)
528 if (bufmax
- bufact
< 8)
531 buf
= (char *) xrealloc (buf
, bufmax
);
535 if (encode_char (value
, &cp
))
542 /* Catch errors with trailing escape character. */
543 if (bufact
> 0 && buf
[bufact
- 1] == lr
->escape_char
544 && (bufact
== 1 || buf
[bufact
- 2] != lr
->escape_char
))
546 lr_error (lr
, _("illegal escape sequence at end of string"));
549 else if (ch
== '\n' || ch
== EOF
)
550 lr_error (lr
, _("unterminated string"));
552 /* Terminate string if necessary. */
553 if (lr
->translate_strings
)
556 if (encode_char (0, &cp
))
564 lr
->token
.tok
= tok_string
;
569 lr
->token
.val
.str
.start
= NULL
;
570 lr
->token
.val
.str
.len
= 0;
574 buf
= xrealloc (buf
, bufact
+ 1);
576 lr
->token
.val
.str
.start
= buf
;
577 lr
->token
.val
.str
.len
= bufact
;