* malloc/mcheck.c (mabort): Declare prototype.
[glibc.git] / locale / locfile-lex.c
blob20e4f0f9cd7fc9c69d0ddfe1b1084c36ff30d6db
1 /* Copyright (C) 1995 Free Software Foundation, Inc.
3 The GNU C Library is free software; you can redistribute it and/or
4 modify it under the terms of the GNU Library General Public License as
5 published by the Free Software Foundation; either version 2 of the
6 License, or (at your option) any later version.
8 The GNU C Library is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 Library General Public License for more details.
13 You should have received a copy of the GNU Library General Public
14 License along with the GNU C Library; see the file COPYING.LIB. If
15 not, write to the Free Software Foundation, Inc., 675 Mass Ave,
16 Cambridge, MA 02139, USA. */
18 #include <ctype.h>
19 #include <langinfo.h>
20 #include <libintl.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <unistd.h>
25 #include "localedef.h"
26 #include "token.h"
29 /* Include the hashing table for the keywords. */
30 const struct locale_keyword* in_word_set (register const char *str,
31 register int len);
32 #include "keyword.h"
35 /* Contains the status of reading the locale definition file. */
36 struct locfile_data locfile_data;
38 /* This is a flag used while collation input. This is the only place
39 where element names beside the ones defined in the character map are
40 allowed. There we must not give error messages. */
41 int reject_new_char = 1;
43 /* Prototypes for local functions. */
44 static int get_char (void);
47 #define LD locfile_data
49 /* Opens the locale definition file and initializes the status data structure
50 for following calls of `locfile_lex'. */
51 void
52 locfile_open (const char *fname)
54 if (fname == NULL)
55 /* We read from stdin. */
56 LD.filename = "<stdin>";
57 else
59 if (freopen (fname, "r", stdin) == NULL)
60 error (4, 0, gettext ("input file `%s' not found"), fname);
61 LD.filename = fname;
64 /* Set default values. */
65 LD.escape_char = '\\';
66 LD.comment_char = '#';
68 LD.bufsize = sysconf (_SC_LINE_MAX);
69 LD.buf = (char *) xmalloc (LD.bufsize);
70 LD.strbuf = (char *) xmalloc (LD.bufsize);
72 LD.buf_ptr = LD.returned_tokens = LD.line_no = 0;
74 /* Now sign that we want immediately read a line. */
75 LD.continue_line = 1;
76 LD.buf[LD.buf_ptr] = '\0';
80 int
81 xlocfile_lex (char **token, int *token_len)
83 int retval = locfile_lex (token, token_len);
85 if (retval == 0)
86 /* I.e. end of file. */
87 error (4, 0, gettext ("%s: unexpected end of file in locale defintion "
88 "file"), locfile_data.filename);
90 return retval;
93 int
94 locfile_lex (char **token, int *token_len)
96 int start_again;
97 int retval = 0;
101 int start_ptr;
103 start_again = 0;
105 /* Read the next line. Skip over empty lines and comments. */
106 if ((LD.buf[LD.buf_ptr] == '\0' && LD.continue_line != 0)
107 || LD.buf_ptr >= LD.bufsize
108 || (posix_conformance == 0 && LD.buf[LD.buf_ptr] == LD.comment_char))
111 size_t linelen;
113 LD.buf_ptr = 0;
115 if (fgets (LD.buf, LD.bufsize, stdin) == NULL)
117 /* This makes subsequent calls also return EOF. */
118 LD.buf[0] = '\0';
119 return 0;
122 /* Increment line number counter. */
123 ++LD.line_no;
125 /* We now have to look whether this line is continued and
126 whether it at all fits into our buffer. */
127 linelen = strlen (LD.buf);
129 if (linelen == LD.bufsize - 1)
130 /* The did not fit into the buffer. */
131 error (2, 0, gettext ("%s:%Zd: line too long; use "
132 "`getconf LINE_MAX' to get the maximum "
133 "line length"), LD.filename, LD.line_no);
135 /* Remove '\n' at end of line. */
136 if (LD.buf[linelen - 1] == '\n')
137 LD.buf[--linelen] = '\0';
139 if (linelen > 0 && LD.buf[linelen - 1] == LD.escape_char)
141 LD.buf[--linelen] = '\0';
142 LD.continue_line = 1;
144 else
145 LD.continue_line = 0;
147 while (isspace (LD.buf[LD.buf_ptr]))
148 ++LD.buf_ptr;
150 /* We are not so restrictive and allow white spaces before
151 a comment. */
152 if (posix_conformance == 0
153 && LD.buf[LD.buf_ptr] == LD.comment_char
154 && LD.buf_ptr != 0)
155 error (0, 0, gettext ("%s:%Zd: comment does not start in "
156 "column 1"), LD.filename, LD.line_no);
158 while (LD.buf[LD.buf_ptr] == '\0'
159 || LD.buf[LD.buf_ptr] == LD.comment_char);
162 /* Get information for return values. */
163 *token = LD.buf + LD.buf_ptr;
164 start_ptr = LD.buf_ptr;
166 /* If no further character is in the line this is the end of a logical
167 line. This information is needed in the parser. */
168 if (LD.buf[LD.buf_ptr] == '\0')
170 LD.buf_ptr = LD.bufsize;
171 retval = TOK_ENDOFLINE;
173 else if (isalpha (LD.buf[LD.buf_ptr]))
174 /* The token is an identifier. The POSIX standard does not say
175 what characters might be contained but offical POSIX locale
176 definition files contain beside alnum characters '_', '-' and
177 '+'. */
179 const struct locale_keyword *kw;
182 ++LD.buf_ptr;
183 while (isalnum (LD.buf[LD.buf_ptr]) || LD.buf[LD.buf_ptr] == '_'
184 || LD.buf[LD.buf_ptr] == '-' || LD.buf[LD.buf_ptr] == '+');
186 /* Look in table of keywords. */
187 kw = in_word_set (*token, LD.buf_ptr - start_ptr);
188 if (kw == NULL)
189 retval = TOK_IDENT;
190 else
192 if (kw->token_id == TOK_ESCAPE_CHAR
193 || kw->token_id == TOK_COMMENT_CHAR)
194 /* `escape_char' and `comment_char' are keywords for the
195 lexer. Do not give them to the parser. */
197 start_again = 1;
199 if (!isspace (LD.buf[LD.buf_ptr])
200 || (posix_conformance && LD.returned_tokens > 0))
201 error (0, 0, gettext ("%s:%Zd: syntax error in locale "
202 "definition file"),
203 LD.filename, LD.line_no);
206 ++LD.buf_ptr;
207 while (isspace (LD.buf[LD.buf_ptr]));
209 kw->token_id == TOK_ESCAPE_CHAR
210 ? LD.escape_char
211 : LD.comment_char = LD.buf[LD.buf_ptr++];
213 ignore_to_eol (0, posix_conformance);
215 else
216 /* It is one of the normal keywords. */
217 retval = kw->token_id;
220 *token_len = LD.buf_ptr - start_ptr;
222 else if (LD.buf[LD.buf_ptr] == '"')
223 /* Read a string. All symbolic character descriptions are expanded.
224 This has to be done in a local buffer because a simple symbolic
225 character like <A> may expand to upto 6 bytes. */
227 char *last = LD.strbuf;
229 ++LD.buf_ptr;
230 while (LD.buf[LD.buf_ptr] != '"')
232 int pre = LD.buf_ptr;
233 int char_val = get_char (); /* token, token_len); */
235 if (char_val == 0)
237 error (4, 0, gettext ("%s:%Zd: unterminated string at end "
238 "of line"), LD.filename, LD.line_no);
239 /* NOTREACHED */
242 if (char_val > 0)
243 /* Unknown characters are simply not stored. */
244 last += char_to_utf (last, char_val);
245 else
247 char tmp[LD.buf_ptr - pre + 1];
248 memcpy (tmp, &LD.buf[pre], LD.buf_ptr - pre);
249 tmp[LD.buf_ptr - pre] = '\0';
250 error (0, 0, gettext ("%s:%Zd: character `%s' not defined"),
251 LD.filename, LD.line_no, tmp);
254 if (LD.buf[LD.buf_ptr] != '\0')
255 ++LD.buf_ptr;
257 *last = '\0';
258 *token = LD.strbuf;
259 *token_len = last - LD.strbuf;
260 retval = TOK_STRING;
262 else if (LD.buf[LD.buf_ptr] == '.' && LD.buf[LD.buf_ptr + 1] == '.'
263 && LD.buf[LD.buf_ptr + 2] == '.')
265 LD.buf_ptr += 3;
266 retval = TOK_ELLIPSIS;
268 else if (LD.buf[LD.buf_ptr] == LD.escape_char)
270 char *endp;
272 ++LD.buf_ptr;
273 switch (LD.buf[LD.buf_ptr])
275 case 'x':
276 if (isdigit (LD.buf[++LD.buf_ptr]))
278 retval = strtol (&LD.buf[LD.buf_ptr], &endp, 16);
279 if (endp - (LD.buf + LD.buf_ptr) < 2 || retval > 255)
280 retval = 'x';
281 else
282 LD.buf_ptr = endp - LD.buf;
284 else
285 retval = 'x';
286 break;
287 case 'd':
288 if (isdigit (LD.buf[++LD.buf_ptr]))
290 retval = strtol (&LD.buf[LD.buf_ptr], &endp, 10);
291 if (endp - (LD.buf + LD.buf_ptr) < 2 || retval > 255)
292 retval = 'd';
293 else
294 LD.buf_ptr = endp - LD.buf;
296 else
297 retval = 'd';
298 break;
299 case '0'...'9':
300 retval = strtol (&LD.buf[LD.buf_ptr], &endp, 8);
301 if (endp - (LD.buf + LD.buf_ptr) < 2 || retval > 255)
302 retval = LD.buf[LD.buf_ptr++];
303 else
304 LD.buf_ptr = endp - LD.buf;
305 break;
306 case 'a':
307 retval = '\a';
308 ++LD.buf_ptr;
309 break;
310 case 'b':
311 retval = '\b';
312 ++LD.buf_ptr;
313 break;
314 case 'f':
315 retval = '\f';
316 ++LD.buf_ptr;
317 break;
318 case 'n':
319 retval = '\n';
320 ++LD.buf_ptr;
321 break;
322 case 'r':
323 retval = '\r';
324 ++LD.buf_ptr;
325 break;
326 case 't':
327 retval = '\t';
328 ++LD.buf_ptr;
329 break;
330 case 'v':
331 retval = '\v';
332 ++LD.buf_ptr;
333 break;
334 default:
335 retval = LD.buf[LD.buf_ptr++];
336 break;
339 else if (isdigit (LD.buf[LD.buf_ptr]))
341 char *endp;
343 *token_len = strtol (&LD.buf[LD.buf_ptr], &endp, 10);
344 LD.buf_ptr = endp - LD.buf;
345 retval = TOK_NUMBER;
347 else if (LD.buf[LD.buf_ptr] == '-' && LD.buf[LD.buf_ptr + 1] == '1')
349 LD.buf_ptr += 2;
350 retval = TOK_MINUS1;
352 else
354 int ch = get_char (); /* token, token_len); */
355 if (ch != -1)
357 *token_len = ch;
358 retval = TOK_CHAR;
360 else
361 retval = TOK_ILL_CHAR;
364 /* Ignore white space. */
365 while (isspace (LD.buf[LD.buf_ptr]))
366 ++LD.buf_ptr;
368 while (start_again != 0);
370 ++LD.returned_tokens;
371 return retval;
375 /* Code a character with UTF-8 if the character map has multi-byte
376 characters. */
378 char_to_utf (char *buf, int char_val)
380 if (charmap_data.mb_cur_max == 1)
382 *buf++ = char_val;
383 return 1;
385 else
387 /* The number of bits coded in each character. */
388 #define CBPC 6
389 static struct coding_tab
391 int mask;
392 int val;
394 tab[] =
396 { 0x7f, 0x00 },
397 { 0x7ff, 0xc0 },
398 { 0xffff, 0xe0 },
399 { 0x1fffff, 0xf0 },
400 { 0x3ffffff, 0xf8 },
401 { 0x7fffffff, 0xfc },
402 { 0, }
404 struct coding_tab *t;
405 int c;
406 int cnt = 1;
408 for (t = tab; char_val > t->mask; ++t, ++cnt)
411 c = cnt;
413 buf += cnt;
414 while (c > 1)
416 *--buf = 0x80 | (char_val & ((1 << CBPC) - 1));
417 char_val >>= CBPC;
418 --c;
421 *--buf = t->val | char_val;
423 return cnt;
428 /* Ignore rest of line upto ENDOFLINE token, starting with given token.
429 If WARN_FLAG is set warn about any token but ENDOFLINE. */
430 void
431 ignore_to_eol (int token, int warn_flag)
433 if (token == TOK_ENDOFLINE)
434 return;
436 if (LD.buf[LD.buf_ptr] != '\0' && warn_flag)
437 error (0, 0, gettext ("%s:%Zd: trailing garbage at end of line"),
438 locfile_data.filename, locfile_data.line_no);
440 while (LD.continue_line)
442 LD.continue_line = 0;
444 /* Increment line number counter. */
445 ++LD.line_no;
447 if (fgets (LD.buf, LD.bufsize, stdin) != NULL)
449 /* We now have to look whether this line is continued and
450 whether it at all fits into our buffer. */
451 int linelen = strlen (LD.buf);
453 if (linelen == LD.bufsize - 1)
454 /* The did not fit into the buffer. */
455 error (2, 0, gettext ("%s:%Zd: line too long; use `getconf "
456 "LINE_MAX' to get the current maximum "
457 "line length"), LD.filename, LD.line_no);
459 /* Remove '\n' at end of line. */
460 if (LD.buf[linelen - 1] == '\n')
461 --linelen;
463 if (LD.buf[linelen - 1] == LD.escape_char)
464 LD.continue_line = 1;
468 /* This causes to begin the next line. */
469 LD.buf_ptr = LD.bufsize;
473 /* Return the value of the character at the beginning of the input buffer.
474 Symbolic character constants are expanded. */
475 static int
476 get_char (void)
478 if (LD.buf[LD.buf_ptr] == '<')
479 /* This is a symbolic character name. */
481 int char_val;
482 char *startp = LD.buf + (++LD.buf_ptr);
483 char *endp = startp;
485 while (LD.buf[LD.buf_ptr] != '>' && isprint (LD.buf[LD.buf_ptr]))
487 if (LD.buf[LD.buf_ptr] == '\0'
488 || (LD.buf[LD.buf_ptr] == LD.escape_char
489 && LD.buf[++LD.buf_ptr] == '\0'))
490 break;
492 *endp++ = LD.buf[LD.buf_ptr++];
495 if (LD.buf[LD.buf_ptr] != '>' && LD.buf[LD.buf_ptr] == '\0')
497 error (0, 0, gettext ("%s:%Zd: end of line in character symbol"),
498 LD.filename, LD.line_no);
500 if (startp == endp)
501 return -1;
503 else
504 ++LD.buf_ptr;
506 char_val = find_char (startp, endp - startp);
507 if (char_val == -1 && verbose != 0 && reject_new_char != 0)
509 /* Locale defintions are often given very general. Missing
510 characters are only reported when explicitely requested. */
511 char tmp[endp - startp + 3];
513 tmp[0] = '<';
514 memcpy (tmp + 1, startp, endp - startp);
515 tmp[endp - startp + 1] = '>';
516 tmp[endp - startp + 2] = '\0';
518 error (0, 0, gettext ("%s:%Zd: character `%s' not defined"),
519 LD.filename, LD.line_no, tmp);
522 return char_val;
524 else
525 return (int) LD.buf[LD.buf_ptr++];
529 * Local Variables:
530 * mode:c
531 * c-basic-offset:2
532 * End: