1 /* Copyright (C) 1995 Free Software Foundation, Inc.
3 The GNU C Library is free software; you can redistribute it and/or
4 modify it under the terms of the GNU Library General Public License as
5 published by the Free Software Foundation; either version 2 of the
6 License, or (at your option) any later version.
8 The GNU C Library is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 Library General Public License for more details.
13 You should have received a copy of the GNU Library General Public
14 License along with the GNU C Library; see the file COPYING.LIB. If
15 not, write to the Free Software Foundation, Inc., 675 Mass Ave,
16 Cambridge, MA 02139, USA. */
25 #include "localedef.h"
29 /* Include the hashing table for the keywords. */
30 const struct locale_keyword
* in_word_set (register const char *str
,
35 /* Contains the status of reading the locale definition file. */
36 struct locfile_data locfile_data
;
38 /* This flag is used while reading collation input. That is the only place
39 where element names besides the ones defined in the character map are
40 allowed, so no error messages must be given for them there.  The flag
   starts out as 1; `get_char' only reports an undefined symbolic character
   when it is non-zero. */
41 int reject_new_char
= 1;
43 /* Prototypes for local functions. */
44 static int get_char (void);
/* Shorthand for the global lexer state; every function in this file reads
   and updates the state through this name. */
47 #define LD locfile_data
49 /* Opens the locale definition file and initializes the status data structure
50 for following calls of `locfile_lex'.
   FNAME is reopened onto stdin for reading; if that fails, error() is
   called with status 4.  The lexer state (LD) then receives its default
   escape and comment characters, two buffers of LINE_MAX bytes each, and
   zeroed position, token and line counters.
   NOTE(review): some original lines are missing from this listing; the
   freopen call is presumably guarded by a test on FNAME -- confirm against
   the complete source. */
52 locfile_open (const char *fname
)
55 /* We read from stdin. */
56 LD
.filename
= "<stdin>";
59 if (freopen (fname
, "r", stdin
) == NULL
)
60 error (4, 0, gettext ("input file `%s' not found"), fname
);
64 /* Set default values. */
65 LD
.escape_char
= '\\';
66 LD
.comment_char
= '#';
/* NOTE(review): sysconf can return -1 when the limit is indeterminate; the
   value is used unchecked as the allocation size in the visible lines. */
68 LD
.bufsize
= sysconf (_SC_LINE_MAX
);
69 LD
.buf
= (char *) xmalloc (LD
.bufsize
);
70 LD
.strbuf
= (char *) xmalloc (LD
.bufsize
);
72 LD
.buf_ptr
= LD
.returned_tokens
= LD
.line_no
= 0;
74 /* Store a NUL at the current position to signal that a line has to be
   read immediately. */
76 LD
.buf
[LD
.buf_ptr
] = '\0';
/* Wrapper around `locfile_lex' for callers that cannot cope with the end
   of the input: TOKEN and TOKEN_LEN are passed through unchanged, and an
   "unexpected end of file" diagnostic is issued through error() with
   status 4.
   NOTE(review): the test that guards the error call and the return
   statement are not visible in this listing.  The message text misspells
   "definition"; it is a runtime string and is left untouched here. */
81 xlocfile_lex (char **token
, int *token_len
)
83 int retval
= locfile_lex (token
, token_len
);
86 /* I.e. end of file. */
87 error (4, 0, gettext ("%s: unexpected end of file in locale defintion "
88 "file"), locfile_data
.filename
);
/* Return the next token of the locale definition file.
   *TOKEN is set to the start of the token inside LD.buf and *TOKEN_LEN
   receives its length (for strings the number of bytes stored in
   LD.strbuf, for decimal numbers the numeric value).
   Token classes recognised by the visible code:
     - end of a logical line (TOK_ENDOFLINE);
     - identifiers, looked up with `in_word_set'; the `escape_char' and
       `comment_char' keywords are consumed here and never reach the parser;
     - strings in double quotes, expanded through `get_char' and
       `char_to_utf' into LD.strbuf;
     - the ellipsis `...' (TOK_ELLIPSIS);
     - numeric escapes introduced by LD.escape_char;
     - decimal numbers;
     - anything else is read with `get_char' (TOK_ILL_CHAR if unknown).
   LD.returned_tokens is incremented once per call.
   NOTE(review): many original lines (braces, case labels, returns) are
   missing from this listing; the summary covers only what is visible. */
94 locfile_lex (char **token
, int *token_len
)
105 /* Read the next line. Skip over empty lines and comments. */
106 if ((LD
.buf
[LD
.buf_ptr
] == '\0' && LD
.continue_line
!= 0)
107 || LD
.buf_ptr
>= LD
.bufsize
108 || (posix_conformance
== 0 && LD
.buf
[LD
.buf_ptr
] == LD
.comment_char
))
115 if (fgets (LD
.buf
, LD
.bufsize
, stdin
) == NULL
)
117 /* This makes subsequent calls also return EOF. */
122 /* Increment line number counter. */
125 /* We now have to look whether this line is continued and
126 whether it at all fits into our buffer. */
127 linelen
= strlen (LD
.buf
);
129 if (linelen
== LD
.bufsize
- 1)
130 /* The line did not fit into the buffer. */
131 error (2, 0, gettext ("%s:%Zd: line too long; use "
132 "`getconf LINE_MAX' to get the maximum "
133 "line length"), LD
.filename
, LD
.line_no
);
135 /* Remove '\n' at end of line. */
136 if (LD
.buf
[linelen
- 1] == '\n')
137 LD
.buf
[--linelen
] = '\0';
139 if (linelen
> 0 && LD
.buf
[linelen
- 1] == LD
.escape_char
)
141 LD
.buf
[--linelen
] = '\0';
142 LD
.continue_line
= 1;
145 LD
.continue_line
= 0;
/* NOTE(review): LD.buf appears to be a plain `char' buffer; the <ctype.h>
   functions used below require an argument representable as `unsigned
   char' (or EOF) -- confirm that bytes above 0x7f cannot show up here. */
147 while (isspace (LD
.buf
[LD
.buf_ptr
]))
150 /* We are not so restrictive and allow white spaces before
152 if (posix_conformance
== 0
153 && LD
.buf
[LD
.buf_ptr
] == LD
.comment_char
155 error (0, 0, gettext ("%s:%Zd: comment does not start in "
156 "column 1"), LD
.filename
, LD
.line_no
);
158 while (LD
.buf
[LD
.buf_ptr
] == '\0'
159 || LD
.buf
[LD
.buf_ptr
] == LD
.comment_char
);
162 /* Get information for return values. */
163 *token
= LD
.buf
+ LD
.buf_ptr
;
164 start_ptr
= LD
.buf_ptr
;
166 /* If no further character is in the line this is the end of a logical
167 line. This information is needed in the parser. */
168 if (LD
.buf
[LD
.buf_ptr
] == '\0')
170 LD
.buf_ptr
= LD
.bufsize
;
171 retval
= TOK_ENDOFLINE
;
173 else if (isalpha (LD
.buf
[LD
.buf_ptr
]))
174 /* The token is an identifier. The POSIX standard does not say
175 what characters might be contained but official POSIX locale
176 definition files contain beside alnum characters '_', '-' and
179 const struct locale_keyword
*kw
;
183 while (isalnum (LD
.buf
[LD
.buf_ptr
]) || LD
.buf
[LD
.buf_ptr
] == '_'
184 || LD
.buf
[LD
.buf_ptr
] == '-' || LD
.buf
[LD
.buf_ptr
] == '+');
186 /* Look in table of keywords. */
187 kw
= in_word_set (*token
, LD
.buf_ptr
- start_ptr
);
192 if (kw
->token_id
== TOK_ESCAPE_CHAR
193 || kw
->token_id
== TOK_COMMENT_CHAR
)
194 /* `escape_char' and `comment_char' are keywords for the
195 lexer. Do not give them to the parser. */
199 if (!isspace (LD
.buf
[LD
.buf_ptr
])
200 || (posix_conformance
&& LD
.returned_tokens
> 0))
201 error (0, 0, gettext ("%s:%Zd: syntax error in locale "
203 LD
.filename
, LD
.line_no
);
207 while (isspace (LD
.buf
[LD
.buf_ptr
]));
209 kw
->token_id
== TOK_ESCAPE_CHAR
211 : LD
.comment_char
= LD
.buf
[LD
.buf_ptr
++];
213 ignore_to_eol (0, posix_conformance
);
216 /* It is one of the normal keywords. */
217 retval
= kw
->token_id
;
220 *token_len
= LD
.buf_ptr
- start_ptr
;
222 else if (LD
.buf
[LD
.buf_ptr
] == '"')
223 /* Read a string. All symbolic character descriptions are expanded.
224 This has to be done in a local buffer because a simple symbolic
225 character like <A> may expand to up to 6 bytes. */
227 char *last
= LD
.strbuf
;
230 while (LD
.buf
[LD
.buf_ptr
] != '"')
232 int pre
= LD
.buf_ptr
;
233 int char_val
= get_char (); /* token, token_len); */
237 error (4, 0, gettext ("%s:%Zd: unterminated string at end "
238 "of line"), LD
.filename
, LD
.line_no
);
243 /* Unknown characters are simply not stored. */
244 last
+= char_to_utf (last
, char_val
);
247 char tmp
[LD
.buf_ptr
- pre
+ 1];
248 memcpy (tmp
, &LD
.buf
[pre
], LD
.buf_ptr
- pre
);
249 tmp
[LD
.buf_ptr
- pre
] = '\0';
250 error (0, 0, gettext ("%s:%Zd: character `%s' not defined"),
251 LD
.filename
, LD
.line_no
, tmp
);
254 if (LD
.buf
[LD
.buf_ptr
] != '\0')
259 *token_len
= last
- LD
.strbuf
;
262 else if (LD
.buf
[LD
.buf_ptr
] == '.' && LD
.buf
[LD
.buf_ptr
+ 1] == '.'
263 && LD
.buf
[LD
.buf_ptr
+ 2] == '.')
266 retval
= TOK_ELLIPSIS
;
268 else if (LD
.buf
[LD
.buf_ptr
] == LD
.escape_char
)
/* Numeric escape.  The visible strtol calls parse the digits in base 16,
   10 and 8; a value is rejected when fewer than two characters were
   consumed or when it exceeds 255.  In the visible base-8 branch the raw
   character is returned instead.
   NOTE(review): the case labels that select the base are not visible in
   this listing. */
273 switch (LD
.buf
[LD
.buf_ptr
])
276 if (isdigit (LD
.buf
[++LD
.buf_ptr
]))
278 retval
= strtol (&LD
.buf
[LD
.buf_ptr
], &endp
, 16);
279 if (endp
- (LD
.buf
+ LD
.buf_ptr
) < 2 || retval
> 255)
282 LD
.buf_ptr
= endp
- LD
.buf
;
288 if (isdigit (LD
.buf
[++LD
.buf_ptr
]))
290 retval
= strtol (&LD
.buf
[LD
.buf_ptr
], &endp
, 10);
291 if (endp
- (LD
.buf
+ LD
.buf_ptr
) < 2 || retval
> 255)
294 LD
.buf_ptr
= endp
- LD
.buf
;
300 retval
= strtol (&LD
.buf
[LD
.buf_ptr
], &endp
, 8);
301 if (endp
- (LD
.buf
+ LD
.buf_ptr
) < 2 || retval
> 255)
302 retval
= LD
.buf
[LD
.buf_ptr
++];
304 LD
.buf_ptr
= endp
- LD
.buf
;
335 retval
= LD
.buf
[LD
.buf_ptr
++];
339 else if (isdigit (LD
.buf
[LD
.buf_ptr
]))
/* A decimal number: its value is handed back through *TOKEN_LEN. */
343 *token_len
= strtol (&LD
.buf
[LD
.buf_ptr
], &endp
, 10);
344 LD
.buf_ptr
= endp
- LD
.buf
;
347 else if (LD
.buf
[LD
.buf_ptr
] == '-' && LD
.buf
[LD
.buf_ptr
+ 1] == '1')
354 int ch
= get_char (); /* token, token_len); */
361 retval
= TOK_ILL_CHAR
;
364 /* Ignore white space. */
365 while (isspace (LD
.buf
[LD
.buf_ptr
]))
368 while (start_again
!= 0);
370 ++LD
.returned_tokens
;
/* char_to_utf: store the encoded form of CHAR_VAL at BUF.  The caller in
   `locfile_lex' advances its output pointer by the return value.
   NOTE(review): the comment below is truncated in this listing, and the
   branch taken when charmap_data.mb_cur_max is 1 is not visible. */
375 /* Code a character with UTF-8 if the character map has multi-byte
378 char_to_utf (char *buf
, int char_val
)
380 if (charmap_data
.mb_cur_max
== 1)
387 /* The number of bits coded in each character. */
389 static struct coding_tab
401 { 0x7fffffff, 0xfc },
404 struct coding_tab
*t
;
/* Walk the table until an entry's mask covers CHAR_VAL, counting the
   skipped entries in CNT. */
408 for (t
= tab
; char_val
> t
->mask
; ++t
, ++cnt
)
/* The bytes are filled in backwards: first the trailing bytes, each
   carrying the low CBPC bits of CHAR_VAL tagged with 0x80 ... */
416 *--buf
= 0x80 | (char_val
& ((1 << CBPC
) - 1));
/* ... then the leading byte, which merges the table's marker bits with
   what is left of CHAR_VAL. */
421 *--buf
= t
->val
| char_val
;
428 /* Ignore rest of line up to ENDOFLINE token, starting with given token.
429 If WARN_FLAG is set warn about any token but ENDOFLINE.
   Continuation lines (lines ending in LD.escape_char) are read and
   discarded as well.  At the end LD.buf_ptr is set to LD.bufsize, which
   makes the next `locfile_lex' call start with a fresh line.
   NOTE(review): the action taken when TOKEN already is TOK_ENDOFLINE is
   not visible in this listing. */
431 ignore_to_eol (int token
, int warn_flag
)
433 if (token
== TOK_ENDOFLINE
)
436 if (LD
.buf
[LD
.buf_ptr
] != '\0' && warn_flag
)
437 error (0, 0, gettext ("%s:%Zd: trailing garbage at end of line"),
438 locfile_data
.filename
, locfile_data
.line_no
);
440 while (LD
.continue_line
)
442 LD
.continue_line
= 0;
444 /* Increment line number counter. */
447 if (fgets (LD
.buf
, LD
.bufsize
, stdin
) != NULL
)
449 /* We now have to look whether this line is continued and
450 whether it at all fits into our buffer. */
451 int linelen
= strlen (LD
.buf
);
453 if (linelen
== LD
.bufsize
- 1)
454 /* The line did not fit into the buffer. */
455 error (2, 0, gettext ("%s:%Zd: line too long; use `getconf "
456 "LINE_MAX' to get the current maximum "
457 "line length"), LD
.filename
, LD
.line_no
);
459 /* Remove '\n' at end of line. */
460 if (LD
.buf
[linelen
- 1] == '\n')
/* NOTE(review): unlike the matching test in `locfile_lex', this one has
   no `linelen > 0' guard; if the newline removal leaves an empty line the
   index would be -1 -- confirm against the complete source. */
463 if (LD
.buf
[linelen
- 1] == LD
.escape_char
)
464 LD
.continue_line
= 1;
468 /* This causes to begin the next line. */
469 LD
.buf_ptr
= LD
.bufsize
;
473 /* Return the value of the character at the beginning of the input buffer.
474 Symbolic character constants are expanded.
   A symbolic name is written as `<name>'; the name is collected in place
   (a character following LD.escape_char is taken literally) and looked up
   with `find_char'.  Any other character is returned as is and
   LD.buf_ptr is advanced past it.
   NOTE(review): the function header and several statements are missing
   from this listing. */
478 if (LD
.buf
[LD
.buf_ptr
] == '<')
479 /* This is a symbolic character name. */
482 char *startp
= LD
.buf
+ (++LD
.buf_ptr
);
485 while (LD
.buf
[LD
.buf_ptr
] != '>' && isprint (LD
.buf
[LD
.buf_ptr
]))
487 if (LD
.buf
[LD
.buf_ptr
] == '\0'
488 || (LD
.buf
[LD
.buf_ptr
] == LD
.escape_char
489 && LD
.buf
[++LD
.buf_ptr
] == '\0'))
492 *endp
++ = LD
.buf
[LD
.buf_ptr
++];
495 if (LD
.buf
[LD
.buf_ptr
] != '>' && LD
.buf
[LD
.buf_ptr
] == '\0')
497 error (0, 0, gettext ("%s:%Zd: end of line in character symbol"),
498 LD
.filename
, LD
.line_no
);
506 char_val
= find_char (startp
, endp
- startp
);
507 if (char_val
== -1 && verbose
!= 0 && reject_new_char
!= 0)
509 /* Locale definitions are often written in a very general way. Missing
510 characters are only reported when explicitly requested. */
/* TMP holds the name plus three extra bytes: the name is copied in at
   offset 1 and followed by '>' and the terminating NUL (the first byte is
   presumably set to '<' on a line missing from this listing). */
511 char tmp
[endp
- startp
+ 3];
514 memcpy (tmp
+ 1, startp
, endp
- startp
);
515 tmp
[endp
- startp
+ 1] = '>';
516 tmp
[endp
- startp
+ 2] = '\0';
518 error (0, 0, gettext ("%s:%Zd: character `%s' not defined"),
519 LD
.filename
, LD
.line_no
, tmp
);
525 return (int) LD
.buf
[LD
.buf_ptr
++];