1 /* Copyright (C) 1996 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If
17 not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
42 #include "catgetsinfo.h"
46 (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
57 struct message_list
*next
;
65 struct message_list
*messages
;
72 struct set_list
*next
;
78 struct set_list
*all_sets
;
79 struct set_list
*current_set
;
80 size_t total_messages
;
84 struct obstack mem_pool
;
88 /* If non-zero force creation of new file, not using existing one. */
92 static const struct option long_options
[] =
94 { "header", required_argument
, NULL
, 'H' },
95 { "help", no_argument
, NULL
, 'h' },
96 { "new", no_argument
, &force_new
, 1 },
97 { "output", required_argument
, NULL
, 'o' },
98 { "version", no_argument
, NULL
, 'V' },
102 /* Wrapper functions with error checking for standard functions. */
103 extern void *xmalloc (size_t n
);
105 /* Prototypes for local functions. */
106 static void usage (int status
) __attribute__ ((noreturn
));
107 static void error_print (void);
108 static struct catalog
*read_input_file (struct catalog
*current
,
110 static void write_out (struct catalog
*result
, const char *output_name
,
111 const char *header_name
);
112 static struct set_list
*find_set (struct catalog
*current
, int number
);
113 static void normalize_line (const char *fname
, size_t line
, char *string
,
115 static void read_old (struct catalog
*catalog
, const char *file_name
);
119 main (int argc
, char *argv
[])
121 struct catalog
*result
;
122 const char *output_name
;
123 const char *header_name
;
128 /* Set program name for messages. */
129 error_print_progname
= error_print
;
131 /* Set locale via LC_ALL. */
132 setlocale (LC_ALL
, "");
134 /* Set the text message domain. */
135 textdomain (PACKAGE
);
137 /* Initialize local variables. */
144 while ((opt
= getopt_long (argc
, argv
, "hH:o:V", long_options
, NULL
)) != EOF
)
147 case '\0': /* Long option. */
153 header_name
= optarg
;
156 output_name
= optarg
;
162 usage (EXIT_FAILURE
);
165 /* Version information is requested. */
168 printf ("gencat (GNU %s) %s\n", PACKAGE
, VERSION
);
170 Copyright (C) %s Free Software Foundation, Inc.\n\
171 This is free software; see the source for copying conditions. There is NO\n\
172 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
174 printf (_("Written by %s.\n"), "Ulrich Drepper");
179 /* Help is requested. */
181 usage (EXIT_SUCCESS
);
183 /* Determine output file. */
184 if (output_name
== NULL
)
185 output_name
= optind
< argc
? argv
[optind
++] : "-";
187 /* Process all input files. */
188 setlocale (LC_CTYPE
, "C");
191 result
= read_input_file (result
, argv
[optind
]);
192 while (++optind
< argc
);
194 result
= read_input_file (NULL
, "-");
196 /* Write out the result. */
198 write_out (result
, output_name
, header_name
);
207 if (status
!= EXIT_SUCCESS
)
208 fprintf (stderr
, gettext ("Try `%s --help' for more information.\n"),
209 program_invocation_name
);
213 Usage: %s [OPTION]... -o OUTPUT-FILE [INPUT-FILE]...\n\
214 %s [OPTION]... [OUTPUT-FILE [INPUT-FILE]...]\n\
215 Mandatory arguments to long options are mandatory for short options too.\n\
216 -H, --header create C header file containing symbol definitions\n\
217 -h, --help display this help and exit\n\
218 --new do not use existing catalog, force new output file\n\
219 -o, --output=NAME write output to file NAME\n\
220 -V, --version output version information and exit\n\
221 If INPUT-FILE is -, input is read from standard input. If OUTPUT-FILE\n\
222 is -, output is written to standard output.\n"),
223 program_invocation_name
, program_invocation_name
);
224 fputs (gettext ("Report bugs to <bug-glibc@prep.ai.mit.edu>.\n"),
232 /* The address of this function will be assigned to the hook in the
237 /* We don't want the program name to be printed in messages. Emacs'
238 compile.el does not like this. */
242 static struct catalog
*
243 read_input_file (struct catalog
*current
, const char *fname
)
250 if (strcmp (fname
, "-") == 0 || strcmp (fname
, "/dev/stdin") == 0)
253 fname
= gettext ("*standard input*");
256 fp
= fopen (fname
, "r");
259 error (0, errno
, gettext ("cannot open input file `%s'"), fname
);
263 /* If we haven't seen anything yet, allocate result structure. */
266 current
= (struct catalog
*) xmalloc (sizeof (*current
));
268 current
->all_sets
= NULL
;
269 current
->total_messages
= 0;
270 current
->last_set
= 0;
271 current
->current_set
= find_set (current
, NL_SETD
);
273 #define obstack_chunk_alloc xmalloc
274 #define obstack_chunk_free free
275 obstack_init (&current
->mem_pool
);
285 size_t start_line
= line_number
+ 1;
292 act_len
= getline (&buf
, &len
, fp
);
297 /* Is the line continued? */
298 if (buf
[act_len
- 1] == '\n')
301 continued
= buf
[act_len
- 1] == '\\';
308 /* Append to currently selected line. */
309 obstack_grow (&current
->mem_pool
, buf
, act_len
);
313 obstack_1grow (&current
->mem_pool
, '\0');
314 this_line
= (char *) obstack_finish (&current
->mem_pool
);
317 if (this_line
[0] == '$')
319 if (isspace (this_line
[1]))
320 /* This is a comment line. Do nothing. */;
321 else if (strncmp (&this_line
[1], "set", 3) == 0)
323 int cnt
= sizeof ("cnt");
325 const char *symbol
= NULL
;
326 while (isspace (this_line
[cnt
]))
329 if (isdigit (this_line
[cnt
]))
331 set_number
= atol (&this_line
[cnt
]);
333 /* If the given number for the character set is
334 higher than any we used for symbolic set names
335 avoid clashing by using only higher numbers for
336 the following symbolic definitions. */
337 if (set_number
> current
->last_set
)
338 current
->last_set
= set_number
;
342 /* See whether it is a reasonable identifier. */
344 while (isalnum (this_line
[cnt
]) || this_line
[cnt
] == '_')
349 /* No correct character found. */
350 error_at_line (0, 0, fname
, start_line
,
351 gettext ("illegal set number"));
356 /* We have found something which looks like a
357 correct identifier. */
358 struct set_list
*runp
;
360 this_line
[cnt
] = '\0';
362 symbol
= &this_line
[start
];
364 /* Test whether the identifier was already used. */
365 runp
= current
->all_sets
;
367 if (runp
->symbol
!= NULL
368 && strcmp (runp
->symbol
, symbol
) == 0)
375 /* We cannot allow duplicate identifiers for
377 error_at_line (0, 0, fname
, start_line
,
378 gettext ("duplicate set definition"));
379 error_at_line (0, 0, runp
->fname
, runp
->line
,
381 this is the first definition"));
385 /* Allocate next free message set for identifier. */
386 set_number
= ++current
->last_set
;
392 /* We found a legal set number. */
393 current
->current_set
= find_set (current
, set_number
);
396 current
->current_set
->symbol
= symbol
;
397 current
->current_set
->fname
= fname
;
398 current
->current_set
->line
= start_line
;
401 else if (strncmp (&this_line
[1], "delset", 6) == 0)
403 int cnt
= sizeof ("delset");
405 while (isspace (this_line
[cnt
]))
408 if (isdigit (this_line
[cnt
]))
410 size_t set_number
= atol (&this_line
[cnt
]);
411 struct set_list
*set
;
413 /* Mark the message set with the given number as
415 set
= find_set (current
, set_number
);
420 /* See whether it is a reasonable identifier. */
422 while (isalnum (this_line
[cnt
]) || this_line
[cnt
] == '_')
427 error_at_line (0, 0, fname
, start_line
,
428 gettext ("illegal set number"));
434 struct set_list
*runp
;
436 this_line
[cnt
] = '\0';
438 symbol
= &this_line
[start
];
440 /* We have a symbolic set name. This name must
441 appear somewhere else in the catalogs read so
444 for (runp
= current
->all_sets
; runp
!= NULL
;
447 if (strcmp (runp
->symbol
, symbol
) == 0)
454 /* Name does not exist before. */
455 error_at_line (0, 0, fname
, start_line
,
456 gettext ("unknown set `%s'"), symbol
);
460 else if (strncmp (&this_line
[1], "quote", 5) == 0)
462 int cnt
= sizeof ("quote");
463 while (isspace (this_line
[cnt
]))
465 /* Yes, the quote char can be '\0'; this means no quote
467 current
->quote_char
= this_line
[cnt
];
473 while (this_line
[cnt
] != '\0' && !isspace (this_line
[cnt
]))
475 this_line
[cnt
] = '\0';
476 error_at_line (0, 0, fname
, start_line
,
477 gettext ("unknown directive `%s': line ignored"),
481 else if (isalnum (this_line
[0]) || this_line
[0] == '_')
483 const char *ident
= this_line
;
488 while (this_line
[0] != '\0' && !isspace (this_line
[0]));;
489 this_line
[0] = '\0'; /* Terminate the identifier. */
493 while (isspace (this_line
[0]));
494 /* Now we found the beginning of the message itself. */
496 if (isdigit (ident
[0]))
498 struct message_list
*runp
;
500 message_number
= atoi (ident
);
502 /* Find location to insert the new message. */
503 runp
= current
->current_set
->messages
;
505 if (runp
->number
== message_number
)
511 /* Oh, oh. There is already a message with this
512 number in the message set. */
513 error_at_line (0, 0, fname
, start_line
,
514 gettext ("duplicated message number"));
515 error_at_line (0, 0, runp
->fname
, runp
->line
,
516 gettext ("this is the first definition"));
519 ident
= NULL
; /* We don't have a symbol. */
521 if (message_number
!= 0
522 && message_number
> current
->current_set
->last_message
)
523 current
->current_set
->last_message
= message_number
;
525 else if (ident
[0] != '\0')
527 struct message_list
*runp
;
528 runp
= current
->current_set
->messages
;
530 /* Test whether the symbolic name was not used for
531 another message in this message set. */
533 if (runp
->symbol
!= NULL
&& strcmp (ident
, runp
->symbol
) == 0)
539 /* The name is already used. */
540 error_at_line (0, 0, fname
, start_line
,
541 gettext ("duplicated message identifier"));
542 error_at_line (0, 0, runp
->fname
, runp
->line
,
543 gettext ("this is the first definition"));
547 /* Give the message the next unused number. */
548 message_number
= ++current
->current_set
->last_message
;
553 if (message_number
!= 0)
555 struct message_list
*newp
;
557 used
= 1; /* Yes, we use the line. */
559 /* Strip quote characters, change escape sequences into
560 correct characters etc. */
561 normalize_line (fname
, start_line
, this_line
,
562 current
->quote_char
);
564 newp
= (struct message_list
*) xmalloc (sizeof (*newp
));
565 newp
->number
= message_number
;
566 newp
->message
= this_line
;
567 /* Remember symbolic name; is NULL if none is given. */
568 newp
->symbol
= ident
;
569 /* Remember where we found the message. */
571 newp
->line
= start_line
;
573 /* Find the place to insert the message. We keep them in a
574 sorted singly linked list. */
575 if (current
->current_set
->messages
== NULL
576 || current
->current_set
->messages
->number
> message_number
)
578 newp
->next
= current
->current_set
->messages
;
579 current
->current_set
->messages
= newp
;
583 struct message_list
*runp
;
584 runp
= current
->current_set
->messages
;
585 while (runp
->next
!= NULL
)
586 if (runp
->next
->number
> message_number
)
590 newp
->next
= runp
->next
;
594 ++current
->total_messages
;
601 /* See whether we have any non-white space character in this
603 while (this_line
[cnt
] != '\0' && isspace (this_line
[cnt
]))
606 if (this_line
[cnt
] != '\0')
607 /* Yes, some unknown characters found. */
608 error_at_line (0, 0, fname
, start_line
,
609 gettext ("malformed line ignored"));
612 /* We can save the memory for the line if it was not used. */
614 obstack_free (&current
->mem_pool
, this_line
);
624 write_out (struct catalog
*catalog
, const char *output_name
,
625 const char *header_name
)
627 /* Computing the "optimal" size. */
628 struct set_list
*set_run
;
629 size_t best_total
, best_size
, best_depth
;
630 size_t act_size
, act_depth
;
631 struct catalog_obj obj
;
632 struct obstack string_pool
;
635 u_int32_t
*array1
, *array2
;
639 /* If not otherwise told try to read file with existing
642 read_old (catalog
, output_name
);
644 /* Initialize best_size with a very high value. */
645 best_total
= best_size
= best_depth
= UINT_MAX
;
647 /* We need some start size for testing. Let's start with
648 TOTAL_MESSAGES / 5, which theoretically provides a mean depth of
650 act_size
= 1 + catalog
->total_messages
/ 5;
652 /* We determine the size of a hash table here. Because the message
653 numbers can be chosen arbitrarily by the programmer we cannot use
654 the simple method of accessing the array using the message
655 number. The algorithm is based on the trivial hash function
656 NUMBER % TABLE_SIZE, where collisions are stored in a second
657 dimension up to TABLE_DEPTH. We here compute TABLE_SIZE so that
658 the needed space (= TABLE_SIZE * TABLE_DEPTH) is minimal. */
659 while (act_size
<= best_total
)
661 size_t deep
[act_size
];
664 memset (deep
, '\0', act_size
* sizeof (size_t));
665 set_run
= catalog
->all_sets
;
666 while (set_run
!= NULL
)
668 struct message_list
*message_run
;
670 message_run
= set_run
->messages
;
671 while (message_run
!= NULL
)
673 size_t idx
= (message_run
->number
* set_run
->number
) % act_size
;
676 if (deep
[idx
] > act_depth
)
678 act_depth
= deep
[idx
];
679 if (act_depth
* act_size
> best_total
)
682 message_run
= message_run
->next
;
684 set_run
= set_run
->next
;
687 if (act_depth
* act_size
<= best_total
)
689 /* We have found a better solution. */
690 best_total
= act_depth
* act_size
;
691 best_size
= act_size
;
692 best_depth
= act_depth
;
698 /* Let's be prepared for an empty message file. */
699 if (best_size
== UINT_MAX
)
705 /* OK, now we have the size we will use. Fill in the header, build
706 the table and the second one with swapped byte order. */
707 obj
.magic
= CATGETS_MAGIC
;
708 obj
.plane_size
= best_size
;
709 obj
.plane_depth
= best_depth
;
711 /* Allocate room for all needed arrays. */
713 (u_int32_t
*) alloca (best_size
* best_depth
* sizeof (u_int32_t
) * 3);
714 memset (array1
, '\0', best_size
* best_depth
* sizeof (u_int32_t
) * 3);
716 = (u_int32_t
*) alloca (best_size
* best_depth
* sizeof (u_int32_t
) * 3);
717 obstack_init (&string_pool
);
719 set_run
= catalog
->all_sets
;
720 while (set_run
!= NULL
)
722 struct message_list
*message_run
;
724 message_run
= set_run
->messages
;
725 while (message_run
!= NULL
)
727 size_t idx
= (((message_run
->number
* set_run
->number
) % best_size
)
729 /* Determine collision depth. */
730 while (array1
[idx
] != 0)
731 idx
+= best_size
* 3;
733 /* Store set number, message number and pointer into string
734 space, relative to the first string. */
735 array1
[idx
+ 0] = set_run
->number
;
736 array1
[idx
+ 1] = message_run
->number
;
737 array1
[idx
+ 2] = obstack_object_size (&string_pool
);
739 /* Add current string to the continuous space containing all
741 obstack_grow0 (&string_pool
, message_run
->message
,
742 strlen (message_run
->message
));
744 message_run
= message_run
->next
;
747 set_run
= set_run
->next
;
749 strings_size
= obstack_object_size (&string_pool
);
750 strings
= obstack_finish (&string_pool
);
752 /* Compute ARRAY2 by changing the byte order. */
753 for (cnt
= 0; cnt
< best_size
* best_depth
* 3; ++cnt
)
754 array2
[cnt
] = SWAPU32 (array1
[cnt
]);
756 /* Now we can write out the whole data. */
757 if (strcmp (output_name
, "-") == 0
758 || strcmp (output_name
, "/dev/stdout") == 0)
762 fd
= creat (output_name
, 0666);
764 error (EXIT_FAILURE
, errno
, gettext ("cannot open output file `%s'"),
768 /* Write out header. */
769 write (fd
, &obj
, sizeof (obj
));
771 /* We always write out the little endian version of the index
773 #if __BYTE_ORDER == __LITTLE_ENDIAN
774 write (fd
, array1
, best_size
* best_depth
* sizeof (u_int32_t
) * 3);
775 write (fd
, array2
, best_size
* best_depth
* sizeof (u_int32_t
) * 3);
776 #elif __BYTE_ORDER == __BIG_ENDIAN
777 write (fd
, array2
, best_size
* best_depth
* sizeof (u_int32_t
) * 3);
778 write (fd
, array1
, best_size
* best_depth
* sizeof (u_int32_t
) * 3);
780 # error Cannot handle __BYTE_ORDER byte order
783 /* Finally write the strings. */
784 write (fd
, strings
, strings_size
);
786 if (fd
!= STDOUT_FILENO
)
789 /* If requested now write out the header file. */
790 if (header_name
!= NULL
)
795 /* Open output file. "-" or "/dev/stdout" means write to
797 if (strcmp (header_name
, "-") == 0
798 || strcmp (header_name
, "/dev/stdout") == 0)
802 fp
= fopen (header_name
, "w");
804 error (EXIT_FAILURE
, errno
,
805 gettext ("cannot open output file `%s'"), header_name
);
808 /* Iterate over all sets and all messages. */
809 set_run
= catalog
->all_sets
;
810 while (set_run
!= NULL
)
812 struct message_list
*message_run
;
814 /* If the current message set has a symbolic name write this
816 if (set_run
->symbol
!= NULL
)
817 fprintf (fp
, "%s#define %sSet %#x\t/* %s:%Zu */\n",
818 first
? "" : "\n", set_run
->symbol
, set_run
->number
- 1,
819 set_run
->fname
, set_run
->line
);
822 message_run
= set_run
->messages
;
823 while (message_run
!= NULL
)
825 /* If the current message has a symbolic name write
826 #define out. But we have to take care for the set
827 not having a symbolic name. */
828 if (message_run
->symbol
!= NULL
)
829 if (set_run
->symbol
== NULL
)
830 fprintf (fp
, "#define AutomaticSet%d%s %#x\t/* %s:%Zu */\n",
831 set_run
->number
, message_run
->symbol
,
832 message_run
->number
, message_run
->fname
,
835 fprintf (fp
, "#define %s%s %#x\t/* %s:%Zu */\n",
836 set_run
->symbol
, message_run
->symbol
,
837 message_run
->number
, message_run
->fname
,
840 message_run
= message_run
->next
;
843 set_run
= set_run
->next
;
852 static struct set_list
*
853 find_set (struct catalog
*current
, int number
)
855 struct set_list
*result
= current
->all_sets
;
857 /* We must avoid set number 0 because a set of this number signals
858 in the tables that the entry is not occupied. */
861 while (result
!= NULL
)
862 if (result
->number
== number
)
865 result
= result
->next
;
867 /* Prepare new message set. */
868 result
= (struct set_list
*) xmalloc (sizeof (*result
));
869 result
->number
= number
;
871 result
->messages
= NULL
;
872 result
->next
= current
->all_sets
;
873 current
->all_sets
= result
;
879 /* Normalize given string *in*place* by processing escape sequences
880 and quote characters. */
882 normalize_line (const char *fname
, size_t line
, char *string
, char quote_char
)
888 if (quote_char
!= '\0' && *rp
== quote_char
)
897 if (*rp
== quote_char
)
898 /* We simply end the string the first time we find an
899 unescaped quote character. */
901 else if (*rp
== '\\')
904 if (quote_char
!= '\0' && *rp
== quote_char
)
905 /* This is an extension to XPG. */
908 /* Recognize escape sequences. */
941 int number
= *rp
++ - '0';
942 while (number
<= (255 / 8) && *rp
>= '0' && *rp
<= '7')
945 number
+= *rp
++ - '0';
947 *wp
++ = (char) number
;
951 /* Simply ignore the backslash character. */
958 /* If we saw a quote character at the beginning we expect another
960 if (is_quoted
&& *rp
!= quote_char
)
961 error (0, 0, fname
, line
, gettext ("unterminated message"));
963 /* Terminate string. */
970 read_old (struct catalog
*catalog
, const char *file_name
)
972 struct catalog_info old_cat_obj
;
973 struct set_list
*set
= NULL
;
977 old_cat_obj
.status
= closed
;
978 old_cat_obj
.cat_name
= file_name
;
980 /* Try to open catalog, but don't look through the NLSPATH. */
981 __open_catalog (&old_cat_obj
, 0);
983 if (old_cat_obj
.status
!= mmaped
&& old_cat_obj
.status
!= malloced
)
985 /* No problem, the catalog simply does not exist. */
988 error (EXIT_FAILURE
, errno
, gettext ("while opening old catalog file"));
990 /* OK, we have the catalog loaded. Now read all messages and merge
991 them. When set and message number clash for any message the new
993 for (cnt
= 0; cnt
< old_cat_obj
.plane_size
* old_cat_obj
.plane_depth
; ++cnt
)
995 struct message_list
*message
, *last
;
997 if (old_cat_obj
.name_ptr
[cnt
* 3 + 0] == 0)
998 /* No message in this slot. */
1001 if (old_cat_obj
.name_ptr
[cnt
* 3 + 0] - 1 != (u_int32_t
) last_set
)
1003 last_set
= old_cat_obj
.name_ptr
[cnt
* 3 + 0] - 1;
1004 set
= find_set (catalog
, old_cat_obj
.name_ptr
[cnt
* 3 + 0] - 1);
1008 message
= set
->messages
;
1009 while (message
!= NULL
)
1011 if ((u_int32_t
) message
->number
>= old_cat_obj
.name_ptr
[cnt
* 3 + 1])
1014 message
= message
->next
;
1018 || (u_int32_t
) message
->number
> old_cat_obj
.name_ptr
[cnt
* 3 + 1])
1020 /* We have found a message which is not yet in the catalog.
1021 Insert it at the right position. */
1022 struct message_list
*newp
;
1024 newp
= (struct message_list
*) xmalloc (sizeof(*newp
));
1025 newp
->number
= old_cat_obj
.name_ptr
[cnt
* 3 + 1];
1027 &old_cat_obj
.strings
[old_cat_obj
.name_ptr
[cnt
* 3 + 2]];
1030 newp
->symbol
= NULL
;
1031 newp
->next
= message
;
1034 set
->messages
= newp
;
1038 ++catalog
->total_messages
;