1 /* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
42 #include "catgetsinfo.h"
46 (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
57 struct message_list
*next
;
65 struct message_list
*messages
;
72 struct set_list
*next
;
78 struct set_list
*all_sets
;
79 struct set_list
*current_set
;
80 size_t total_messages
;
84 struct obstack mem_pool
;
88 /* If non-zero force creation of new file, not using existing one. */
91 /* Name of output file. */
92 static const char *output_name
;
94 /* Name of generated C header file. */
95 static const char *header_name
;
97 /* Name and version of program. */
98 static void print_version (FILE *stream
, struct argp_state
*state
);
99 void (*argp_program_version_hook
) (FILE *, struct argp_state
*) = print_version
;
103 /* Definitions of arguments for argp functions. */
104 static const struct argp_option options
[] =
106 { "header", 'H', N_("NAME"), 0,
107 N_("Create C header file NAME containing symbol definitions") },
108 { "new", OPT_NEW
, NULL
, 0,
109 N_("Do not use existing catalog, force new output file") },
110 { "output", 'o', N_("NAME"), 0, N_("Write output to file NAME") },
111 { NULL
, 0, NULL
, 0, NULL
}
114 /* Short description of program. */
115 static const char doc
[] = N_("Generate message catalog.\
116 \vIf INPUT-FILE is -, input is read from standard input. If OUTPUT-FILE\n\
117 is -, output is written to standard output.\n");
119 /* Strings for arguments in help texts. */
120 static const char args_doc
[] = N_("\
121 -o OUTPUT-FILE [INPUT-FILE]...\n[OUTPUT-FILE [INPUT-FILE]...]");
123 /* Prototype for option handler. */
124 static error_t
parse_opt (int key
, char *arg
, struct argp_state
*state
);
126 /* Function to print some extra text in the help message. */
127 static char *more_help (int key
, const char *text
, void *input
);
129 /* Data structure to communicate with argp functions. */
130 static struct argp argp
=
132 options
, parse_opt
, args_doc
, doc
, NULL
, more_help
136 /* Wrapper functions with error checking for standard functions. */
137 extern void *xmalloc (size_t n
);
138 extern void *xcalloc (size_t n
, size_t s
);
140 /* Prototypes for local functions. */
141 static void error_print (void);
142 static struct catalog
*read_input_file (struct catalog
*current
,
144 static void write_out (struct catalog
*result
, const char *output_name
,
145 const char *header_name
);
146 static struct set_list
*find_set (struct catalog
*current
, int number
);
147 static void normalize_line (const char *fname
, size_t line
, char *string
,
149 static void read_old (struct catalog
*catalog
, const char *file_name
);
153 main (int argc
, char *argv
[])
155 struct catalog
*result
;
158 /* Set program name for messages. */
159 error_print_progname
= error_print
;
161 /* Set locale via LC_ALL. */
162 setlocale (LC_ALL
, "");
164 /* Set the text message domain. */
165 textdomain (PACKAGE
);
167 /* Initialize local variables. */
170 /* Parse and process arguments. */
171 argp_parse (&argp
, argc
, argv
, 0, &remaining
, NULL
);
173 /* Determine output file. */
174 if (output_name
== NULL
)
175 output_name
= remaining
< argc
? argv
[remaining
++] : "-";
177 /* Process all input files. */
178 setlocale (LC_CTYPE
, "C");
179 if (remaining
< argc
)
181 result
= read_input_file (result
, argv
[remaining
]);
182 while (++remaining
< argc
);
184 result
= read_input_file (NULL
, "-");
186 /* Write out the result. */
188 write_out (result
, output_name
, header_name
);
194 /* Handle program arguments. */
196 parse_opt (int key
, char *arg
, struct argp_state
*state
)
210 return ARGP_ERR_UNKNOWN
;
217 more_help (int key
, const char *text
, void *input
)
221 case ARGP_KEY_HELP_EXTRA
:
222 /* We print some extra information. */
223 return strdup (gettext ("\
224 Report bugs using the `glibcbug' script to <bugs@gnu.org>.\n"));
228 return (char *) text
;
231 /* Print the version information. */
233 print_version (FILE *stream
, struct argp_state
*state
)
235 fprintf (stream
, "gencat (GNU %s) %s\n", PACKAGE
, VERSION
);
236 fprintf (stream
, gettext ("\
237 Copyright (C) %s Free Software Foundation, Inc.\n\
238 This is free software; see the source for copying conditions. There is NO\n\
239 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
241 fprintf (stream
, gettext ("Written by %s.\n"), "Ulrich Drepper");
245 /* The address of this function will be assigned to the hook in the
250 /* We don't want the program name to be printed in messages. Emacs'
251 compile.el does not like this. */
255 static struct catalog
*
256 read_input_file (struct catalog
*current
, const char *fname
)
263 if (strcmp (fname
, "-") == 0 || strcmp (fname
, "/dev/stdin") == 0)
266 fname
= gettext ("*standard input*");
269 fp
= fopen (fname
, "r");
272 error (0, errno
, gettext ("cannot open input file `%s'"), fname
);
276 /* If we haven't seen anything yet, allocate result structure. */
279 current
= (struct catalog
*) xcalloc (1, sizeof (*current
));
281 #define obstack_chunk_alloc malloc
282 #define obstack_chunk_free free
283 obstack_init (&current
->mem_pool
);
285 current
->current_set
= find_set (current
, NL_SETD
);
295 size_t start_line
= line_number
+ 1;
302 act_len
= getline (&buf
, &len
, fp
);
307 /* Is the line continued? */
308 if (buf
[act_len
- 1] == '\n')
311 continued
= buf
[act_len
- 1] == '\\';
318 /* Append to currently selected line. */
319 obstack_grow (&current
->mem_pool
, buf
, act_len
);
323 obstack_1grow (&current
->mem_pool
, '\0');
324 this_line
= (char *) obstack_finish (&current
->mem_pool
);
327 if (this_line
[0] == '$')
329 if (isspace (this_line
[1]))
330 /* This is a comment line. Do nothing. */;
331 else if (strncmp (&this_line
[1], "set", 3) == 0)
333 int cnt
= sizeof ("set");
335 const char *symbol
= NULL
;
336 while (isspace (this_line
[cnt
]))
339 if (isdigit (this_line
[cnt
]))
341 set_number
= atol (&this_line
[cnt
]);
343 /* If the given number for the character set is
344 higher than any we used for symbolic set names
345 avoid clashing by using only higher numbers for
346 the following symbolic definitions. */
347 if (set_number
> current
->last_set
)
348 current
->last_set
= set_number
;
352 /* See whether it is a reasonable identifier. */
354 while (isalnum (this_line
[cnt
]) || this_line
[cnt
] == '_')
359 /* No correct character found. */
360 error_at_line (0, 0, fname
, start_line
,
361 gettext ("illegal set number"));
366 /* We have found something that looks like a
367 correct identifier. */
368 struct set_list
*runp
;
370 this_line
[cnt
] = '\0';
372 symbol
= &this_line
[start
];
374 /* Test whether the identifier was already used. */
375 runp
= current
->all_sets
;
377 if (runp
->symbol
!= NULL
378 && strcmp (runp
->symbol
, symbol
) == 0)
385 /* We cannot allow duplicate identifiers for
387 error_at_line (0, 0, fname
, start_line
,
388 gettext ("duplicate set definition"));
389 error_at_line (0, 0, runp
->fname
, runp
->line
,
391 this is the first definition"));
395 /* Allocate next free message set for identifier. */
396 set_number
= ++current
->last_set
;
402 /* We found a legal set number. */
403 current
->current_set
= find_set (current
, set_number
);
406 current
->current_set
->symbol
= symbol
;
407 current
->current_set
->fname
= fname
;
408 current
->current_set
->line
= start_line
;
411 else if (strncmp (&this_line
[1], "delset", 6) == 0)
413 int cnt
= sizeof ("delset");
415 while (isspace (this_line
[cnt
]))
418 if (isdigit (this_line
[cnt
]))
420 size_t set_number
= atol (&this_line
[cnt
]);
421 struct set_list
*set
;
423 /* Mark the message set with the given number as
425 set
= find_set (current
, set_number
);
430 /* See whether it is a reasonable identifier. */
432 while (isalnum (this_line
[cnt
]) || this_line
[cnt
] == '_')
437 error_at_line (0, 0, fname
, start_line
,
438 gettext ("illegal set number"));
444 struct set_list
*runp
;
446 this_line
[cnt
] = '\0';
448 symbol
= &this_line
[start
];
450 /* We have a symbolic set name. This name must
451 appear somewhere else in the catalogs read so
454 for (runp
= current
->all_sets
; runp
!= NULL
;
457 if (strcmp (runp
->symbol
, symbol
) == 0)
464 /* Name does not exist before. */
465 error_at_line (0, 0, fname
, start_line
,
466 gettext ("unknown set `%s'"), symbol
);
470 else if (strncmp (&this_line
[1], "quote", 5) == 0)
472 int cnt
= sizeof ("quote");
473 while (isspace (this_line
[cnt
]))
475 /* Yes, the quote char can be '\0'; this means no quote
477 current
->quote_char
= this_line
[cnt
];
483 while (this_line
[cnt
] != '\0' && !isspace (this_line
[cnt
]))
485 this_line
[cnt
] = '\0';
486 error_at_line (0, 0, fname
, start_line
,
487 gettext ("unknown directive `%s': line ignored"),
491 else if (isalnum (this_line
[0]) || this_line
[0] == '_')
493 const char *ident
= this_line
;
498 while (this_line
[0] != '\0' && !isspace (this_line
[0]));;
499 this_line
[0] = '\0'; /* Terminate the identifier. */
503 while (isspace (this_line
[0]));
504 /* Now we found the beginning of the message itself. */
506 if (isdigit (ident
[0]))
508 struct message_list
*runp
;
510 message_number
= atoi (ident
);
512 /* Find location to insert the new message. */
513 runp
= current
->current_set
->messages
;
515 if (runp
->number
== message_number
)
521 /* Oh, oh. There is already a message with this
522 number in the message set. */
523 error_at_line (0, 0, fname
, start_line
,
524 gettext ("duplicated message number"));
525 error_at_line (0, 0, runp
->fname
, runp
->line
,
526 gettext ("this is the first definition"));
529 ident
= NULL
; /* We don't have a symbol. */
531 if (message_number
!= 0
532 && message_number
> current
->current_set
->last_message
)
533 current
->current_set
->last_message
= message_number
;
535 else if (ident
[0] != '\0')
537 struct message_list
*runp
;
538 runp
= current
->current_set
->messages
;
540 /* Test whether the symbolic name was not used for
541 another message in this message set. */
543 if (runp
->symbol
!= NULL
&& strcmp (ident
, runp
->symbol
) == 0)
549 /* The name is already used. */
550 error_at_line (0, 0, fname
, start_line
,
551 gettext ("duplicated message identifier"));
552 error_at_line (0, 0, runp
->fname
, runp
->line
,
553 gettext ("this is the first definition"));
557 /* Give the message the next unused number. */
558 message_number
= ++current
->current_set
->last_message
;
563 if (message_number
!= 0)
565 struct message_list
*newp
;
567 used
= 1; /* Yes, we use the line. */
569 /* Strip quote characters, change escape sequences into
570 correct characters etc. */
571 normalize_line (fname
, start_line
, this_line
,
572 current
->quote_char
);
574 newp
= (struct message_list
*) xmalloc (sizeof (*newp
));
575 newp
->number
= message_number
;
576 newp
->message
= this_line
;
577 /* Remember symbolic name; it is NULL if none is given. */
578 newp
->symbol
= ident
;
579 /* Remember where we found the message. */
581 newp
->line
= start_line
;
583 /* Find place to insert to message. We keep them in a
584 sorted single linked list. */
585 if (current
->current_set
->messages
== NULL
586 || current
->current_set
->messages
->number
> message_number
)
588 newp
->next
= current
->current_set
->messages
;
589 current
->current_set
->messages
= newp
;
593 struct message_list
*runp
;
594 runp
= current
->current_set
->messages
;
595 while (runp
->next
!= NULL
)
596 if (runp
->next
->number
> message_number
)
600 newp
->next
= runp
->next
;
604 ++current
->total_messages
;
611 /* See whether we have any non-white space character in this
613 while (this_line
[cnt
] != '\0' && isspace (this_line
[cnt
]))
616 if (this_line
[cnt
] != '\0')
617 /* Yes, some unknown characters found. */
618 error_at_line (0, 0, fname
, start_line
,
619 gettext ("malformed line ignored"));
622 /* We can save the memory for the line if it was not used. */
624 obstack_free (&current
->mem_pool
, this_line
);
634 write_out (struct catalog
*catalog
, const char *output_name
,
635 const char *header_name
)
637 /* Computing the "optimal" size. */
638 struct set_list
*set_run
;
639 size_t best_total
, best_size
, best_depth
;
640 size_t act_size
, act_depth
;
641 struct catalog_obj obj
;
642 struct obstack string_pool
;
645 u_int32_t
*array1
, *array2
;
649 /* If not otherwise told try to read file with existing
652 read_old (catalog
, output_name
);
654 /* Initialize best_size with a very high value. */
655 best_total
= best_size
= best_depth
= UINT_MAX
;
657 /* We need some start size for testing. Let's start with
658 TOTAL_MESSAGES / 5, which theoretically provides a mean depth of
660 act_size
= 1 + catalog
->total_messages
/ 5;
662 /* We determine the size of a hash table here. Because the message
663 numbers can be chosen arbitrarily by the programmer we cannot use
664 the simple method of accessing the array using the message
665 number. The algorithm is based on the trivial hash function
666 NUMBER % TABLE_SIZE, where collisions are stored in a second
667 dimension up to TABLE_DEPTH. We here compute TABLE_SIZE so that
668 the needed space (= TABLE_SIZE * TABLE_DEPTH) is minimal. */
669 while (act_size
<= best_total
)
671 size_t deep
[act_size
];
674 memset (deep
, '\0', act_size
* sizeof (size_t));
675 set_run
= catalog
->all_sets
;
676 while (set_run
!= NULL
)
678 struct message_list
*message_run
;
680 message_run
= set_run
->messages
;
681 while (message_run
!= NULL
)
683 size_t idx
= (message_run
->number
* set_run
->number
) % act_size
;
686 if (deep
[idx
] > act_depth
)
688 act_depth
= deep
[idx
];
689 if (act_depth
* act_size
> best_total
)
692 message_run
= message_run
->next
;
694 set_run
= set_run
->next
;
697 if (act_depth
* act_size
<= best_total
)
699 /* We have found a better solution. */
700 best_total
= act_depth
* act_size
;
701 best_size
= act_size
;
702 best_depth
= act_depth
;
708 /* Let's be prepared for an empty message file. */
709 if (best_size
== UINT_MAX
)
715 /* OK, now we have the size we will use. Fill in the header, build
716 the table and the second one with swapped byte order. */
717 obj
.magic
= CATGETS_MAGIC
;
718 obj
.plane_size
= best_size
;
719 obj
.plane_depth
= best_depth
;
721 /* Allocate room for all needed arrays. */
723 (u_int32_t
*) alloca (best_size
* best_depth
* sizeof (u_int32_t
) * 3);
724 memset (array1
, '\0', best_size
* best_depth
* sizeof (u_int32_t
) * 3);
726 = (u_int32_t
*) alloca (best_size
* best_depth
* sizeof (u_int32_t
) * 3);
727 obstack_init (&string_pool
);
729 set_run
= catalog
->all_sets
;
730 while (set_run
!= NULL
)
732 struct message_list
*message_run
;
734 message_run
= set_run
->messages
;
735 while (message_run
!= NULL
)
737 size_t idx
= (((message_run
->number
* set_run
->number
) % best_size
)
739 /* Determine collision depth. */
740 while (array1
[idx
] != 0)
741 idx
+= best_size
* 3;
743 /* Store set number, message number and pointer into string
744 space, relative to the first string. */
745 array1
[idx
+ 0] = set_run
->number
;
746 array1
[idx
+ 1] = message_run
->number
;
747 array1
[idx
+ 2] = obstack_object_size (&string_pool
);
749 /* Add current string to the continuous space containing all
751 obstack_grow0 (&string_pool
, message_run
->message
,
752 strlen (message_run
->message
));
754 message_run
= message_run
->next
;
757 set_run
= set_run
->next
;
759 strings_size
= obstack_object_size (&string_pool
);
760 strings
= obstack_finish (&string_pool
);
762 /* Compute ARRAY2 by changing the byte order. */
763 for (cnt
= 0; cnt
< best_size
* best_depth
* 3; ++cnt
)
764 array2
[cnt
] = SWAPU32 (array1
[cnt
]);
766 /* Now we can write out the whole data. */
767 if (strcmp (output_name
, "-") == 0
768 || strcmp (output_name
, "/dev/stdout") == 0)
772 fd
= creat (output_name
, 0666);
774 error (EXIT_FAILURE
, errno
, gettext ("cannot open output file `%s'"),
778 /* Write out header. */
779 write (fd
, &obj
, sizeof (obj
));
781 /* We always write out the little endian version of the index
783 #if __BYTE_ORDER == __LITTLE_ENDIAN
784 write (fd
, array1
, best_size
* best_depth
* sizeof (u_int32_t
) * 3);
785 write (fd
, array2
, best_size
* best_depth
* sizeof (u_int32_t
) * 3);
786 #elif __BYTE_ORDER == __BIG_ENDIAN
787 write (fd
, array2
, best_size
* best_depth
* sizeof (u_int32_t
) * 3);
788 write (fd
, array1
, best_size
* best_depth
* sizeof (u_int32_t
) * 3);
790 # error Cannot handle __BYTE_ORDER byte order
793 /* Finally write the strings. */
794 write (fd
, strings
, strings_size
);
796 if (fd
!= STDOUT_FILENO
)
799 /* If requested now write out the header file. */
800 if (header_name
!= NULL
)
805 /* Open output file. "-" or "/dev/stdout" means write to
807 if (strcmp (header_name
, "-") == 0
808 || strcmp (header_name
, "/dev/stdout") == 0)
812 fp
= fopen (header_name
, "w");
814 error (EXIT_FAILURE
, errno
,
815 gettext ("cannot open output file `%s'"), header_name
);
818 /* Iterate over all sets and all messages. */
819 set_run
= catalog
->all_sets
;
820 while (set_run
!= NULL
)
822 struct message_list
*message_run
;
824 /* If the current message set has a symbolic name write this
826 if (set_run
->symbol
!= NULL
)
827 fprintf (fp
, "%s#define %sSet %#x\t/* %s:%Zu */\n",
828 first
? "" : "\n", set_run
->symbol
, set_run
->number
- 1,
829 set_run
->fname
, set_run
->line
);
832 message_run
= set_run
->messages
;
833 while (message_run
!= NULL
)
835 /* If the current message has a symbolic name write
836 #define out. But we have to take care for the set
837 not having a symbolic name. */
838 if (message_run
->symbol
!= NULL
)
840 if (set_run
->symbol
== NULL
)
841 fprintf (fp
, "#define AutomaticSet%d%s %#x\t/* %s:%Zu */\n",
842 set_run
->number
, message_run
->symbol
,
843 message_run
->number
, message_run
->fname
,
846 fprintf (fp
, "#define %s%s %#x\t/* %s:%Zu */\n",
847 set_run
->symbol
, message_run
->symbol
,
848 message_run
->number
, message_run
->fname
,
852 message_run
= message_run
->next
;
855 set_run
= set_run
->next
;
864 static struct set_list
*
865 find_set (struct catalog
*current
, int number
)
867 struct set_list
*result
= current
->all_sets
;
869 /* We must avoid set number 0 because a set of this number signals
870 in the tables that the entry is not occupied. */
873 while (result
!= NULL
)
874 if (result
->number
== number
)
877 result
= result
->next
;
879 /* Prepare new message set. */
880 result
= (struct set_list
*) xcalloc (1, sizeof (*result
));
881 result
->number
= number
;
882 result
->next
= current
->all_sets
;
883 current
->all_sets
= result
;
889 /* Normalize given string *in*place* by processing escape sequences
890 and quote characters. */
892 normalize_line (const char *fname
, size_t line
, char *string
, char quote_char
)
898 if (quote_char
!= '\0' && *rp
== quote_char
)
907 if (*rp
== quote_char
)
908 /* We simply end the string the first time we find an
909 unescaped quote character. */
911 else if (*rp
== '\\')
914 if (quote_char
!= '\0' && *rp
== quote_char
)
915 /* This is an extension to XPG. */
918 /* Recognize escape sequences. */
951 int number
= *rp
++ - '0';
952 while (number
<= (255 / 8) && *rp
>= '0' && *rp
<= '7')
955 number
+= *rp
++ - '0';
957 *wp
++ = (char) number
;
961 /* Simply ignore the backslash character. */
968 /* If we saw a quote character at the beginning we expect another
970 if (is_quoted
&& *rp
!= quote_char
)
971 error (0, 0, fname
, line
, gettext ("unterminated message"));
973 /* Terminate string. */
980 read_old (struct catalog
*catalog
, const char *file_name
)
982 struct catalog_info old_cat_obj
;
983 struct set_list
*set
= NULL
;
987 old_cat_obj
.status
= closed
;
988 old_cat_obj
.cat_name
= file_name
;
989 old_cat_obj
.nlspath
= NULL
;
990 __libc_lock_init (old_cat_obj
.lock
);
992 /* Try to open catalog, but don't look through the NLSPATH. */
993 __open_catalog (&old_cat_obj
);
995 if (old_cat_obj
.status
!= mmapped
&& old_cat_obj
.status
!= malloced
)
998 /* No problem, the catalog simply does not exist. */
1001 error (EXIT_FAILURE
, errno
, gettext ("while opening old catalog file"));
1004 /* OK, we have the catalog loaded. Now read all messages and merge
1005 them. When set and message number clash for any message the new
1007 for (cnt
= 0; cnt
< old_cat_obj
.plane_size
* old_cat_obj
.plane_depth
; ++cnt
)
1009 struct message_list
*message
, *last
;
1011 if (old_cat_obj
.name_ptr
[cnt
* 3 + 0] == 0)
1012 /* No message in this slot. */
1015 if (old_cat_obj
.name_ptr
[cnt
* 3 + 0] - 1 != (u_int32_t
) last_set
)
1017 last_set
= old_cat_obj
.name_ptr
[cnt
* 3 + 0] - 1;
1018 set
= find_set (catalog
, old_cat_obj
.name_ptr
[cnt
* 3 + 0] - 1);
1022 message
= set
->messages
;
1023 while (message
!= NULL
)
1025 if ((u_int32_t
) message
->number
>= old_cat_obj
.name_ptr
[cnt
* 3 + 1])
1028 message
= message
->next
;
1032 || (u_int32_t
) message
->number
> old_cat_obj
.name_ptr
[cnt
* 3 + 1])
1034 /* We have found a message which is not yet in the catalog.
1035 Insert it at the right position. */
1036 struct message_list
*newp
;
1038 newp
= (struct message_list
*) xmalloc (sizeof(*newp
));
1039 newp
->number
= old_cat_obj
.name_ptr
[cnt
* 3 + 1];
1041 &old_cat_obj
.strings
[old_cat_obj
.name_ptr
[cnt
* 3 + 2]];
1044 newp
->symbol
= NULL
;
1045 newp
->next
= message
;
1048 set
->messages
= newp
;
1052 ++catalog
->total_messages
;