1 /* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
42 #include "catgetsinfo.h"
46 (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
57 struct message_list
*next
;
65 struct message_list
*messages
;
72 struct set_list
*next
;
78 struct set_list
*all_sets
;
79 struct set_list
*current_set
;
80 size_t total_messages
;
84 struct obstack mem_pool
;
88 /* If non-zero force creation of new file, not using existing one. */
91 /* Name of output file. */
92 static const char *output_name
;
94 /* Name of generated C header file. */
95 static const char *header_name
;
97 /* Name and version of program. */
98 static void print_version (FILE *stream
, struct argp_state
*state
);
99 void (*argp_program_version_hook
) (FILE *, struct argp_state
*) = print_version
;
103 /* Definitions of arguments for argp functions. */
104 static const struct argp_option options
[] =
106 { "header", 'H', N_("NAME"), 0,
107 N_("Create C header file NAME containing symbol definitions") },
108 { "new", OPT_NEW
, NULL
, 0,
109 N_("Do not use existing catalog, force new output file") },
110 { "output", 'o', N_("NAME"), 0, N_("Write output to file NAME") },
111 { NULL
, 0, NULL
, 0, NULL
}
114 /* Short description of program. */
115 static const char doc
[] = N_("Generate message catalog.\
116 \vIf INPUT-FILE is -, input is read from standard input. If OUTPUT-FILE\n\
117 is -, output is written to standard output.\n");
119 /* Strings for arguments in help texts. */
120 static const char args_doc
[] = N_("\
121 -o OUTPUT-FILE [INPUT-FILE]...\n[OUTPUT-FILE [INPUT-FILE]...]");
123 /* Prototype for option handler. */
124 static error_t
parse_opt (int key
, char *arg
, struct argp_state
*state
);
126 /* Function to print some extra text in the help message. */
127 static char *more_help (int key
, const char *text
, void *input
);
129 /* Data structure to communicate with argp functions. */
130 static struct argp argp
=
132 options
, parse_opt
, args_doc
, doc
, NULL
, more_help
136 /* Wrapper functions with error checking for standard functions. */
137 extern void *xmalloc (size_t n
);
139 /* Prototypes for local functions. */
140 static void error_print (void);
141 static struct catalog
*read_input_file (struct catalog
*current
,
143 static void write_out (struct catalog
*result
, const char *output_name
,
144 const char *header_name
);
145 static struct set_list
*find_set (struct catalog
*current
, int number
);
146 static void normalize_line (const char *fname
, size_t line
, char *string
,
148 static void read_old (struct catalog
*catalog
, const char *file_name
);
/* Program entry point.  The visible steps are: install error_print as the
   error_print_progname hook, select the locale and the text domain, parse
   the command line with argp_parse, choose the output file name, hand the
   input files to read_input_file and pass the result to write_out.  */
152 main (int argc
, char *argv
[])
154 struct catalog
*result
;
157 /* Set program name for messages. */
158 error_print_progname
= error_print
;
160 /* Set locale via LC_ALL. */
161 setlocale (LC_ALL
, "");
163 /* Set the text message domain. */
164 textdomain (PACKAGE
);
166 /* Initialize local variables. */
169 /* Parse and process arguments. */
/* argp_parse stores in `remaining' the index used below to walk the
   arguments that are left in ARGV.  */
170 argp_parse (&argp
, argc
, argv
, 0, &remaining
, NULL
);
172 /* Determine output file. */
/* When output_name is still NULL the next remaining argument is consumed
   as the output name; with no argument left "-" is used.  */
173 if (output_name
== NULL
)
174 output_name
= remaining
< argc
? argv
[remaining
++] : "-";
176 /* Process all input files. */
/* NOTE(review): LC_CTYPE is switched to "C" before reading; presumably so
   the character classification done while parsing does not depend on the
   user's locale -- confirm.  */
177 setlocale (LC_CTYPE
, "C");
178 if (remaining
< argc
)
/* Each remaining argument is passed to read_input_file in turn; the
   catalog returned by one call is the `current' argument of the next.  */
180 result
= read_input_file (result
, argv
[remaining
]);
181 while (++remaining
< argc
);
/* The other call reads "-", which read_input_file compares against when
   deciding to report the input as *standard input*.  */
183 result
= read_input_file (NULL
, "-");
185 /* Write out the result. */
187 write_out (result
, output_name
, header_name
);
193 /* Handle program arguments.  This is the option handler listed in the
   initializer of the `argp' structure, next to `options'.  */
195 parse_opt (int key
, char *arg
, struct argp_state
*state
)
/* NOTE(review): presumably the result for keys the handler does not deal
   with itself -- confirm against the complete function body.  */
209 return ARGP_ERR_UNKNOWN
;
/* Extra help text for argp; this function is listed in the initializer of
   the `argp' structure.  For ARGP_KEY_HELP_EXTRA it returns a strdup'ed
   copy of the translated bug-reporting note; the other visible return
   hands back TEXT itself, cast to `char *'.  */
216 more_help (int key
, const char *text
, void *input
)
220 case ARGP_KEY_HELP_EXTRA
:
221 /* We print some extra information. */
222 return strdup (gettext ("\
223 Report bugs using the `glibcbug' script to <bugs@gnu.org>.\n"));
/* TEXT is const-qualified in the signature, hence the cast.  */
227 return (char *) text
;
230 /* Print the version information to STREAM: the "gencat (GNU ...)" banner
   built from PACKAGE and VERSION, the translated copyright notice and the
   author line.  This function is installed as argp_program_version_hook.  */
232 print_version (FILE *stream
, struct argp_state
*state
)
234 fprintf (stream
, "gencat (GNU %s) %s\n", PACKAGE
, VERSION
);
235 fprintf (stream
, gettext ("\
236 Copyright (C) %s Free Software Foundation, Inc.\n\
237 This is free software; see the source for copying conditions. There is NO\n\
238 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
240 fprintf (stream
, gettext ("Written by %s.\n"), "Ulrich Drepper");
244 /* The address of this function will be assigned to the hook in the
249 /* We don't want the program name to be printed in messages. Emacs'
250 compile.el does not like this. */
254 static struct catalog
*
255 read_input_file (struct catalog
*current
, const char *fname
)
262 if (strcmp (fname
, "-") == 0 || strcmp (fname
, "/dev/stdin") == 0)
265 fname
= gettext ("*standard input*");
268 fp
= fopen (fname
, "r");
271 error (0, errno
, gettext ("cannot open input file `%s'"), fname
);
275 /* If we haven't seen anything yet, allocate result structure. */
278 current
= (struct catalog
*) xmalloc (sizeof (*current
));
280 current
->all_sets
= NULL
;
281 current
->total_messages
= 0;
282 current
->last_set
= 0;
283 current
->current_set
= find_set (current
, NL_SETD
);
285 #define obstack_chunk_alloc malloc
286 #define obstack_chunk_free free
287 obstack_init (¤t
->mem_pool
);
297 size_t start_line
= line_number
+ 1;
304 act_len
= getline (&buf
, &len
, fp
);
309 /* Is the line continued? */
310 if (buf
[act_len
- 1] == '\n')
313 continued
= buf
[act_len
- 1] == '\\';
320 /* Append to currently selected line. */
321 obstack_grow (¤t
->mem_pool
, buf
, act_len
);
325 obstack_1grow (¤t
->mem_pool
, '\0');
326 this_line
= (char *) obstack_finish (¤t
->mem_pool
);
329 if (this_line
[0] == '$')
331 if (isspace (this_line
[1]))
332 /* This is a comment line. Do nothing. */;
333 else if (strncmp (&this_line
[1], "set", 3) == 0)
335 int cnt
= sizeof ("set");
337 const char *symbol
= NULL
;
338 while (isspace (this_line
[cnt
]))
341 if (isdigit (this_line
[cnt
]))
343 set_number
= atol (&this_line
[cnt
]);
345 /* If the given number for the character set is
346 higher than any we used for symbolic set names
347 avoid clashing by using only higher numbers for
348 the following symbolic definitions. */
349 if (set_number
> current
->last_set
)
350 current
->last_set
= set_number
;
354 /* See whether it is a reasonable identifier. */
356 while (isalnum (this_line
[cnt
]) || this_line
[cnt
] == '_')
361 /* No correct character found. */
362 error_at_line (0, 0, fname
, start_line
,
363 gettext ("illegal set number"));
368 /* We have found something that looks like a
369 correct identifier. */
370 struct set_list
*runp
;
372 this_line
[cnt
] = '\0';
374 symbol
= &this_line
[start
];
376 /* Test whether the identifier was already used. */
377 runp
= current
->all_sets
;
379 if (runp
->symbol
!= NULL
380 && strcmp (runp
->symbol
, symbol
) == 0)
387 /* We cannot allow duplicate identifiers for
389 error_at_line (0, 0, fname
, start_line
,
390 gettext ("duplicate set definition"));
391 error_at_line (0, 0, runp
->fname
, runp
->line
,
393 this is the first definition"));
397 /* Allocate next free message set for identifier. */
398 set_number
= ++current
->last_set
;
404 /* We found a legal set number. */
405 current
->current_set
= find_set (current
, set_number
);
408 current
->current_set
->symbol
= symbol
;
409 current
->current_set
->fname
= fname
;
410 current
->current_set
->line
= start_line
;
413 else if (strncmp (&this_line
[1], "delset", 6) == 0)
415 int cnt
= sizeof ("delset");
417 while (isspace (this_line
[cnt
]))
420 if (isdigit (this_line
[cnt
]))
422 size_t set_number
= atol (&this_line
[cnt
]);
423 struct set_list
*set
;
425 /* Mark the message set with the given number as
427 set
= find_set (current
, set_number
);
432 /* See whether it is a reasonable identifier. */
434 while (isalnum (this_line
[cnt
]) || this_line
[cnt
] == '_')
439 error_at_line (0, 0, fname
, start_line
,
440 gettext ("illegal set number"));
446 struct set_list
*runp
;
448 this_line
[cnt
] = '\0';
450 symbol
= &this_line
[start
];
452 /* We have a symbolic set name. This name must
453 appear somewhere else in the catalogs read so
456 for (runp
= current
->all_sets
; runp
!= NULL
;
459 if (strcmp (runp
->symbol
, symbol
) == 0)
466 /* Name does not exist before. */
467 error_at_line (0, 0, fname
, start_line
,
468 gettext ("unknown set `%s'"), symbol
);
472 else if (strncmp (&this_line
[1], "quote", 5) == 0)
474 int cnt
= sizeof ("quote");
475 while (isspace (this_line
[cnt
]))
477 /* Yes, the quote char can be '\0'; this means no quote
479 current
->quote_char
= this_line
[cnt
];
485 while (this_line
[cnt
] != '\0' && !isspace (this_line
[cnt
]))
487 this_line
[cnt
] = '\0';
488 error_at_line (0, 0, fname
, start_line
,
489 gettext ("unknown directive `%s': line ignored"),
493 else if (isalnum (this_line
[0]) || this_line
[0] == '_')
495 const char *ident
= this_line
;
500 while (this_line
[0] != '\0' && !isspace (this_line
[0]));;
501 this_line
[0] = '\0'; /* Terminate the identifier. */
505 while (isspace (this_line
[0]));
506 /* Now we found the beginning of the message itself. */
508 if (isdigit (ident
[0]))
510 struct message_list
*runp
;
512 message_number
= atoi (ident
);
514 /* Find location to insert the new message. */
515 runp
= current
->current_set
->messages
;
517 if (runp
->number
== message_number
)
523 /* Oh, oh. There is already a message with this
524 number in the message set. */
525 error_at_line (0, 0, fname
, start_line
,
526 gettext ("duplicated message number"));
527 error_at_line (0, 0, runp
->fname
, runp
->line
,
528 gettext ("this is the first definition"));
531 ident
= NULL
; /* We don't have a symbol. */
533 if (message_number
!= 0
534 && message_number
> current
->current_set
->last_message
)
535 current
->current_set
->last_message
= message_number
;
537 else if (ident
[0] != '\0')
539 struct message_list
*runp
;
540 runp
= current
->current_set
->messages
;
542 /* Test whether the symbolic name was not used for
543 another message in this message set. */
545 if (runp
->symbol
!= NULL
&& strcmp (ident
, runp
->symbol
) == 0)
551 /* The name is already used. */
552 error_at_line (0, 0, fname
, start_line
,
553 gettext ("duplicated message identifier"));
554 error_at_line (0, 0, runp
->fname
, runp
->line
,
555 gettext ("this is the first definition"));
559 /* Give the message the next unused number. */
560 message_number
= ++current
->current_set
->last_message
;
565 if (message_number
!= 0)
567 struct message_list
*newp
;
569 used
= 1; /* Yes, we use the line. */
571 /* Strip quote characters, change escape sequences into
572 correct characters etc. */
573 normalize_line (fname
, start_line
, this_line
,
574 current
->quote_char
);
576 newp
= (struct message_list
*) xmalloc (sizeof (*newp
));
577 newp
->number
= message_number
;
578 newp
->message
= this_line
;
579 /* Remember the symbolic name; it is NULL if none is given. */
580 newp
->symbol
= ident
;
581 /* Remember where we found the character. */
583 newp
->line
= start_line
;
585 /* Find the place to insert the message. We keep them in a
586 sorted singly linked list. */
587 if (current
->current_set
->messages
== NULL
588 || current
->current_set
->messages
->number
> message_number
)
590 newp
->next
= current
->current_set
->messages
;
591 current
->current_set
->messages
= newp
;
595 struct message_list
*runp
;
596 runp
= current
->current_set
->messages
;
597 while (runp
->next
!= NULL
)
598 if (runp
->next
->number
> message_number
)
602 newp
->next
= runp
->next
;
606 ++current
->total_messages
;
613 /* See whether we have any non-white space character in this
615 while (this_line
[cnt
] != '\0' && isspace (this_line
[cnt
]))
618 if (this_line
[cnt
] != '\0')
619 /* Yes, some unknown characters found. */
620 error_at_line (0, 0, fname
, start_line
,
621 gettext ("malformed line ignored"));
624 /* We can save the memory for the line if it was not used. */
626 obstack_free (¤t
->mem_pool
, this_line
);
636 write_out (struct catalog
*catalog
, const char *output_name
,
637 const char *header_name
)
639 /* Computing the "optimal" size. */
640 struct set_list
*set_run
;
641 size_t best_total
, best_size
, best_depth
;
642 size_t act_size
, act_depth
;
643 struct catalog_obj obj
;
644 struct obstack string_pool
;
647 u_int32_t
*array1
, *array2
;
651 /* If not otherwise told try to read file with existing
654 read_old (catalog
, output_name
);
656 /* Initialize best_size with a very high value. */
657 best_total
= best_size
= best_depth
= UINT_MAX
;
659 /* We need some start size for testing. Let's start with
660 TOTAL_MESSAGES / 5, which theoretically provides a mean depth of
662 act_size
= 1 + catalog
->total_messages
/ 5;
664 /* We determine the size of a hash table here. Because the message
665 numbers can be chosen arbitrary by the programmer we cannot use
666 the simple method of accessing the array using the message
667 number. The algorithm is based on the trivial hash function
668 NUMBER % TABLE_SIZE, where collisions are stored in a second
669 dimension up to TABLE_DEPTH. We here compute TABLE_SIZE so that
670 the needed space (= TABLE_SIZE * TABLE_DEPTH) is minimal. */
671 while (act_size
<= best_total
)
673 size_t deep
[act_size
];
676 memset (deep
, '\0', act_size
* sizeof (size_t));
677 set_run
= catalog
->all_sets
;
678 while (set_run
!= NULL
)
680 struct message_list
*message_run
;
682 message_run
= set_run
->messages
;
683 while (message_run
!= NULL
)
685 size_t idx
= (message_run
->number
* set_run
->number
) % act_size
;
688 if (deep
[idx
] > act_depth
)
690 act_depth
= deep
[idx
];
691 if (act_depth
* act_size
> best_total
)
694 message_run
= message_run
->next
;
696 set_run
= set_run
->next
;
699 if (act_depth
* act_size
<= best_total
)
701 /* We have found a better solution. */
702 best_total
= act_depth
* act_size
;
703 best_size
= act_size
;
704 best_depth
= act_depth
;
710 /* Let's be prepared for an empty message file. */
711 if (best_size
== UINT_MAX
)
717 /* OK, now we have the size we will use. Fill in the header, build
718 the table and the second one with swapped byte order. */
719 obj
.magic
= CATGETS_MAGIC
;
720 obj
.plane_size
= best_size
;
721 obj
.plane_depth
= best_depth
;
723 /* Allocate room for all needed arrays. */
725 (u_int32_t
*) alloca (best_size
* best_depth
* sizeof (u_int32_t
) * 3);
726 memset (array1
, '\0', best_size
* best_depth
* sizeof (u_int32_t
) * 3);
728 = (u_int32_t
*) alloca (best_size
* best_depth
* sizeof (u_int32_t
) * 3);
729 obstack_init (&string_pool
);
731 set_run
= catalog
->all_sets
;
732 while (set_run
!= NULL
)
734 struct message_list
*message_run
;
736 message_run
= set_run
->messages
;
737 while (message_run
!= NULL
)
739 size_t idx
= (((message_run
->number
* set_run
->number
) % best_size
)
741 /* Determine collision depth. */
742 while (array1
[idx
] != 0)
743 idx
+= best_size
* 3;
745 /* Store set number, message number and pointer into string
746 space, relative to the first string. */
747 array1
[idx
+ 0] = set_run
->number
;
748 array1
[idx
+ 1] = message_run
->number
;
749 array1
[idx
+ 2] = obstack_object_size (&string_pool
);
751 /* Add current string to the continuous space containing all
753 obstack_grow0 (&string_pool
, message_run
->message
,
754 strlen (message_run
->message
));
756 message_run
= message_run
->next
;
759 set_run
= set_run
->next
;
761 strings_size
= obstack_object_size (&string_pool
);
762 strings
= obstack_finish (&string_pool
);
764 /* Compute ARRAY2 by changing the byte order. */
765 for (cnt
= 0; cnt
< best_size
* best_depth
* 3; ++cnt
)
766 array2
[cnt
] = SWAPU32 (array1
[cnt
]);
768 /* Now we can write out the whole data. */
769 if (strcmp (output_name
, "-") == 0
770 || strcmp (output_name
, "/dev/stdout") == 0)
774 fd
= creat (output_name
, 0666);
776 error (EXIT_FAILURE
, errno
, gettext ("cannot open output file `%s'"),
780 /* Write out header. */
781 write (fd
, &obj
, sizeof (obj
));
783 /* We always write out the little endian version of the index
785 #if __BYTE_ORDER == __LITTLE_ENDIAN
786 write (fd
, array1
, best_size
* best_depth
* sizeof (u_int32_t
) * 3);
787 write (fd
, array2
, best_size
* best_depth
* sizeof (u_int32_t
) * 3);
788 #elif __BYTE_ORDER == __BIG_ENDIAN
789 write (fd
, array2
, best_size
* best_depth
* sizeof (u_int32_t
) * 3);
790 write (fd
, array1
, best_size
* best_depth
* sizeof (u_int32_t
) * 3);
792 # error Cannot handle __BYTE_ORDER byte order
795 /* Finally write the strings. */
796 write (fd
, strings
, strings_size
);
798 if (fd
!= STDOUT_FILENO
)
801 /* If requested now write out the header file. */
802 if (header_name
!= NULL
)
807 /* Open output file. "-" or "/dev/stdout" means write to
809 if (strcmp (header_name
, "-") == 0
810 || strcmp (header_name
, "/dev/stdout") == 0)
814 fp
= fopen (header_name
, "w");
816 error (EXIT_FAILURE
, errno
,
817 gettext ("cannot open output file `%s'"), header_name
);
820 /* Iterate over all sets and all messages. */
821 set_run
= catalog
->all_sets
;
822 while (set_run
!= NULL
)
824 struct message_list
*message_run
;
826 /* If the current message set has a symbolic name write this
828 if (set_run
->symbol
!= NULL
)
829 fprintf (fp
, "%s#define %sSet %#x\t/* %s:%Zu */\n",
830 first
? "" : "\n", set_run
->symbol
, set_run
->number
- 1,
831 set_run
->fname
, set_run
->line
);
834 message_run
= set_run
->messages
;
835 while (message_run
!= NULL
)
837 /* If the current message has a symbolic name write
838 #define out. But we have to take care for the set
839 not having a symbolic name. */
840 if (message_run
->symbol
!= NULL
)
841 if (set_run
->symbol
== NULL
)
842 fprintf (fp
, "#define AutomaticSet%d%s %#x\t/* %s:%Zu */\n",
843 set_run
->number
, message_run
->symbol
,
844 message_run
->number
, message_run
->fname
,
847 fprintf (fp
, "#define %s%s %#x\t/* %s:%Zu */\n",
848 set_run
->symbol
, message_run
->symbol
,
849 message_run
->number
, message_run
->fname
,
852 message_run
= message_run
->next
;
855 set_run
= set_run
->next
;
/* Look up the message set NUMBER in CURRENT->all_sets.  The visible code
   walks the list comparing each entry's `number' field; after the loop a
   new set_list is allocated with xmalloc, given NUMBER and an empty
   message list, and linked in at the head of CURRENT->all_sets.  Callers
   store the returned pointer (e.g. in CURRENT->current_set).
   NOTE(review): the return statements and any adjustment of NUMBER that
   the comment about set number 0 refers to are not visible here --
   confirm against the complete function.  */
864 static struct set_list
*
865 find_set (struct catalog
*current
, int number
)
867 struct set_list
*result
= current
->all_sets
;
869 /* We must avoid set number 0 because a set of this number signals
870 in the tables that the entry is not occupied. */
/* Linear search through the singly linked list of sets.  */
873 while (result
!= NULL
)
874 if (result
->number
== number
)
877 result
= result
->next
;
879 /* Prepare new message set. */
880 result
= (struct set_list
*) xmalloc (sizeof (*result
));
881 result
->number
= number
;
883 result
->messages
= NULL
;
/* The new set becomes the first element of CURRENT->all_sets.  */
884 result
->next
= current
->all_sets
;
885 current
->all_sets
= result
;
/* Normalize STRING *in*place* by processing escape sequences and quote
   characters.

   FNAME and LINE name the place the message came from; they are used
   only for the diagnostic.  QUOTE_CHAR is the active quote character,
   '\0' meaning that none is set.

   If STRING starts with QUOTE_CHAR the message is quoted: the leading
   quote is dropped and the first unescaped quote ends the message, with
   anything after it discarded.  A backslash introduces \n \t \v \b \r
   \f \\, an octal character code, or (as an extension to XPG) an escaped
   quote character.  For any other character the backslash is dropped and
   the character kept.  The result is never longer than the input, so it
   is written over STRING and NUL-terminated.

   A quoted message without closing quote is reported through
   error_at_line, like every other per-line diagnostic in this file.  The
   previous call to error() passed FNAME in the position of the format
   string.  */
static void
normalize_line (const char *fname, size_t line, char *string, char quote_char)
{
  char *rp = string;		/* Read position.  */
  char *wp = string;		/* Write position, never ahead of RP.  */
  int is_quoted = 0;

  if (quote_char != '\0' && *rp == quote_char)
    {
      is_quoted = 1;
      ++rp;
    }

  while (*rp != '\0')
    {
      if (*rp == quote_char)
	/* We simply end the string the first time we find a quote
	   character which is not escaped.  */
	break;

      if (*rp != '\\')
	{
	  *wp++ = *rp++;
	  continue;
	}

      /* Skip the backslash and look at what it introduces.  */
      ++rp;

      if (quote_char != '\0' && *rp == quote_char)
	{
	  /* This is an extension to XPG.  */
	  *wp++ = *rp++;
	  continue;
	}

      /* Recognize escape sequences.  */
      switch (*rp)
	{
	case 'n':
	  *wp++ = '\n';
	  ++rp;
	  break;
	case 't':
	  *wp++ = '\t';
	  ++rp;
	  break;
	case 'v':
	  *wp++ = '\v';
	  ++rp;
	  break;
	case 'b':
	  *wp++ = '\b';
	  ++rp;
	  break;
	case 'r':
	  *wp++ = '\r';
	  ++rp;
	  break;
	case 'f':
	  *wp++ = '\f';
	  ++rp;
	  break;
	case '\\':
	  *wp++ = '\\';
	  ++rp;
	  break;
	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	  {
	    /* Octal character code.  Another digit is consumed only
	       while the value still fits into one byte after the
	       multiplication by 8.  */
	    int number = *rp++ - '0';
	    while (number <= (255 / 8) && *rp >= '0' && *rp <= '7')
	      {
		number *= 8;
		number += *rp++ - '0';
	      }
	    *wp++ = (char) number;
	  }
	  break;
	default:
	  /* Simply ignore the backslash character.  The character
	     after it is copied by the next loop iteration.  */
	  break;
	}
    }

  /* If we saw a quote character at the beginning we expect another
     one at the end.  */
  if (is_quoted && *rp != quote_char)
    error_at_line (0, 0, fname, (unsigned int) line,
		   gettext ("unterminated message"));

  /* Terminate string.  */
  *wp = '\0';
}
982 read_old (struct catalog
*catalog
, const char *file_name
)
984 struct catalog_info old_cat_obj
;
985 struct set_list
*set
= NULL
;
989 old_cat_obj
.status
= closed
;
990 old_cat_obj
.cat_name
= file_name
;
991 old_cat_obj
.nlspath
= NULL
;
992 __libc_lock_init (old_cat_obj
.lock
);
994 /* Try to open catalog, but don't look through the NLSPATH. */
995 __open_catalog (&old_cat_obj
);
997 if (old_cat_obj
.status
!= mmapped
&& old_cat_obj
.status
!= malloced
)
999 /* No problem, the catalog simply does not exist. */
1002 error (EXIT_FAILURE
, errno
, gettext ("while opening old catalog file"));
1004 /* OK, we have the catalog loaded. Now read all messages and merge
1005 them. When set and message number clash for any message the new
1007 for (cnt
= 0; cnt
< old_cat_obj
.plane_size
* old_cat_obj
.plane_depth
; ++cnt
)
1009 struct message_list
*message
, *last
;
1011 if (old_cat_obj
.name_ptr
[cnt
* 3 + 0] == 0)
1012 /* No message in this slot. */
1015 if (old_cat_obj
.name_ptr
[cnt
* 3 + 0] - 1 != (u_int32_t
) last_set
)
1017 last_set
= old_cat_obj
.name_ptr
[cnt
* 3 + 0] - 1;
1018 set
= find_set (catalog
, old_cat_obj
.name_ptr
[cnt
* 3 + 0] - 1);
1022 message
= set
->messages
;
1023 while (message
!= NULL
)
1025 if ((u_int32_t
) message
->number
>= old_cat_obj
.name_ptr
[cnt
* 3 + 1])
1028 message
= message
->next
;
1032 || (u_int32_t
) message
->number
> old_cat_obj
.name_ptr
[cnt
* 3 + 1])
1034 /* We have found a message which is not yet in the catalog.
1035 Insert it at the right position. */
1036 struct message_list
*newp
;
1038 newp
= (struct message_list
*) xmalloc (sizeof(*newp
));
1039 newp
->number
= old_cat_obj
.name_ptr
[cnt
* 3 + 1];
1041 &old_cat_obj
.strings
[old_cat_obj
.name_ptr
[cnt
* 3 + 2]];
1044 newp
->symbol
= NULL
;
1045 newp
->next
= message
;
1048 set
->messages
= newp
;
1052 ++catalog
->total_messages
;