1 /* Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@redhat.com>, 1996.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
43 #include "catgetsinfo.h"
47 (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
58 struct message_list
*next
;
66 struct message_list
*messages
;
73 struct set_list
*next
;
79 struct set_list
*all_sets
;
80 struct set_list
*current_set
;
81 size_t total_messages
;
85 struct obstack mem_pool
;
89 /* If non-zero force creation of new file, not using existing one. */
92 /* Name of output file. */
93 static const char *output_name
;
95 /* Name of generated C header file. */
96 static const char *header_name
;
98 /* Name and version of program. */
99 static void print_version (FILE *stream
, struct argp_state
*state
);
100 void (*argp_program_version_hook
) (FILE *, struct argp_state
*) = print_version
;
104 /* Definitions of arguments for argp functions. */
105 static const struct argp_option options
[] =
107 { "header", 'H', N_("NAME"), 0,
108 N_("Create C header file NAME containing symbol definitions") },
109 { "new", OPT_NEW
, NULL
, 0,
110 N_("Do not use existing catalog, force new output file") },
111 { "output", 'o', N_("NAME"), 0, N_("Write output to file NAME") },
112 { NULL
, 0, NULL
, 0, NULL
}
115 /* Short description of program. */
116 static const char doc
[] = N_("Generate message catalog.\
117 \vIf INPUT-FILE is -, input is read from standard input. If OUTPUT-FILE\n\
118 is -, output is written to standard output.\n");
120 /* Strings for arguments in help texts. */
121 static const char args_doc
[] = N_("\
122 -o OUTPUT-FILE [INPUT-FILE]...\n[OUTPUT-FILE [INPUT-FILE]...]");
124 /* Prototype for option handler. */
125 static error_t
parse_opt (int key
, char *arg
, struct argp_state
*state
);
127 /* Function to print some extra text in the help message. */
128 static char *more_help (int key
, const char *text
, void *input
);
130 /* Data structure to communicate with argp functions. */
131 static struct argp argp
=
133 options
, parse_opt
, args_doc
, doc
, NULL
, more_help
137 /* Wrapper functions with error checking for standard functions. */
138 extern void *xmalloc (size_t n
);
139 extern void *xcalloc (size_t n
, size_t s
);
141 /* Prototypes for local functions. */
142 static void error_print (void);
143 static struct catalog
*read_input_file (struct catalog
*current
,
145 static void write_out (struct catalog
*result
, const char *output_name
,
146 const char *header_name
);
147 static struct set_list
*find_set (struct catalog
*current
, int number
);
148 static void normalize_line (const char *fname
, size_t line
, char *string
,
150 static void read_old (struct catalog
*catalog
, const char *file_name
);
154 main (int argc
, char *argv
[])
156 struct catalog
*result
;
159 /* Set program name for messages. */
160 error_print_progname
= error_print
;
162 /* Set locale via LC_ALL. */
163 setlocale (LC_ALL
, "");
165 /* Set the text message domain. */
166 textdomain (PACKAGE
);
168 /* Initialize local variables. */
171 /* Parse and process arguments. */
172 argp_parse (&argp
, argc
, argv
, 0, &remaining
, NULL
);
174 /* Determine output file. */
175 if (output_name
== NULL
)
176 output_name
= remaining
< argc
? argv
[remaining
++] : "-";
178 /* Process all input files. */
179 setlocale (LC_CTYPE
, "C");
180 if (remaining
< argc
)
182 result
= read_input_file (result
, argv
[remaining
]);
183 while (++remaining
< argc
);
185 result
= read_input_file (NULL
, "-");
187 /* Write out the result. */
189 write_out (result
, output_name
, header_name
);
195 /* Handle program arguments. */
197 parse_opt (int key
, char *arg
, struct argp_state
*state
)
211 return ARGP_ERR_UNKNOWN
;
218 more_help (int key
, const char *text
, void *input
)
222 case ARGP_KEY_HELP_EXTRA
:
223 /* We print some extra information. */
224 return strdup (gettext ("\
225 Report bugs using the `glibcbug' script to <bugs@gnu.org>.\n"));
229 return (char *) text
;
232 /* Print the version information. */
234 print_version (FILE *stream
, struct argp_state
*state
)
236 fprintf (stream
, "gencat (GNU %s) %s\n", PACKAGE
, VERSION
);
237 fprintf (stream
, gettext ("\
238 Copyright (C) %s Free Software Foundation, Inc.\n\
239 This is free software; see the source for copying conditions. There is NO\n\
240 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
242 fprintf (stream
, gettext ("Written by %s.\n"), "Ulrich Drepper");
246 /* The address of this function will be assigned to the hook in the
251 /* We don't want the program name to be printed in messages. Emacs'
252 compile.el does not like this. */
256 static struct catalog
*
257 read_input_file (struct catalog
*current
, const char *fname
)
264 if (strcmp (fname
, "-") == 0 || strcmp (fname
, "/dev/stdin") == 0)
267 fname
= gettext ("*standard input*");
270 fp
= fopen (fname
, "r");
273 error (0, errno
, gettext ("cannot open input file `%s'"), fname
);
277 /* If we haven't seen anything yet, allocate result structure. */
280 current
= (struct catalog
*) xcalloc (1, sizeof (*current
));
282 #define obstack_chunk_alloc malloc
283 #define obstack_chunk_free free
284 obstack_init (¤t
->mem_pool
);
286 current
->current_set
= find_set (current
, NL_SETD
);
296 size_t start_line
= line_number
+ 1;
303 act_len
= getline (&buf
, &len
, fp
);
308 /* Is the line continued? */
309 if (buf
[act_len
- 1] == '\n')
312 continued
= buf
[act_len
- 1] == '\\';
319 /* Append to currently selected line. */
320 obstack_grow (¤t
->mem_pool
, buf
, act_len
);
324 obstack_1grow (¤t
->mem_pool
, '\0');
325 this_line
= (char *) obstack_finish (¤t
->mem_pool
);
328 if (this_line
[0] == '$')
330 if (isblank (this_line
[1]))
331 /* This is a comment line. Do nothing. */;
332 else if (strncmp (&this_line
[1], "set", 3) == 0)
334 int cnt
= sizeof ("set");
336 const char *symbol
= NULL
;
337 while (isspace (this_line
[cnt
]))
340 if (isdigit (this_line
[cnt
]))
342 set_number
= atol (&this_line
[cnt
]);
344 /* If the given number for the message set is
345 higher than any we used for symbolic set names
346 avoid clashing by using only higher numbers for
347 the following symbolic definitions. */
348 if (set_number
> current
->last_set
)
349 current
->last_set
= set_number
;
353 /* See whether it is a reasonable identifier. */
355 while (isalnum (this_line
[cnt
]) || this_line
[cnt
] == '_')
360 /* No correct character found. */
361 error_at_line (0, 0, fname
, start_line
,
362 gettext ("illegal set number"));
367 /* We have found something that looks like a
368 correct identifier. */
369 struct set_list
*runp
;
371 this_line
[cnt
] = '\0';
373 symbol
= &this_line
[start
];
375 /* Test whether the identifier was already used. */
376 runp
= current
->all_sets
;
378 if (runp
->symbol
!= NULL
379 && strcmp (runp
->symbol
, symbol
) == 0)
386 /* We cannot allow duplicate identifiers for
388 error_at_line (0, 0, fname
, start_line
,
389 gettext ("duplicate set definition"));
390 error_at_line (0, 0, runp
->fname
, runp
->line
,
392 this is the first definition"));
396 /* Allocate next free message set for identifier. */
397 set_number
= ++current
->last_set
;
403 /* We found a legal set number. */
404 current
->current_set
= find_set (current
, set_number
);
407 current
->current_set
->symbol
= symbol
;
408 current
->current_set
->fname
= fname
;
409 current
->current_set
->line
= start_line
;
412 else if (strncmp (&this_line
[1], "delset", 6) == 0)
414 int cnt
= sizeof ("delset");
416 while (isspace (this_line
[cnt
]))
419 if (isdigit (this_line
[cnt
]))
421 size_t set_number
= atol (&this_line
[cnt
]);
422 struct set_list
*set
;
424 /* Mark the message set with the given number as
426 set
= find_set (current
, set_number
);
431 /* See whether it is a reasonable identifier. */
433 while (isalnum (this_line
[cnt
]) || this_line
[cnt
] == '_')
438 error_at_line (0, 0, fname
, start_line
,
439 gettext ("illegal set number"));
445 struct set_list
*runp
;
447 this_line
[cnt
] = '\0';
449 symbol
= &this_line
[start
];
451 /* We have a symbolic set name. This name must
452 appear somewhere else in the catalogs read so
455 for (runp
= current
->all_sets
; runp
!= NULL
;
458 if (strcmp (runp
->symbol
, symbol
) == 0)
465 /* Name does not exist before. */
466 error_at_line (0, 0, fname
, start_line
,
467 gettext ("unknown set `%s'"), symbol
);
471 else if (strncmp (&this_line
[1], "quote", 5) == 0)
473 int cnt
= sizeof ("quote");
474 while (isspace (this_line
[cnt
]))
476 /* Yes, the quote char can be '\0'; this means no quote
478 current
->quote_char
= this_line
[cnt
];
484 while (this_line
[cnt
] != '\0' && !isspace (this_line
[cnt
]))
486 this_line
[cnt
] = '\0';
487 error_at_line (0, 0, fname
, start_line
,
488 gettext ("unknown directive `%s': line ignored"),
492 else if (isalnum (this_line
[0]) || this_line
[0] == '_')
494 const char *ident
= this_line
;
499 while (this_line
[0] != '\0' && !isspace (this_line
[0]));
500 if (this_line
[0] != '\0')
501 *this_line
++ = '\0'; /* Terminate the identifier. */
503 /* Now we found the beginning of the message itself. */
505 if (isdigit (ident
[0]))
507 struct message_list
*runp
;
508 struct message_list
*lastp
;
510 message_number
= atoi (ident
);
512 /* Find location to insert the new message. */
513 runp
= current
->current_set
->messages
;
516 if (runp
->number
== message_number
)
525 /* Oh, oh. There is already a message with this
526 number in the message set. */
527 error_at_line (0, 0, fname
, start_line
,
528 gettext ("duplicated message number"));
529 error_at_line (0, 0, runp
->fname
, runp
->line
,
530 gettext ("this is the first definition"));
533 ident
= NULL
; /* We don't have a symbol. */
535 if (message_number
!= 0
536 && message_number
> current
->current_set
->last_message
)
537 current
->current_set
->last_message
= message_number
;
539 else if (ident
[0] != '\0')
541 struct message_list
*runp
;
542 struct message_list
*lastp
;
544 /* Test whether the symbolic name was not used for
545 another message in this message set. */
546 runp
= current
->current_set
->messages
;
549 if (runp
->symbol
!= NULL
&& strcmp (ident
, runp
->symbol
) == 0)
555 /* The name is already used. */
556 error_at_line (0, 0, fname
, start_line
, gettext ("\
557 duplicated message identifier"));
558 error_at_line (0, 0, runp
->fname
, runp
->line
,
559 gettext ("this is the first definition"));
563 /* Give the message the next unused number. */
564 message_number
= ++current
->current_set
->last_message
;
569 if (message_number
!= 0)
571 struct message_list
*newp
;
573 used
= 1; /* Yes, we use the line. */
575 /* Strip quote characters, change escape sequences into
576 correct characters etc. */
577 normalize_line (fname
, start_line
, this_line
,
578 current
->quote_char
);
580 newp
= (struct message_list
*) xmalloc (sizeof (*newp
));
581 newp
->number
= message_number
;
582 newp
->message
= this_line
;
583 /* Remember the symbolic name; it is NULL if none is given. */
584 newp
->symbol
= ident
;
585 /* Remember where we found the message. */
587 newp
->line
= start_line
;
589 /* Find the place to insert the message. We keep them in a
590 sorted singly linked list. */
591 if (current
->current_set
->messages
== NULL
592 || current
->current_set
->messages
->number
> message_number
)
594 newp
->next
= current
->current_set
->messages
;
595 current
->current_set
->messages
= newp
;
599 struct message_list
*runp
;
600 runp
= current
->current_set
->messages
;
601 while (runp
->next
!= NULL
)
602 if (runp
->next
->number
> message_number
)
606 newp
->next
= runp
->next
;
610 ++current
->total_messages
;
617 /* See whether we have any non-white space character in this
619 while (this_line
[cnt
] != '\0' && isspace (this_line
[cnt
]))
622 if (this_line
[cnt
] != '\0')
623 /* Yes, some unknown characters found. */
624 error_at_line (0, 0, fname
, start_line
,
625 gettext ("malformed line ignored"));
628 /* We can save the memory for the line if it was not used. */
630 obstack_free (¤t
->mem_pool
, this_line
);
640 write_out (struct catalog
*catalog
, const char *output_name
,
641 const char *header_name
)
643 /* Computing the "optimal" size. */
644 struct set_list
*set_run
;
645 size_t best_total
, best_size
, best_depth
;
646 size_t act_size
, act_depth
;
647 struct catalog_obj obj
;
648 struct obstack string_pool
;
651 uint32_t *array1
, *array2
;
655 /* If not otherwise told try to read file with existing
658 read_old (catalog
, output_name
);
660 /* Initialize best_size with a very high value. */
661 best_total
= best_size
= best_depth
= UINT_MAX
;
663 /* We need some start size for testing. Let's start with
664 TOTAL_MESSAGES / 5, which theoretically provides a mean depth of
666 act_size
= 1 + catalog
->total_messages
/ 5;
668 /* We determine the size of a hash table here. Because the message
669 numbers can be chosen arbitrarily by the programmer we cannot use
670 the simple method of accessing the array using the message
671 number. The algorithm is based on the trivial hash function
672 NUMBER % TABLE_SIZE, where collisions are stored in a second
673 dimension up to TABLE_DEPTH. We here compute TABLE_SIZE so that
674 the needed space (= TABLE_SIZE * TABLE_DEPTH) is minimal. */
675 while (act_size
<= best_total
)
677 size_t deep
[act_size
];
680 memset (deep
, '\0', act_size
* sizeof (size_t));
681 set_run
= catalog
->all_sets
;
682 while (set_run
!= NULL
)
684 struct message_list
*message_run
;
686 message_run
= set_run
->messages
;
687 while (message_run
!= NULL
)
689 size_t idx
= (message_run
->number
* set_run
->number
) % act_size
;
692 if (deep
[idx
] > act_depth
)
694 act_depth
= deep
[idx
];
695 if (act_depth
* act_size
> best_total
)
698 message_run
= message_run
->next
;
700 set_run
= set_run
->next
;
703 if (act_depth
* act_size
<= best_total
)
705 /* We have found a better solution. */
706 best_total
= act_depth
* act_size
;
707 best_size
= act_size
;
708 best_depth
= act_depth
;
714 /* Let's be prepared for an empty message file. */
715 if (best_size
== UINT_MAX
)
721 /* OK, now we have the size we will use. Fill in the header, build
722 the table and the second one with swapped byte order. */
723 obj
.magic
= CATGETS_MAGIC
;
724 obj
.plane_size
= best_size
;
725 obj
.plane_depth
= best_depth
;
727 /* Allocate room for all needed arrays. */
729 (uint32_t *) alloca (best_size
* best_depth
* sizeof (uint32_t) * 3);
730 memset (array1
, '\0', best_size
* best_depth
* sizeof (uint32_t) * 3);
732 = (uint32_t *) alloca (best_size
* best_depth
* sizeof (uint32_t) * 3);
733 obstack_init (&string_pool
);
735 set_run
= catalog
->all_sets
;
736 while (set_run
!= NULL
)
738 struct message_list
*message_run
;
740 message_run
= set_run
->messages
;
741 while (message_run
!= NULL
)
743 size_t idx
= (((message_run
->number
* set_run
->number
) % best_size
)
745 /* Determine collision depth. */
746 while (array1
[idx
] != 0)
747 idx
+= best_size
* 3;
749 /* Store set number, message number and pointer into string
750 space, relative to the first string. */
751 array1
[idx
+ 0] = set_run
->number
;
752 array1
[idx
+ 1] = message_run
->number
;
753 array1
[idx
+ 2] = obstack_object_size (&string_pool
);
755 /* Add current string to the continuous space containing all
757 obstack_grow0 (&string_pool
, message_run
->message
,
758 strlen (message_run
->message
));
760 message_run
= message_run
->next
;
763 set_run
= set_run
->next
;
765 strings_size
= obstack_object_size (&string_pool
);
766 strings
= obstack_finish (&string_pool
);
768 /* Compute ARRAY2 by changing the byte order. */
769 for (cnt
= 0; cnt
< best_size
* best_depth
* 3; ++cnt
)
770 array2
[cnt
] = SWAPU32 (array1
[cnt
]);
772 /* Now we can write out the whole data. */
773 if (strcmp (output_name
, "-") == 0
774 || strcmp (output_name
, "/dev/stdout") == 0)
778 fd
= creat (output_name
, 0666);
780 error (EXIT_FAILURE
, errno
, gettext ("cannot open output file `%s'"),
784 /* Write out header. */
785 write (fd
, &obj
, sizeof (obj
));
787 /* We always write out the little endian version of the index
789 #if __BYTE_ORDER == __LITTLE_ENDIAN
790 write (fd
, array1
, best_size
* best_depth
* sizeof (uint32_t) * 3);
791 write (fd
, array2
, best_size
* best_depth
* sizeof (uint32_t) * 3);
792 #elif __BYTE_ORDER == __BIG_ENDIAN
793 write (fd
, array2
, best_size
* best_depth
* sizeof (uint32_t) * 3);
794 write (fd
, array1
, best_size
* best_depth
* sizeof (uint32_t) * 3);
796 # error Cannot handle __BYTE_ORDER byte order
799 /* Finally write the strings. */
800 write (fd
, strings
, strings_size
);
802 if (fd
!= STDOUT_FILENO
)
805 /* If requested now write out the header file. */
806 if (header_name
!= NULL
)
811 /* Open output file. "-" or "/dev/stdout" means write to
813 if (strcmp (header_name
, "-") == 0
814 || strcmp (header_name
, "/dev/stdout") == 0)
818 fp
= fopen (header_name
, "w");
820 error (EXIT_FAILURE
, errno
,
821 gettext ("cannot open output file `%s'"), header_name
);
824 /* Iterate over all sets and all messages. */
825 set_run
= catalog
->all_sets
;
826 while (set_run
!= NULL
)
828 struct message_list
*message_run
;
830 /* If the current message set has a symbolic name write this
832 if (set_run
->symbol
!= NULL
)
833 fprintf (fp
, "%s#define %sSet %#x\t/* %s:%Zu */\n",
834 first
? "" : "\n", set_run
->symbol
, set_run
->number
- 1,
835 set_run
->fname
, set_run
->line
);
838 message_run
= set_run
->messages
;
839 while (message_run
!= NULL
)
841 /* If the current message has a symbolic name write
842 #define out. But we have to take care for the set
843 not having a symbolic name. */
844 if (message_run
->symbol
!= NULL
)
846 if (set_run
->symbol
== NULL
)
847 fprintf (fp
, "#define AutomaticSet%d%s %#x\t/* %s:%Zu */\n",
848 set_run
->number
, message_run
->symbol
,
849 message_run
->number
, message_run
->fname
,
852 fprintf (fp
, "#define %s%s %#x\t/* %s:%Zu */\n",
853 set_run
->symbol
, message_run
->symbol
,
854 message_run
->number
, message_run
->fname
,
858 message_run
= message_run
->next
;
861 set_run
= set_run
->next
;
870 static struct set_list
*
871 find_set (struct catalog
*current
, int number
)
873 struct set_list
*result
= current
->all_sets
;
875 /* We must avoid set number 0 because a set of this number signals
876 in the tables that the entry is not occupied. */
879 while (result
!= NULL
)
880 if (result
->number
== number
)
883 result
= result
->next
;
885 /* Prepare new message set. */
886 result
= (struct set_list
*) xcalloc (1, sizeof (*result
));
887 result
->number
= number
;
888 result
->next
= current
->all_sets
;
889 current
->all_sets
= result
;
895 /* Normalize given string *in*place* by processing escape sequences
896 and quote characters. */
898 normalize_line (const char *fname
, size_t line
, char *string
, char quote_char
)
904 if (quote_char
!= '\0' && *rp
== quote_char
)
913 if (*rp
== quote_char
)
914 /* We simply end the string the first time we find an
915 unescaped quote character. */
917 else if (*rp
== '\\')
920 if (quote_char
!= '\0' && *rp
== quote_char
)
921 /* This is an extension to XPG. */
924 /* Recognize escape sequences. */
957 int number
= *rp
++ - '0';
958 while (number
<= (255 / 8) && *rp
>= '0' && *rp
<= '7')
961 number
+= *rp
++ - '0';
963 *wp
++ = (char) number
;
967 /* Simply ignore the backslash character. */
974 /* If we saw a quote character at the beginning we expect another
976 if (is_quoted
&& *rp
!= quote_char
)
977 error (0, 0, fname
, line
, gettext ("unterminated message"));
979 /* Terminate string. */
986 read_old (struct catalog
*catalog
, const char *file_name
)
988 struct catalog_info old_cat_obj
;
989 struct set_list
*set
= NULL
;
993 old_cat_obj
.status
= closed
;
994 old_cat_obj
.cat_name
= file_name
;
995 old_cat_obj
.nlspath
= NULL
;
996 __libc_lock_init (old_cat_obj
.lock
);
998 /* Try to open catalog, but don't look through the NLSPATH. */
999 __open_catalog (&old_cat_obj
);
1001 if (old_cat_obj
.status
!= mmapped
&& old_cat_obj
.status
!= malloced
)
1003 if (errno
== ENOENT
)
1004 /* No problem, the catalog simply does not exist. */
1007 error (EXIT_FAILURE
, errno
, gettext ("while opening old catalog file"));
1010 /* OK, we have the catalog loaded. Now read all messages and merge
1011 them. When set and message number clash for any message the new
1012 one is used. If the new one is empty it indicates that the
1013 message should be deleted. */
1014 for (cnt
= 0; cnt
< old_cat_obj
.plane_size
* old_cat_obj
.plane_depth
; ++cnt
)
1016 struct message_list
*message
, *last
;
1018 if (old_cat_obj
.name_ptr
[cnt
* 3 + 0] == 0)
1019 /* No message in this slot. */
1022 if (old_cat_obj
.name_ptr
[cnt
* 3 + 0] - 1 != (uint32_t) last_set
)
1024 last_set
= old_cat_obj
.name_ptr
[cnt
* 3 + 0] - 1;
1025 set
= find_set (catalog
, old_cat_obj
.name_ptr
[cnt
* 3 + 0] - 1);
1029 message
= set
->messages
;
1030 while (message
!= NULL
)
1032 if ((uint32_t) message
->number
>= old_cat_obj
.name_ptr
[cnt
* 3 + 1])
1035 message
= message
->next
;
1039 || (uint32_t) message
->number
> old_cat_obj
.name_ptr
[cnt
* 3 + 1])
1041 /* We have found a message which is not yet in the catalog.
1042 Insert it at the right position. */
1043 struct message_list
*newp
;
1045 newp
= (struct message_list
*) xmalloc (sizeof(*newp
));
1046 newp
->number
= old_cat_obj
.name_ptr
[cnt
* 3 + 1];
1048 &old_cat_obj
.strings
[old_cat_obj
.name_ptr
[cnt
* 3 + 2]];
1051 newp
->symbol
= NULL
;
1052 newp
->next
= message
;
1055 set
->messages
= newp
;
1059 ++catalog
->total_messages
;
1061 else if (*message
->message
== '\0')
1063 /* The new empty message has overridden the old one thus
1064 "deleting" it as required. Now remove the empty remains. */
1066 set
->messages
= message
->next
;
1068 last
->next
= message
->next
;