2 Unix SMB/Netbios implementation.
4 Create unicode map files from unicode_def.XXX files.
6 Copyright (C) Jeremy Allison 1997-1999.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 static char *prog_name
= NULL
;
28 * Print program usage and die.
31 static void unicode_map_usage(char *progname
)
33 fprintf(stderr
, "Usage is : %s <codepage> <inputfile> <outputfile>\n",
39 * Read a line from a buffer into a line buffer. Ensure null
43 static void read_line( char **buf
, char *line_buf
, size_t size
)
48 for(; *p
&& (*p
!= '\n') && (*p
!= '\032'); p
++) {
53 p
++; /* Go past the '\n' */
59 * Strip comment lines and blank lines from the data.
60 * Copies into a new buffer and frees the old.
61 * Returns the number of lines copied.
64 static size_t clean_data( char **buf
, size_t *size
)
69 char *newbuf
= (char *)malloc( *size
+ 1);
70 char *newbuf_p
= NULL
;
73 fprintf(stderr
, "%s: malloc fail for size %u.\n", prog_name
, (unsigned int)(*size
+ 1));
83 read_line( &p
, linebuf
, sizeof(linebuf
));
84 /* Null terminate after comment. */
85 if((cp
= strchr( linebuf
, '#'))!= NULL
)
88 for(cp
= linebuf
;*cp
&& isspace(*cp
); cp
++)
94 safe_strcpy(newbuf_p
, cp
, *size
- (newbuf_p
- newbuf
));
96 newbuf_p
+= (strlen(newbuf_p
) + 1);
105 * Parse a uint16 from a codepage file.
108 static BOOL
parse_uint16(char *buf
, uint16
*uip
)
113 ui
= (unsigned int)strtol(buf
, &endptr
, 0);
114 if(endptr
== buf
|| ui
> 65535)
122 * Print a parse error and exit.
125 static void parse_error(const char *buf
, const char *input_file
, const char *msg
)
127 fprintf(stderr
, "%s: In file %s : %s whilst parsing line \n%s\n", prog_name
,
128 input_file
, msg
, buf
);
133 * Create a compiled unicode map file from a unicode map definition file.
136 static int do_compile(const char *codepage
, const char *input_file
, const char *output_file
)
142 char *orig_buf
= NULL
;
143 char *output_buf
= NULL
;
144 uint16 cp_to_ucs2
[65536];
145 uint16 ucs2_to_cp
[65536];
146 BOOL multibyte_code_page
= False
;
151 /* Get the size of the input file. Read the entire thing into memory. */
152 if(sys_stat((char *)input_file
, &st
)!= 0) {
153 fprintf(stderr
, "%s: failed to get the file size for file %s. Error was %s\n",
154 prog_name
, input_file
, strerror(errno
));
158 size
= (size_t)st
.st_size
;
160 if((fp
= sys_fopen(input_file
, "r")) == NULL
) {
161 fprintf(stderr
, "%s: cannot open file %s for input.\n", prog_name
, input_file
);
165 /* As we will be reading text, allocate one more byte for a '\0' */
166 if((buf
= (char *)malloc( size
+ 1 )) == NULL
) {
167 fprintf(stderr
, "%s: malloc fail for size %d.\n", prog_name
, size
+ 1);
172 if(fread( buf
, 1, size
, fp
) != size
) {
173 fprintf(stderr
, "%s: read failed for file %s. Error was %s.\n", prog_name
,
174 input_file
, strerror(errno
));
180 /* Null terminate the text read. */
183 /* Go through the data line by line, strip out comments (anything
184 after a '#' to end-of-line) and blank lines. The rest should be
188 num_lines
= clean_data( &buf
, &size
);
190 orig_buf
= buf
; /* Store for free(). */
193 * Initialize the output data.
196 memset(cp_to_ucs2
, '\0', sizeof(cp_to_ucs2
));
198 for (i
= 1; i
< 65536; i
++)
199 ucs2_to_cp
[i
] = (uint16
)'_';
201 /* Now convert the lines into the compiled form. */
203 for(i
= 0; i
< num_lines
; i
++) {
209 /* Get the codepage value. */
210 if(!next_token(&p
, token_buf
, NULL
, sizeof(token_buf
)))
211 parse_error(buf
, input_file
, "cannot parse first value");
213 if(!parse_uint16( token_buf
, &cp
))
214 parse_error(buf
, input_file
, "first value doesn't resolve to an unsigned 16 bit integer");
217 multibyte_code_page
= True
;
219 /* Get the ucs2 value. */
221 if(!next_token(&p
, token_buf
, NULL
, sizeof(token_buf
))) {
224 * Some of the multibyte codepage to unicode map files
225 * list a single byte as a leading multibyte and have no
229 buf
+= (strlen(buf
) + 1);
233 if(!parse_uint16( token_buf
, &ucs2
))
234 parse_error(buf
, input_file
, "second value doesn't resolve to an unsigned 16 bit integer");
237 * Set up the cross reference in little-endian format.
240 SSVAL(((char *)&cp_to_ucs2
[cp
]),0,ucs2
);
241 SSVAL(((char *)&ucs2_to_cp
[ucs2
]),0,cp
);
246 buf
+= (strlen(buf
) + 1);
249 size
= UNICODE_MAP_HEADER_SIZE
+ (multibyte_code_page
? (4*65536) : (2*256 + 2*65536));
251 if((output_buf
= (char *)malloc( size
)) == NULL
) {
252 fprintf(stderr
, "%s: output buffer malloc fail for size %d.\n", prog_name
, size
);
257 /* Setup the output file header. */
258 SSVAL(output_buf
,UNICODE_MAP_VERSION_OFFSET
,UNICODE_MAP_FILE_VERSION_ID
);
259 memset(&output_buf
[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET
],'\0',UNICODE_MAP_CODEPAGE_ID_SIZE
);
260 safe_strcpy(&output_buf
[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET
], codepage
, UNICODE_MAP_CODEPAGE_ID_SIZE
- 1);
261 output_buf
[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET
+UNICODE_MAP_CODEPAGE_ID_SIZE
-1] = '\0';
263 offset
= UNICODE_MAP_HEADER_SIZE
;
265 if (multibyte_code_page
) {
266 SIVAL(output_buf
,UNICODE_MAP_CP_TO_UNICODE_LENGTH_OFFSET
,2*65536);
267 memcpy(output_buf
+offset
, (char *)cp_to_ucs2
, 2*65536);
270 SIVAL(output_buf
,UNICODE_MAP_CP_TO_UNICODE_LENGTH_OFFSET
,2*256);
271 memcpy(output_buf
+offset
, (char *)cp_to_ucs2
, 2*256);
274 SIVAL(output_buf
,UNICODE_MAP_UNICODE_TO_CP_LENGTH_OFFSET
,65536*2);
275 memcpy(output_buf
+offset
, (char *)ucs2_to_cp
, 2*65536);
277 /* Now write out the output_buf. */
278 if((fp
= sys_fopen(output_file
, "w"))==NULL
) {
279 fprintf(stderr
, "%s: Cannot open output file %s. Error was %s.\n",
280 prog_name
, output_file
, strerror(errno
));
284 if(fwrite(output_buf
, 1, size
, fp
) != size
) {
285 fprintf(stderr
, "%s: Cannot write output file %s. Error was %s.\n",
286 prog_name
, output_file
, strerror(errno
));
297 int main(int argc
, char **argv
)
299 const char *codepage
= NULL
;
300 char *input_file
= NULL
;
301 char *output_file
= NULL
;
306 unicode_map_usage(prog_name
);
309 input_file
= argv
[2];
310 output_file
= argv
[3];
312 return do_compile( codepage
, input_file
, output_file
);