2 Unix SMB/Netbios implementation.
4 Create unicode map files from unicode_def.XXX files.
6 Copyright (C) Jeremy Allison 1997-1999.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 static char *prog_name
= NULL
;
28 * Print program usage and die.
31 static void unicode_map_usage(char *progname
)
33 fprintf(stderr
, "Usage is : %s <codepage> <inputfile> <outputfile>\n",
39 * Read a line from a buffer into a line buffer. Ensure null
43 static void read_line( char **buf
, char *line_buf
, size_t size
)
48 for(; *p
&& (*p
!= '\n') && (*p
!= '\032'); p
++) {
53 p
++; /* Go past the '\n' */
59 * Strip comment lines and blank lines from the data.
60 * Copies into a new buffer and frees the old.
61 * Returns the number of lines copied.
64 static size_t clean_data( char **buf
, size_t *size
)
69 char *newbuf
= (char *)malloc( *size
+ 1);
70 char *newbuf_p
= NULL
;
73 fprintf(stderr
, "%s: malloc fail for size %u.\n", prog_name
, (unsigned int)(*size
+ 1));
83 read_line( &p
, linebuf
, sizeof(linebuf
));
84 /* Null terminate after comment. */
85 if((cp
= strchr( linebuf
, '#'))!= NULL
)
88 for(cp
= linebuf
;*cp
&& isspace(*cp
); cp
++)
94 safe_strcpy(newbuf_p
, cp
, *size
- (newbuf_p
- newbuf
));
96 newbuf_p
+= (strlen(newbuf_p
) + 1);
105 * Parse a uint16 from a codepage file.
108 static BOOL
parse_uint16(char *buf
, uint16
*uip
)
113 ui
= (unsigned int)strtol(buf
, &endptr
, 0);
114 if(endptr
== buf
|| ui
> 65535)
122 * Print a parse error and exit.
125 static void parse_error(const char *buf
, const char *input_file
, const char *msg
)
127 fprintf(stderr
, "%s: In file %s : %s whilst parsing line \n%s\n", prog_name
,
128 input_file
, msg
, buf
);
133 * Create a compiled unicode map file from a unicode map definition file.
136 static int do_compile(const char *codepage
, const char *input_file
, const char *output_file
)
142 char *output_buf
= NULL
;
143 uint16 cp_to_ucs2
[65536];
144 uint16 ucs2_to_cp
[65536];
145 BOOL multibyte_code_page
= False
;
150 /* Get the size of the input file. Read the entire thing into memory. */
151 if(sys_stat((char *)input_file
, &st
)!= 0) {
152 fprintf(stderr
, "%s: failed to get the file size for file %s. Error was %s\n",
153 prog_name
, input_file
, strerror(errno
));
157 size
= (size_t)st
.st_size
;
159 if((fp
= sys_fopen(input_file
, "r")) == NULL
) {
160 fprintf(stderr
, "%s: cannot open file %s for input.\n", prog_name
, input_file
);
164 /* As we will be reading text, allocate one more byte for a '\0' */
165 if((buf
= (char *)malloc( size
+ 1 )) == NULL
) {
166 fprintf(stderr
, "%s: malloc fail for size %d.\n", prog_name
, size
+ 1);
171 if(fread( buf
, 1, size
, fp
) != size
) {
172 fprintf(stderr
, "%s: read failed for file %s. Error was %s.\n", prog_name
,
173 input_file
, strerror(errno
));
179 /* Null terminate the text read. */
182 /* Go through the data line by line, strip out comments (anything
183 after a '#' to end-of-line) and blank lines. The rest should be
187 num_lines
= clean_data( &buf
, &size
);
190 * Initialize the output data.
193 memset(cp_to_ucs2
, '\0', sizeof(cp_to_ucs2
));
195 for (i
= 1; i
< 65536; i
++)
196 ucs2_to_cp
[i
] = (uint16
)'_';
198 /* Now convert the lines into the compiled form. */
200 for(i
= 0; i
< num_lines
; i
++) {
206 /* Get the codepage value. */
207 if(!next_token(&p
, token_buf
, NULL
, sizeof(token_buf
)))
208 parse_error(buf
, input_file
, "cannot parse first value");
210 if(!parse_uint16( token_buf
, &cp
))
211 parse_error(buf
, input_file
, "first value doesn't resolve to an unsigned 16 bit integer");
214 multibyte_code_page
= True
;
216 /* Get the ucs2 value. */
218 if(!next_token(&p
, token_buf
, NULL
, sizeof(token_buf
))) {
221 * Some of the multibyte codepage to unicode map files
222 * list a single byte as a leading multibyte and have no
226 buf
+= (strlen(buf
) + 1);
230 if(!parse_uint16( token_buf
, &ucs2
))
231 parse_error(buf
, input_file
, "second value doesn't resolve to an unsigned 16 bit integer");
234 * Set up the cross reference in little-endian format.
237 SSVAL(((char *)&cp_to_ucs2
[cp
]),0,ucs2
);
238 SSVAL(((char *)&ucs2_to_cp
[ucs2
]),0,cp
);
243 buf
+= (strlen(buf
) + 1);
246 size
= UNICODE_MAP_HEADER_SIZE
+ (multibyte_code_page
? (4*65536) : (2*256 + 2*65536));
248 if((output_buf
= (char *)malloc( size
)) == NULL
) {
249 fprintf(stderr
, "%s: output buffer malloc fail for size %d.\n", prog_name
, size
);
254 /* Setup the output file header. */
255 SSVAL(output_buf
,UNICODE_MAP_VERSION_OFFSET
,UNICODE_MAP_FILE_VERSION_ID
);
256 memset(&output_buf
[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET
],'\0',UNICODE_MAP_CODEPAGE_ID_SIZE
);
257 safe_strcpy(&output_buf
[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET
], codepage
, UNICODE_MAP_CODEPAGE_ID_SIZE
- 1);
258 output_buf
[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET
+UNICODE_MAP_CODEPAGE_ID_SIZE
-1] = '\0';
260 offset
= UNICODE_MAP_HEADER_SIZE
;
262 if (multibyte_code_page
) {
263 SIVAL(output_buf
,UNICODE_MAP_CP_TO_UNICODE_LENGTH_OFFSET
,2*65536);
264 memcpy(output_buf
+offset
, (char *)cp_to_ucs2
, 2*65536);
267 SIVAL(output_buf
,UNICODE_MAP_CP_TO_UNICODE_LENGTH_OFFSET
,2*256);
268 memcpy(output_buf
+offset
, (char *)cp_to_ucs2
, 2*256);
271 SIVAL(output_buf
,UNICODE_MAP_UNICODE_TO_CP_LENGTH_OFFSET
,65536*2);
272 memcpy(output_buf
+offset
, (char *)ucs2_to_cp
, 2*65536);
274 /* Now write out the output_buf. */
275 if((fp
= sys_fopen(output_file
, "w"))==NULL
) {
276 fprintf(stderr
, "%s: Cannot open output file %s. Error was %s.\n",
277 prog_name
, output_file
, strerror(errno
));
281 if(fwrite(output_buf
, 1, size
, fp
) != size
) {
282 fprintf(stderr
, "%s: Cannot write output file %s. Error was %s.\n",
283 prog_name
, output_file
, strerror(errno
));
292 int main(int argc
, char **argv
)
294 const char *codepage
= NULL
;
295 char *input_file
= NULL
;
296 char *output_file
= NULL
;
301 unicode_map_usage(prog_name
);
304 input_file
= argv
[2];
305 output_file
= argv
[3];
307 return do_compile( codepage
, input_file
, output_file
);