preparing for release of alpha.0.2
[Samba/gbeck.git] / source / utils / make_unicodemap.c
blob76c49361bec12ed6b8d99b062fe66678dceb7766
1 /*
2 Unix SMB/Netbios implementation.
3 Version 2.0.x.
4 Create unicode map files from unicode_def.XXX files.
6 Copyright (C) Jeremy Allison 1997-1999.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #include "includes.h"
25 static char *prog_name = NULL;
/*
 * Report the expected command line on stderr, then terminate the
 * program with exit status 1. This function does not return.
 */

static void unicode_map_usage(char *progname)
{
  fprintf(stderr,
          "Usage is : %s <codepage> <inputfile> <outputfile>\n",
          progname);

  exit(1);
}
/*
 * Read a line from a buffer into a line buffer. Ensure null
 * terminated.
 *
 * A line ends at '\n', at a Ctrl-Z ('\032') or at the terminating '\0'
 * of the input. At most size - 1 characters are stored in line_buf;
 * any further characters on the line are consumed and discarded. On
 * return *buf points just past the line terminator, or at the final
 * '\0' when the input is exhausted.
 *
 * If size is 0 nothing at all is written to line_buf, but *buf is
 * still advanced past the line.
 */

static void read_line( char **buf, char *line_buf, size_t size)
{
  char *p = *buf;
  size_t num = 0;

  for(; *p && (*p != '\n') && (*p != '\032'); p++) {
    /* Written as num + 1 < size so that size == 0 cannot wrap around. */
    if(num + 1 < size)
      line_buf[num++] = *p;
  }

  if(*p)
    p++; /* Go past the line terminator. */

  /* With a zero length buffer there is no room even for the '\0'. */
  if(size > 0)
    line_buf[num] = '\0';

  *buf = p;
}
59 * Strip comment lines and blank lines from the data.
60 * Copies into a new buffer and frees the old.
61 * Returns the number of lines copied.
64 static size_t clean_data( char **buf, size_t *size)
66 pstring linebuf;
67 char *p = *buf;
68 size_t num_lines = 0;
69 char *newbuf = (char *)malloc( *size + 1);
70 char *newbuf_p = NULL;
72 if(newbuf == NULL) {
73 fprintf(stderr, "%s: malloc fail for size %u.\n", prog_name, (unsigned int)(*size + 1));
74 exit(1);
77 newbuf_p = newbuf;
78 *newbuf_p = '\0';
80 while( *p ) {
81 char *cp;
83 read_line( &p, linebuf, sizeof(linebuf));
84 /* Null terminate after comment. */
85 if((cp = strchr( linebuf, '#'))!= NULL)
86 *cp = '\0';
88 for(cp = linebuf;*cp && isspace(*cp); cp++)
91 if(*cp == '\0')
92 continue;
94 safe_strcpy(newbuf_p, cp, *size - (newbuf_p - newbuf));
95 num_lines++;
96 newbuf_p += (strlen(newbuf_p) + 1);
99 free(*buf);
100 *buf = newbuf;
101 return num_lines;
105 * Parse a uint16 from a codepage file.
108 static BOOL parse_uint16(char *buf, uint16 *uip)
110 unsigned int ui;
111 char *endptr = NULL;
113 ui = (unsigned int)strtol(buf, &endptr, 0);
114 if(endptr == buf || ui > 65535)
115 return False;
117 *uip = (uint16)ui;
118 return True;
122 * Print a parse error and exit.
125 static void parse_error(const char *buf, const char *input_file, const char *msg)
127 fprintf(stderr, "%s: In file %s : %s whilst parsing line \n%s\n", prog_name,
128 input_file, msg, buf);
129 exit(1);
133 * Create a compiled unicode map file from a unicode map definition file.
136 static int do_compile(const char *codepage, const char *input_file, const char *output_file)
138 FILE *fp = NULL;
139 size_t size = 0;
140 size_t offset = 0;
141 char *buf = NULL;
142 char *output_buf = NULL;
143 uint16 cp_to_ucs2[65536];
144 uint16 ucs2_to_cp[65536];
145 BOOL multibyte_code_page = False;
146 int num_lines = 0;
147 int i = 0;
148 SMB_STRUCT_STAT st;
150 /* Get the size of the input file. Read the entire thing into memory. */
151 if(sys_stat((char *)input_file, &st)!= 0) {
152 fprintf(stderr, "%s: failed to get the file size for file %s. Error was %s\n",
153 prog_name, input_file, strerror(errno));
154 exit(1);
157 size = (size_t)st.st_size;
159 if((fp = sys_fopen(input_file, "r")) == NULL) {
160 fprintf(stderr, "%s: cannot open file %s for input.\n", prog_name, input_file);
161 exit(1);
164 /* As we will be reading text, allocate one more byte for a '\0' */
165 if((buf = (char *)malloc( size + 1 )) == NULL) {
166 fprintf(stderr, "%s: malloc fail for size %d.\n", prog_name, size + 1);
167 fclose(fp);
168 exit(1);
171 if(fread( buf, 1, size, fp) != size) {
172 fprintf(stderr, "%s: read failed for file %s. Error was %s.\n", prog_name,
173 input_file, strerror(errno));
174 free((char *)buf);
175 fclose(fp);
176 exit(1);
179 /* Null terminate the text read. */
180 buf[size] = '\0';
182 /* Go through the data line by line, strip out comments (anything
183 after a '#' to end-of-line) and blank lines. The rest should be
184 the codepage data.
187 num_lines = clean_data( &buf, &size);
190 * Initialize the output data.
193 memset(cp_to_ucs2, '\0', sizeof(cp_to_ucs2));
194 ucs2_to_cp[0] = 0;
195 for (i = 1; i < 65536; i++)
196 ucs2_to_cp[i] = (uint16)'_';
198 /* Now convert the lines into the compiled form. */
200 for(i = 0; i < num_lines; i++) {
201 char token_buf[512];
202 char *p = buf;
203 uint16 cp = 0;
204 uint16 ucs2 = 0;
206 /* Get the codepage value. */
207 if(!next_token(&p, token_buf, NULL, sizeof(token_buf)))
208 parse_error(buf, input_file, "cannot parse first value");
210 if(!parse_uint16( token_buf, &cp))
211 parse_error(buf, input_file, "first value doesn't resolve to an unsigned 16 bit integer");
213 if(cp > 255)
214 multibyte_code_page = True;
216 /* Get the ucs2 value. */
218 if(!next_token(&p, token_buf, NULL, sizeof(token_buf))) {
221 * Some of the multibyte codepage to unicode map files
222 * list a single byte as a leading multibyte and have no
223 * second value.
226 buf += (strlen(buf) + 1);
227 continue;
230 if(!parse_uint16( token_buf, &ucs2))
231 parse_error(buf, input_file, "second value doesn't resolve to an unsigned 16 bit integer");
234 * Set up the cross reference in little-endian format.
237 SSVAL(((char *)&cp_to_ucs2[cp]),0,ucs2);
238 SSVAL(((char *)&ucs2_to_cp[ucs2]),0,cp);
241 * Next line.
243 buf += (strlen(buf) + 1);
246 size = UNICODE_MAP_HEADER_SIZE + (multibyte_code_page ? (4*65536) : (2*256 + 2*65536));
248 if((output_buf = (char *)malloc( size )) == NULL) {
249 fprintf(stderr, "%s: output buffer malloc fail for size %d.\n", prog_name, size);
250 fclose(fp);
251 exit(1);
254 /* Setup the output file header. */
255 SSVAL(output_buf,UNICODE_MAP_VERSION_OFFSET,UNICODE_MAP_FILE_VERSION_ID);
256 memset(&output_buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET],'\0',UNICODE_MAP_CODEPAGE_ID_SIZE);
257 safe_strcpy(&output_buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], codepage, UNICODE_MAP_CODEPAGE_ID_SIZE - 1);
258 output_buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET+UNICODE_MAP_CODEPAGE_ID_SIZE-1] = '\0';
260 offset = UNICODE_MAP_HEADER_SIZE;
262 if (multibyte_code_page) {
263 SIVAL(output_buf,UNICODE_MAP_CP_TO_UNICODE_LENGTH_OFFSET,2*65536);
264 memcpy(output_buf+offset, (char *)cp_to_ucs2, 2*65536);
265 offset += 2*65536;
266 } else {
267 SIVAL(output_buf,UNICODE_MAP_CP_TO_UNICODE_LENGTH_OFFSET,2*256);
268 memcpy(output_buf+offset, (char *)cp_to_ucs2, 2*256);
269 offset += 2*256;
271 SIVAL(output_buf,UNICODE_MAP_UNICODE_TO_CP_LENGTH_OFFSET,65536*2);
272 memcpy(output_buf+offset, (char *)ucs2_to_cp, 2*65536);
274 /* Now write out the output_buf. */
275 if((fp = sys_fopen(output_file, "w"))==NULL) {
276 fprintf(stderr, "%s: Cannot open output file %s. Error was %s.\n",
277 prog_name, output_file, strerror(errno));
278 exit(1);
281 if(fwrite(output_buf, 1, size, fp) != size) {
282 fprintf(stderr, "%s: Cannot write output file %s. Error was %s.\n",
283 prog_name, output_file, strerror(errno));
284 exit(1);
287 fclose(fp);
289 return 0;
292 int main(int argc, char **argv)
294 const char *codepage = NULL;
295 char *input_file = NULL;
296 char *output_file = NULL;
298 prog_name = argv[0];
300 if(argc != 4)
301 unicode_map_usage(prog_name);
303 codepage = argv[1];
304 input_file = argv[2];
305 output_file = argv[3];
307 return do_compile( codepage, input_file, output_file);