built-in jambase is packed now
[k8jam.git] / src / mkjambase.c
blob606cb37bb8652386ac25da64b7b1cfd0c70926ae
/*
 * Copyright 1993, 1995 Christopher Seiwald.
 *
 * This file is part of Jam - see jam.c for Copyright information.
 */

/*
 * mkjambase.c - turn Jambase into a big C structure
 *
 * Usage: mkjambase jambase.c Jambase ...
 *
 * Results look like this:
 *
 *   char *jambase[] = {
 *   "...\n",
 *   ...
 *   0 };
 *
 * Handles \'s and "'s specially; knows how to delete blank and comment lines.
 *
 * 11/04/02 (seiwald) - const-ing for string literals
 */
23 #include <ctype.h>
24 #include <stdint.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <unistd.h>
30 //#define MKJAMBASE_COMPACT
////////////////////////////////////////////////////////////////////////////////
// compression code license (ONLY compression code)
//
// Copyright (c) 1997-2001 John Sadler (john_sadler@alum.mit.edu)
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// * Redistributions of source code must retain the above copyright
//   notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
//   notice, this list of conditions and the following disclaimer in the
//   documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
/*
 * Return `number` rearranged so that its in-memory byte layout is big-endian
 * (network order): the first byte in memory becomes the most significant
 * byte of the result and the second byte the least significant one.
 */
static uint16_t ficlNetworkUnsigned16 (uint16_t number) {
  const uint8_t *pointer = (const uint8_t *)&number;

  return (uint16_t)(((uint16_t)(pointer[0]<<8))|(pointer[1]));
}
/*
 * Return `number` rearranged so that its in-memory byte layout is big-endian
 * (network order): the four bytes of `number` are read in memory order and
 * reassembled with the first byte as the most significant one.
 *
 * The bytes are read through a character pointer and widened to uint32_t
 * before shifting, so no object is accessed through an incompatible type
 * and no shift is performed on a (signed) promoted int.
 */
static uint32_t ficlNetworkUnsigned32 (uint32_t number) {
  const uint8_t *pointer = (const uint8_t *)&number;

  return ((uint32_t)pointer[0]<<24)
    | ((uint32_t)pointer[1]<<16)
    | ((uint32_t)pointer[2]<<8)
    | (uint32_t)pointer[3];
}
/* int value with only bit `x` set (bit 0 is the least significant bit). */
#define FICL_BIT_NUMBER(x)  (1<<(x))

/* widths, in bits, of the fields an LZ token is built from */
#define FICL_LZ_TYPE_BITS    (1)
#define FICL_LZ_OFFSET_BITS  (12)
#define FICL_LZ_LENGTH_BITS  (5)
#define FICL_LZ_NEXT_BITS    (8)
/* total width of a phrase token (type + offset + length + next) */
#define FICL_LZ_PHRASE_BITS  (FICL_LZ_TYPE_BITS + FICL_LZ_OFFSET_BITS + FICL_LZ_LENGTH_BITS + FICL_LZ_NEXT_BITS)
/* total width of a symbol token (type + next) */
#define FICL_LZ_SYMBOL_BITS  (FICL_LZ_TYPE_BITS + FICL_LZ_NEXT_BITS)

/*
** if you match fewer characters than this, don't bother,
** it's smaller to encode it as a sequence of symbol tokens.
**/
#define FICL_LZ_MINIMUM_USEFUL_MATCH  ((int)(FICL_LZ_PHRASE_BITS / FICL_LZ_SYMBOL_BITS))

/* number of distinct values the offset field can hold */
#define FICL_LZ_WINDOW_SIZE  (FICL_BIT_NUMBER(FICL_LZ_OFFSET_BITS))
/* number of distinct values the length field can hold, plus the minimum useful match */
#define FICL_LZ_BUFFER_SIZE  (FICL_BIT_NUMBER(FICL_LZ_LENGTH_BITS) + FICL_LZ_MINIMUM_USEFUL_MATCH)

/* smaller of two values; note that each argument may be evaluated twice */
#define FICL_MIN(a, b)  (((a) < (b)) ? (a) : (b))
/*
 * Set (value != 0) or clear (value == 0) bit number `index` of the bit
 * array `bits`.  Bits are numbered from the most significant bit of
 * bits[0]: bit 0 is mask 0x80 of byte 0, bit 8 is mask 0x80 of byte 1.
 *
 * The byte index is kept in size_t so that a large `index` is not
 * truncated on its way to the array subscript.
 */
static inline void bitSet (unsigned char *bits, size_t index, int value) {
  const size_t byteIndex = index >> 3;
  const unsigned char mask = (unsigned char)(128 >> (index & 7));

  if (value) {
    bits[byteIndex] |= mask;
  } else {
    bits[byteIndex] &= (unsigned char)~mask;
  }
}
/*
 * Return 1 if bit number `index` of the bit array `bits` is set, else 0.
 * Bits are numbered from the most significant bit of bits[0]: bit 0 is
 * mask 0x80 of byte 0, bit 8 is mask 0x80 of byte 1.
 *
 * The byte index is kept in size_t so that a large `index` is not
 * truncated on its way to the array subscript.
 */
static int bitGet (const unsigned char *bits, size_t index) {
  const size_t byteIndex = index >> 3;
  const unsigned char mask = (unsigned char)(128 >> (index & 7));

  return ((mask & bits[byteIndex]) ? 1 : 0);
}
115 static int lzCompareWindow (const unsigned char *window, const unsigned char *buffer,
116 int *offset, unsigned char *next, int windowSize, int bufferSize)
118 const unsigned char *windowEnd, *bufferEnd, *windowTrace;
119 int longest = 0;
120 unsigned char bufferFirst = buffer[0];
122 *next = bufferFirst;
124 ** we can't match more than bufferSize-1 characters...
125 ** we need to reserve the last character for the "next",
126 ** and this also prevents us from returning FICL_LZ_BUFFER_LENGTH
127 ** as the length (which won't work, max we can store is FICL_LZ_BUFFER_LENGTH - 1)
129 --bufferSize;
130 windowEnd = window+windowSize;
131 bufferEnd = buffer+bufferSize;
133 for (windowTrace = window; windowTrace < windowEnd; ++windowTrace) {
134 const unsigned char *bufferTrace, *windowTrace2;
135 int length;
137 if (*windowTrace != bufferFirst) continue;
138 bufferTrace = buffer;
139 for (windowTrace2 = windowTrace; windowTrace2 < windowEnd && bufferTrace < bufferEnd && *windowTrace2 == *bufferTrace; ++windowTrace2, ++bufferTrace) ;
140 length = windowTrace2-windowTrace;
141 if (length > longest && length >= FICL_LZ_MINIMUM_USEFUL_MATCH) {
142 *offset = windowTrace-window;
143 longest = length;
144 *next = *bufferTrace;
148 return longest;
/*
 * Append one variable-length header field holding `input` to `data`,
 * starting at data[*byteOffset], and advance *byteOffset past it.
 *
 *   input <= 252   : one byte, the value itself
 *   input <= 65535 : the byte 253 followed by the value as 2 big-endian bytes
 *   otherwise      : the byte 254 followed by the value as 4 big-endian bytes
 *
 * The 2-byte form is limited to 65535: 65536 does not fit in 16 bits and
 * would be stored as 0 if it were sent down that path.
 *
 * The caller must provide room for up to 5 bytes at data[*byteOffset].
 */
static void lzEncodeHeaderField (unsigned char *data, unsigned int input, int *byteOffset) {
  int length;

  if (input <= 252) {
    data[(*byteOffset)++] = (unsigned char)input;
    return;
  }

  if (input <= 65535) {
    data[(*byteOffset)++] = 253;
    length = 2;
  } else {
    data[(*byteOffset)++] = 254;
    length = 4;
  }

  /* most significant byte first */
  for (int shift = (length-1)*8; shift >= 0; shift -= 8) {
    data[(*byteOffset)++] = (unsigned char)((input >> shift) & 0xffu);
  }
}
183 static int lzCompress (const char *src, size_t srcLen, unsigned char **pdest, size_t *pdestLen) {
184 unsigned char *compressed;
185 const unsigned char *window;
186 const unsigned char *buffer;
187 int outputPosition;
188 int remaining;
189 int windowSize;
190 int headerLength;
191 unsigned char headerBuffer[10];
192 int compressedSize;
193 int totalSize;
195 *pdest = NULL;
197 compressed = (unsigned char *)calloc(((srcLen*5)/4)+10, 1);
198 if (compressed == NULL) return -1;
200 window = buffer = (const unsigned char *)src;
202 outputPosition = 0;
203 remaining = srcLen;
204 windowSize = 0;
206 while (remaining > 0) {
207 int bufferSize = FICL_MIN(remaining, FICL_LZ_BUFFER_SIZE);
208 int useWindowSize = FICL_MIN(remaining, windowSize);
209 int offset = 0;
210 int i;
212 unsigned long token;
213 int tokenLength;
214 unsigned char next;
216 int length = lzCompareWindow(window, buffer, &offset, &next, useWindowSize, bufferSize);
218 if (length > 1) {
219 /* phrase token */
220 //assert((length - FICL_LZ_MINIMUM_USEFUL_MATCH) < (1 << FICL_LZ_LENGTH_BITS));
221 token = (1 << (FICL_LZ_PHRASE_BITS - 1))
222 | (offset << (FICL_LZ_PHRASE_BITS - FICL_LZ_TYPE_BITS - FICL_LZ_OFFSET_BITS))
223 | ((length - FICL_LZ_MINIMUM_USEFUL_MATCH) << (FICL_LZ_PHRASE_BITS - FICL_LZ_TYPE_BITS - FICL_LZ_OFFSET_BITS - FICL_LZ_LENGTH_BITS))
224 | next;
226 tokenLength = FICL_LZ_PHRASE_BITS;
227 } else {
228 token = next;
229 tokenLength = FICL_LZ_SYMBOL_BITS;
231 token = ficlNetworkUnsigned32(token);
232 for (i = 0; i < tokenLength; ++i) {
233 int inputPosition = (sizeof(uint32_t)*8)-tokenLength+i;
234 bitSet(compressed, outputPosition, bitGet((unsigned char *)&token, inputPosition));
235 ++outputPosition;
237 ++length;
238 buffer += length;
239 if (windowSize == FICL_LZ_WINDOW_SIZE) {
240 window += length;
241 } else {
242 if ((windowSize + length) < FICL_LZ_WINDOW_SIZE) {
243 windowSize += length;
244 } else {
245 window += (windowSize + length) - FICL_LZ_WINDOW_SIZE;
246 windowSize = FICL_LZ_WINDOW_SIZE;
249 remaining -= length;
251 headerLength = 0;
252 memset(&headerBuffer, 0, sizeof(headerBuffer));
253 lzEncodeHeaderField(headerBuffer, outputPosition, &headerLength);
254 lzEncodeHeaderField(headerBuffer, srcLen, &headerLength);
255 /* plug in header */
256 compressedSize = (((outputPosition-1)/8)+1);
257 totalSize = compressedSize+headerLength;
258 compressed = (unsigned char *)realloc(compressed, totalSize);
259 memmove(compressed+headerLength, compressed, compressedSize);
260 memcpy(compressed, headerBuffer, headerLength);
262 *pdest = compressed;
263 *pdestLen = totalSize;
265 return 0;
269 ////////////////////////////////////////////////////////////////////////////////
/*
 * Report an unrecoverable error and terminate the program with exit
 * status 1.  The message goes to stderr, like the other "FATAL:" messages
 * in this file; a newline is appended, so `msg` should not end with one.
 */
static void fatal (const char *msg) {
  fprintf(stderr, "FATAL: %s\n", msg);
  exit(1);
}
/* text collected for the output; outdatalen is the number of bytes used */
static char outdata[1024*1024];
static int outdatalen = 0;


/*
 * Append the NUL-terminated string `str` (without its terminator) to
 * outdata.  Terminates the program with exit status 1 if the string does
 * not fit into the remaining space of the buffer.
 */
static void outStr (const char *str) {
  const size_t len = strlen(str);

  if (len > sizeof(outdata)-(size_t)outdatalen) {
    fprintf(stderr, "FATAL: collected output is too big\n");
    exit(1);
  }
  memcpy(outdata+outdatalen, str, len);
  outdatalen += (int)len;
}
/*
 * Append one character to the output line being built.  Expects `outbuf`
 * (a char array) and `outp` (the write position inside it) to be in scope.
 * One byte of outbuf is always kept free so that the line can still be
 * NUL-terminated at `outp`; running out of room is a fatal error.
 */
#define EMIT(ch)  do { \
  if ((size_t)(outp-outbuf) >= sizeof(outbuf)-1) fatal("output line too big"); \
  *(outp++) = (ch); \
} while (0)
/* Replace every backslash in the NUL-terminated string `s` with '/', in place. */
static inline void normSlashes (char *s) {
  while ((s = strchr(s, '\\')) != NULL) *s++ = '/';
}
/*
 * Translation state shared by main() and processFile():
 *   doDotC     - nonzero when the output file name ends in ".c"
 *   wasScreen  - nonzero while the previously copied character was an
 *                unpaired backslash
 *   dontStrip  - nonzero when lines are copied without removing
 *                whitespace and comments
 *   dropSpaces - nonzero after a whitespace character was seen outside
 *                quotes, so that following whitespace can be dropped
 *   doCompress - nonzero when translated text is collected for compression
 *                instead of being written as C string literals
 */
static int doDotC = 0, wasScreen, dontStrip = 0, dropSpaces, doCompress = 1;

/* position used in error messages: line number and name of the file being read */
static int lineno = 0;
/* current nesting depth of processFile() calls */
static int inclevel = 0;
static const char *srcfname = "<cli>";
303 static void processFile (FILE *fout, const char *fname) {
304 FILE *fin;
305 char *p, *e, quoteCh, *outp;
306 static char buf[32768], outbuf[32768];
307 int olno = lineno;
308 const char *ofn = srcfname;
310 srcfname = fname;
311 lineno = 0;
313 if (++inclevel > 64) {
314 fclose(fout);
315 fprintf(stderr, "FATAL: too many nested includes, failed in file '%s', line %d\n", ofn, olno);
316 exit(1);
319 printf(": %s\n", fname);
320 if (!(fin = fopen(fname, "r"))) {
321 fclose(fout);
322 fprintf(stderr, "FATAL: can't open file '%s', failed in file '%s', line %d\n", fname, ofn, olno);
323 exit(1);
325 if (doDotC) {
326 if (!doCompress) fprintf(fout, "/* %s */\n", fname);
327 } else {
328 outStr("### ");
329 outStr(fname);
330 outStr(" ###\n");
333 while (fgets(buf, sizeof(buf), fin)) {
334 ++lineno;
336 if (buf[0] == '.') {
337 /* include */
338 char *fn, *t;
340 p = buf+1;
341 while (*p && *((unsigned char *)p) <= ' ') ++p;
342 if ((t = strchr(p, '#')) != NULL) *t = '\0';
343 for (t = p+strlen(p); t > p; --t) if (!isspace(t[-1])) break;
344 *t = '\0';
345 if (!p[0]) {
346 fclose(fout);
347 fprintf(stderr, "FATAL: invalid '.' in file '%s', line %d\n", fname, lineno);
348 exit(1);
350 fn = malloc(strlen(p)+strlen(fname)+64);
351 if (!fn) {
352 fclose(fout);
353 fprintf(stderr, "FATAL: out of memory in file '%s', line %d\n", fname, lineno);
354 exit(1);
356 strcpy(fn, fname);
357 normSlashes(fn);
358 if ((t = strrchr(fn, '/')) != NULL) t[1] = '\0'; else fn[0] = '\0';
359 strcat(fn, p);
360 processFile(fout, fn);
361 free(fn);
362 continue;
365 if (doDotC) {
366 #ifdef MKJAMBASE_COMPACT
367 if (!strncmp(buf, "#DONT_TOUCH", 11)) {
368 dontStrip = !dontStrip;
369 continue;
371 #else
372 dontStrip = 1;
373 #endif
374 p = buf;
375 /* strip leading whitespace */
376 if (!dontStrip) {
377 while (*p && *((unsigned char *)p) <= ' ') ++p;
378 /* drop comments and empty lines */
379 if (*p == '#' || !*p) continue;
381 /* copy; drop comments if # is not in quotes */
382 outp = outbuf; quoteCh = 0; wasScreen = 0;
383 if (!doCompress) EMIT('"');
384 dropSpaces = 0;
385 for (; *p && *p != '\n' && *p != '\r'; p++) {
386 if (!dontStrip) {
387 if (!quoteCh && !wasScreen && *p == '#') break; /* comment follows; drop it */
389 switch (*p) {
390 case '\\':
391 if (!doCompress) EMIT('\\');
392 EMIT('\\');
393 wasScreen = !wasScreen;
394 dropSpaces = 0;
395 break;
396 case '"':
397 if (!doCompress) EMIT('\\');
398 EMIT('"');
399 if (!wasScreen) quoteCh = (quoteCh==*p)?0:*p;
400 dropSpaces = 0;
401 break;
402 case '\x27': /* ' */
403 EMIT('\x27');
404 if (!wasScreen) quoteCh = (quoteCh==*p)?0:*p;
405 dropSpaces = 0;
406 break;
407 default:
408 if (!dontStrip && *((unsigned char *)p) <= ' ') {
409 if (wasScreen || !dropSpaces || quoteCh) EMIT(*p);
410 dropSpaces = !quoteCh;
411 } else {
412 EMIT(*p);
413 dropSpaces = 0;
415 wasScreen = 0;
416 break;
419 /* terminate output */
420 *outp = '\0';
421 if (!dontStrip) {
422 /* strip ending whitespace */
423 e = outp-1;
424 while (e >= outbuf && *((unsigned char *)e) <= ' ') --e;
425 *(++e) = '\0';
426 /* drop empty line */
427 if (!outbuf[0]) continue;
429 if (doCompress) {
430 outStr(outbuf);
431 outStr("\n");
432 } else {
433 fprintf(fout, "%s\\n\",\n", outbuf);
435 } else {
436 fprintf(fout, "%s", buf);
437 //outStr(buf);
440 fclose(fin);
441 --inclevel;
442 srcfname = ofn;
443 lineno = olno;
447 int main (int argc, char *argv[]) {
448 FILE *fout;
449 char *p;
451 if (argc < 3) {
452 fprintf(stderr, "usage: %s jambase.c Jambase ...\n", argv[0]);
453 return 1;
456 if (!(fout = fopen(argv[1], "wb"))) {
457 perror(argv[1]);
458 return 1;
461 /* if the file ends in .c generate a C source file */
462 if ((p = strrchr(argv[1], '.')) && !strcmp(p, ".c")) ++doDotC;
464 /* now process the files */
465 argc -= 2, argv += 2;
467 if (doDotC) {
468 fprintf(fout, "/* Generated by mkjambase from Jambase */\n");
469 if (doCompress) {
470 fprintf(fout, "char **jambase = 0;\n");
471 fprintf(fout, "unsigned char jambasepk[] = {");
472 } else {
473 fprintf(fout, "const char *jambase[] = {\n");
477 for (; argc--; ++argv) processFile(fout, *argv);
479 if (outdatalen > 0) {
480 if (doCompress) {
481 unsigned char *dest;
482 size_t destlen;
483 int cnt = 0;
485 if (lzCompress(outdata, outdatalen, &dest, &destlen) != 0) {
486 perror("compression error!");
487 return 1;
489 //fwrite(dest, destlen, 1, fout);
490 for (int f = 0; f < destlen; ++f) {
491 if (cnt <= 0) {
492 fputc('\n', fout);
493 cnt = 16;
495 --cnt;
496 fprintf(fout, "0x%02x,", dest[f]);
498 if (cnt > 0) fputc('\n', fout);
499 } else {
500 perror("wtf?!");
501 return 1;
502 //fwrite(outdata, outdatalen, 1, fout);
506 if (doDotC) {
507 if (!doCompress) fputc('0', fout);
508 fprintf(fout, "};\n");
509 if (doCompress) fprintf(fout, "\nint jbpksize (void) { return sizeof(jambasepk); }\n");
512 fclose(fout);
513 return 0;