2 * Copyright 1993, 1995 Christopher Seiwald.
4 * This file is part of Jam - see jam.c for Copyright information.
8 * mkjambase.c - turn Jambase into a big C structure
10 * Usage: mkjambase jambase.c Jambase ...
12 * Results look like this:
19 * Handles \'s and "'s specially; knows how to delete blank and comment lines.
21 * 11/04/02 (seiwald) - const-ing for string literals
30 //#define MKJAMBASE_COMPACT
33 ////////////////////////////////////////////////////////////////////////////////
34 // compression code license (ONLY compression code)
36 // Copyright (c) 1997-2001 John Sadler (john_sadler@alum.mit.edu)
37 // All rights reserved.
39 // Redistribution and use in source and binary forms, with or without
40 // modification, are permitted provided that the following conditions
43 // * Redistributions of source code must retain the above copyright
44 // notice, this list of conditions and the following disclaimer.
46 // * Redistributions in binary form must reproduce the above copyright
47 // notice, this list of conditions and the following disclaimer in the
48 // documentation and/or other materials provided with the distribution.
50 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
51 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 // ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
54 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 // OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 // HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
/*
 * Convert a 16-bit value between host and network (big-endian) byte order.
 *
 * The two bytes of `number` are read in memory order and reassembled most
 * significant byte first. The result is therefore a byte swap on a
 * little-endian host and the identity on a big-endian host.
 */
static uint16_t ficlNetworkUnsigned16 (uint16_t number) {
  const unsigned char *bytes = (const unsigned char *)&number;

  /* widen to unsigned before shifting so no signed arithmetic is involved */
  return (uint16_t)(((unsigned)bytes[0] << 8) | (unsigned)bytes[1]);
}


/*
 * Convert a 32-bit value between host and network (big-endian) byte order.
 *
 * The value is split into its two 16-bit halves in memory order, each half
 * is converted with ficlNetworkUnsigned16(), and the first half becomes the
 * high 16 bits of the result.
 *
 * The halves are extracted with memcpy() rather than through a uint16_t
 * pointer cast: reading a uint32_t object through a uint16_t lvalue breaks
 * the effective-type (strict aliasing) rules. The high half is cast to
 * uint32_t before the shift, because shifting the int-promoted uint16_t left
 * by 16 overflows a signed int whenever its top bit is set.
 */
static uint32_t ficlNetworkUnsigned32 (uint32_t number) {
  uint16_t halves[2];

  memcpy(halves, &number, sizeof(halves));
  return ((uint32_t)ficlNetworkUnsigned16(halves[0]) << 16) |
         (uint32_t)ficlNetworkUnsigned16(halves[1]);
}
/* FICL_BIT_NUMBER(x) yields an int with only bit x set, i.e. 2 to the power x. */
76 #define FICL_BIT_NUMBER(x) (1<<(x))
/* Widths, in bits, of the individual fields of an encoded token. */
78 #define FICL_LZ_TYPE_BITS (1)
79 #define FICL_LZ_OFFSET_BITS (12)
80 #define FICL_LZ_LENGTH_BITS (5)
81 #define FICL_LZ_NEXT_BITS (8)
/* Total width of a phrase token: type + offset + length + next = 26 bits. */
82 #define FICL_LZ_PHRASE_BITS (FICL_LZ_TYPE_BITS + FICL_LZ_OFFSET_BITS + FICL_LZ_LENGTH_BITS + FICL_LZ_NEXT_BITS)
/* Total width of a symbol token: type + next = 9 bits. */
83 #define FICL_LZ_SYMBOL_BITS (FICL_LZ_TYPE_BITS + FICL_LZ_NEXT_BITS)
/*
** If a match is shorter than this, don't bother with a phrase token:
** it is smaller to encode those characters as a sequence of symbol tokens.
*/
/* Integer quotient of the phrase width by the symbol width (26 / 9 = 2). */
89 #define FICL_LZ_MINIMUM_USEFUL_MATCH ((int)(FICL_LZ_PHRASE_BITS / FICL_LZ_SYMBOL_BITS))
/* Window size is 2 to the power FICL_LZ_OFFSET_BITS (4096). */
91 #define FICL_LZ_WINDOW_SIZE (FICL_BIT_NUMBER(FICL_LZ_OFFSET_BITS))
/* Look-ahead buffer size is 2 to the power FICL_LZ_LENGTH_BITS plus the minimum useful match (32 + 2 = 34). */
92 #define FICL_LZ_BUFFER_SIZE (FICL_BIT_NUMBER(FICL_LZ_LENGTH_BITS) + FICL_LZ_MINIMUM_USEFUL_MATCH)
/* Smaller of a and b. Function-like macro: each argument may be evaluated twice. */
94 #define FICL_MIN(a, b) (((a) < (b)) ? (a) : (b))
/*
 * Set or clear one bit of a byte array.
 *
 * Bits are numbered most-significant-first within each byte: index 0 is the
 * 0x80 bit of bits[0], index 7 is the 0x01 bit of bits[0], index 8 is the
 * 0x80 bit of bits[1], and so on.
 *
 *   bits  - array to modify; must contain at least (index / 8) + 1 bytes
 *   index - bit number to change
 *   value - non-zero sets the bit, zero clears it
 *
 * The byte index is kept in size_t, so a large bit index is never narrowed
 * to int.
 */
static inline void bitSet (unsigned char *bits, size_t index, int value) {
  const size_t byteIndex = index >> 3;
  const unsigned char mask = (unsigned char)(0x80u >> (index & 7u));

  if (value) {
    bits[byteIndex] |= mask;
  } else {
    bits[byteIndex] &= (unsigned char)~mask;
  }
}
/*
 * Read one bit of a byte array.
 *
 * Bits are numbered most-significant-first within each byte: index 0 is the
 * 0x80 bit of bits[0], index 8 is the 0x80 bit of bits[1].
 *
 *   bits  - array to read; must contain at least (index / 8) + 1 bytes
 *   index - bit number to read
 *
 * Returns 1 when the bit is set and 0 when it is clear. The byte index is
 * kept in size_t, so a large bit index is never narrowed to int.
 */
static int bitGet (const unsigned char *bits, size_t index) {
  const unsigned char mask = (unsigned char)(0x80u >> (index & 7u));

  return (bits[index >> 3] & mask) != 0;
}
/*
 * Search the sliding window for the longest run of bytes that matches the
 * start of the look-ahead buffer.
 *
 *   window, windowSize - start and length of the already-seen data
 *   buffer, bufferSize - start and length of the look-ahead data
 *   offset             - receives the position of the best run, relative to window
 *   next               - receives the buffer byte that follows the best run
 *
 * A candidate run replaces the current best only when it is longer than
 * longest and at least FICL_LZ_MINIMUM_USEFUL_MATCH bytes long.
 *
 * NOTE(review): the opening brace, the declarations of length and longest,
 * any adjustment of bufferSize, and the return statement are not visible in
 * this excerpt; presumably the function returns the best length. Confirm
 * against the complete file.
 */
115 static int lzCompareWindow (const unsigned char *window
, const unsigned char *buffer
,
116 int *offset
, unsigned char *next
, int windowSize
, int bufferSize
)
118 const unsigned char *windowEnd
, *bufferEnd
, *windowTrace
;
/* first look-ahead byte; used below as a cheap filter on window positions */
120 unsigned char bufferFirst
= buffer
[0];
124 ** we can't match more than bufferSize-1 characters...
125 ** we need to reserve the last character for the "next",
126 ** and this also prevents us from returning FICL_LZ_BUFFER_LENGTH
127 ** as the length (which won't work, max we can store is FICL_LZ_BUFFER_LENGTH - 1)
/* one-past-the-end pointers that bound both scans */
130 windowEnd
= window
+windowSize
;
131 bufferEnd
= buffer
+bufferSize
;
/* try every window position as a potential start of a match */
133 for (windowTrace
= window
; windowTrace
< windowEnd
; ++windowTrace
) {
134 const unsigned char *bufferTrace
, *windowTrace2
;
/* skip positions whose first byte already differs */
137 if (*windowTrace
!= bufferFirst
) continue;
138 bufferTrace
= buffer
;
/* advance both cursors while the bytes agree and neither end is reached */
139 for (windowTrace2
= windowTrace
; windowTrace2
< windowEnd
&& bufferTrace
< bufferEnd
&& *windowTrace2
== *bufferTrace
; ++windowTrace2
, ++bufferTrace
) ;
/* number of matching bytes found at this window position */
140 length
= windowTrace2
-windowTrace
;
/* keep the run only if it beats the best so far and is worth a phrase token */
141 if (length
> longest
&& length
>= FICL_LZ_MINIMUM_USEFUL_MATCH
) {
142 *offset
= windowTrace
-window
;
/* bufferTrace stopped on the first look-ahead byte that is not part of the run */
144 *next
= *bufferTrace
;
/*
 * Append one header field holding input to data, starting at *byteOffset
 * and advancing *byteOffset past what was written.
 *
 * Two encodings are visible:
 *   - the value itself is stored as a single byte;
 *   - an id byte is stored, followed by length bytes copied bit by bit from
 *     the low-order end of the big-endian form of input.
 *
 * NOTE(review): the conditions that choose between the encodings, and the
 * assignments to id and length, are not visible in this excerpt.
 * NOTE(review): if the branch taken for input <= 65536 selects a two-byte
 * length, the value 65536 itself would not fit in it; confirm against the
 * complete file.
 */
152 static void lzEncodeHeaderField (unsigned char *data
, unsigned int input
, int *byteOffset
) {
/* single-byte encoding: the value is stored directly */
154 data
[(*byteOffset
)++] = (unsigned char)input
;
161 if (input
<= 65536) {
/* put the most significant byte first in memory so bits can be copied in order */
169 input
= ficlNetworkUnsigned32(input
);
/* bit position, inside the 32-bit value, of the first of the last length bytes */
170 inputPosition
= (sizeof(uint32_t) * 8)-(length
*8);
/* the id byte precedes the value bytes */
172 data
[(*byteOffset
)++] = (unsigned char)id
;
173 bitsOffset
= *byteOffset
*8;
174 (*byteOffset
) += length
;
/* copy length bytes, one bit at a time, into the space just reserved */
176 for (int i
= 0; i
< (length
* 8); i
++) {
177 bitSet(data
, bitsOffset
++, bitGet((unsigned char *)&input
, inputPosition
++));
/*
 * Compress srcLen bytes at src into a newly allocated buffer.
 *
 * The output consists of a header written by two lzEncodeHeaderField calls
 * (first the bit count outputPosition, then srcLen), followed by the packed
 * token bit stream. The total size in bytes is stored through pdestLen.
 *
 * Returns -1 when the initial allocation fails.
 *
 * NOTE(review): many lines of this function are not visible in this excerpt,
 * including the declarations and initial values of remaining, windowSize,
 * offset, next, token, tokenLength, outputPosition and headerLength, the
 * branch that chooses between phrase and symbol tokens, the store through
 * pdest, and the success return value.
 */
183 static int lzCompress (const char *src
, size_t srcLen
, unsigned char **pdest
, size_t *pdestLen
) {
184 unsigned char *compressed
;
185 const unsigned char *window
;
186 const unsigned char *buffer
;
/* scratch space in which the header fields are assembled */
191 unsigned char headerBuffer
[10];
/* zero-filled output buffer of srcLen * 5 / 4 + 10 bytes */
197 compressed
= (unsigned char *)calloc(((srcLen
*5)/4)+10, 1);
198 if (compressed
== NULL
) return -1;
/* window and look-ahead buffer both start at the beginning of the input */
200 window
= buffer
= (const unsigned char *)src
;
206 while (remaining
> 0) {
/* clamp the look-ahead and window sizes for this step */
207 int bufferSize
= FICL_MIN(remaining
, FICL_LZ_BUFFER_SIZE
);
208 int useWindowSize
= FICL_MIN(remaining
, windowSize
);
/* look for the best match of the look-ahead data inside the window */
216 int length
= lzCompareWindow(window
, buffer
, &offset
, &next
, useWindowSize
, bufferSize
);
220 //assert((length - FICL_LZ_MINIMUM_USEFUL_MATCH) < (1 << FICL_LZ_LENGTH_BITS));
/* phrase token: top bit set, then the offset field, then the biased length field */
221 token
= (1 << (FICL_LZ_PHRASE_BITS
- 1))
222 | (offset
<< (FICL_LZ_PHRASE_BITS
- FICL_LZ_TYPE_BITS
- FICL_LZ_OFFSET_BITS
))
223 | ((length
- FICL_LZ_MINIMUM_USEFUL_MATCH
) << (FICL_LZ_PHRASE_BITS
- FICL_LZ_TYPE_BITS
- FICL_LZ_OFFSET_BITS
- FICL_LZ_LENGTH_BITS
))
226 tokenLength
= FICL_LZ_PHRASE_BITS
;
229 tokenLength
= FICL_LZ_SYMBOL_BITS
;
/* most significant byte first in memory, so the low tokenLength bits can be copied in order */
231 token
= ficlNetworkUnsigned32(token
);
232 for (i
= 0; i
< tokenLength
; ++i
) {
233 int inputPosition
= (sizeof(uint32_t)*8)-tokenLength
+i
;
234 bitSet(compressed
, outputPosition
, bitGet((unsigned char *)&token
, inputPosition
));
/* window bookkeeping: grow the window until it reaches FICL_LZ_WINDOW_SIZE, then slide it */
239 if (windowSize
== FICL_LZ_WINDOW_SIZE
) {
242 if ((windowSize
+ length
) < FICL_LZ_WINDOW_SIZE
) {
243 windowSize
+= length
;
245 window
+= (windowSize
+ length
) - FICL_LZ_WINDOW_SIZE
;
246 windowSize
= FICL_LZ_WINDOW_SIZE
;
/* build the header: bit count of the stream first, then the original length */
252 memset(&headerBuffer
, 0, sizeof(headerBuffer
));
253 lzEncodeHeaderField(headerBuffer
, outputPosition
, &headerLength
);
254 lzEncodeHeaderField(headerBuffer
, srcLen
, &headerLength
);
/* number of bytes needed to hold outputPosition bits */
256 compressedSize
= (((outputPosition
-1)/8)+1);
257 totalSize
= compressedSize
+headerLength
;
/*
 * NOTE(review): the realloc result is assigned straight back to compressed
 * and no NULL check is visible before the memmove below; on failure the
 * original buffer would leak and a null pointer would be used.
 */
258 compressed
= (unsigned char *)realloc(compressed
, totalSize
);
/* shift the bit stream up to make room, then place the header in front of it */
259 memmove(compressed
+headerLength
, compressed
, compressedSize
);
260 memcpy(compressed
, headerBuffer
, headerLength
);
263 *pdestLen
= totalSize
;
269 ////////////////////////////////////////////////////////////////////////////////
/*
 * Report a fatal error: prints the text FATAL, a colon, the message and a
 * newline.
 *
 * NOTE(review): the message is written to stdout, whereas the other FATAL
 * messages visible in this file are written to stderr.
 * NOTE(review): the rest of the body is not visible in this excerpt, so
 * whether the function terminates the program cannot be confirmed here.
 */
270 static void fatal (const char *msg
) {
271 fprintf(stdout
, "FATAL: %s\n", msg
);
/* Accumulator for all processed text; outdatalen is the number of bytes used. */
static char outdata[1024*1024];
static int outdatalen = 0;


/*
 * Append the NUL-terminated string `str` (without its terminator) to
 * `outdata` and advance `outdatalen` by its length.
 *
 * If the text would not fit in the remaining space of `outdata`, a FATAL
 * message is printed to stderr and the program exits with a failure status
 * instead of writing past the end of the buffer.
 */
static void outStr (const char *str) {
  const size_t len = strlen(str); /* computed once; the original scanned twice */
  const size_t used = (size_t)outdatalen;

  if (len > sizeof(outdata) - used) {
    fprintf(stderr, "FATAL: output data too big\n");
    exit(EXIT_FAILURE);
  }
  memmove(outdata + used, str, len);
  outdatalen += (int)len;
}
/*
 * EMIT(ch): the visible part checks, via the outp and outbuf names of the
 * expansion site, that the write position has not reached the end of outbuf,
 * and calls fatal when it has.
 * NOTE(review): the remainder of the macro is not visible in this excerpt;
 * presumably it stores ch through outp and closes the do-while wrapper.
 */
286 #define EMIT(ch) do { \
287 if (outp-outbuf >= sizeof(outbuf)) fatal("output line too big\n");\
/*
 * Replace every backslash in the NUL-terminated string `s` with a forward
 * slash, in place. A NULL pointer is accepted and ignored.
 */
static inline void normSlashes (char *s) {
  if (s == NULL) {
    return;
  }
  for (; *s != '\0'; ++s) {
    if (*s == '\\') {
      *s = '/';
    }
  }
}
/*
 * File-scope state shared by processFile and main.
 *   doDotC     - incremented by main when the output file name ends in .c
 *   wasScreen  - toggled while a line is copied and reset at the start of each
 *                line; a # is only treated as a comment start when it is zero
 *                (presumably an escape-pending flag; confirm)
 *   dontStrip  - toggled by DONT_TOUCH marker lines when MKJAMBASE_COMPACT is
 *                defined; when set, whitespace handling is bypassed
 *   dropSpaces - when set, whitespace outside quotes is not emitted
 *   doCompress - defaults to 1; when zero, output is written as C string
 *                literals instead of being accumulated for compression
 * wasScreen and dropSpaces have no initializer and therefore start at zero.
 */
297 static int doDotC
= 0, wasScreen
, dontStrip
= 0, dropSpaces
, doCompress
= 1;
/* line number used in error messages */
299 static int lineno
= 0;
/* nesting depth of processFile calls; more than 64 is reported as an error */
300 static int inclevel
= 0;
/* name saved by processFile on entry and used when reporting failures */
301 static const char *srcfname
= "<cli>";
/*
 * Read the file fname line by line and write its processed form to fout.
 *
 * Visible behaviour:
 *   - the nesting counter inclevel is incremented, and a depth above 64 is
 *     reported as a fatal error;
 *   - the file name is echoed to stdout and the file is opened for reading;
 *   - when doCompress is zero the file name is written to fout as a C comment;
 *   - one path trims a line, cuts it at the first #, builds a new file name
 *     in a freshly allocated string and calls processFile recursively
 *     (presumably an include directive; the condition that selects this path
 *     is not visible);
 *   - ordinary lines have leading whitespace removed, comment-only and empty
 *     lines dropped, and are copied to outbuf through EMIT while tracking the
 *     current quote character, so that a # inside quotes is kept;
 *   - trailing whitespace is removed, empty results are skipped, and the
 *     result is written to fout.
 *
 * buf and outbuf are static, so they are shared between nested calls.
 *
 * NOTE(review): many lines are not visible in this excerpt, including the
 * declarations of fin, t, fn and olno, the error exits, the construction of
 * fn, the switch on the current character, the close of fin and the
 * decrement of inclevel.
 * NOTE(review): isspace is called with t[-1]; if t points to plain char, a
 * negative value is outside the domain of isspace. Confirm the type of t.
 */
303 static void processFile (FILE *fout
, const char *fname
) {
305 char *p
, *e
, quoteCh
, *outp
;
306 static char buf
[32768], outbuf
[32768];
/* remember the name of the including file for error messages */
308 const char *ofn
= srcfname
;
/* guard against runaway recursion through nested includes */
313 if (++inclevel
> 64) {
315 fprintf(stderr
, "FATAL: too many nested includes, failed in file '%s', line %d\n", ofn
, olno
);
/* progress output: one line per processed file */
319 printf(": %s\n", fname
);
320 if (!(fin
= fopen(fname
, "r"))) {
322 fprintf(stderr
, "FATAL: can't open file '%s', failed in file '%s', line %d\n", fname
, ofn
, olno
);
326 if (!doCompress
) fprintf(fout
, "/* %s */\n", fname
);
/* main loop: one iteration per input line of at most sizeof(buf) - 1 characters */
333 while (fgets(buf
, sizeof(buf
), fin
)) {
/* skip leading whitespace, cut at the first #, then find the end without trailing whitespace */
341 while (*p
&& *((unsigned char *)p
) <= ' ') ++p
;
342 if ((t
= strchr(p
, '#')) != NULL
) *t
= '\0';
343 for (t
= p
+strlen(p
); t
> p
; --t
) if (!isspace(t
[-1])) break;
347 fprintf(stderr
, "FATAL: invalid '.' in file '%s', line %d\n", fname
, lineno
);
/* room for the current file name plus the referenced name plus slack */
350 fn
= malloc(strlen(p
)+strlen(fname
)+64);
353 fprintf(stderr
, "FATAL: out of memory in file '%s', line %d\n", fname
, lineno
);
/* keep only the directory part of fn, including the final slash, or nothing at all */
358 if ((t
= strrchr(fn
, '/')) != NULL
) t
[1] = '\0'; else fn
[0] = '\0';
/* recurse into the referenced file */
360 processFile(fout
, fn
);
366 #ifdef MKJAMBASE_COMPACT
367 if (!strncmp(buf
, "#DONT_TOUCH", 11)) {
368 dontStrip
= !dontStrip
;
375 /* strip leading whitespace */
377 while (*p
&& *((unsigned char *)p
) <= ' ') ++p
;
378 /* drop comments and empty lines */
379 if (*p
== '#' || !*p
) continue;
381 /* copy; drop comments if # is not in quotes */
382 outp
= outbuf
; quoteCh
= 0; wasScreen
= 0;
383 if (!doCompress
) EMIT('"');
385 for (; *p
&& *p
!= '\n' && *p
!= '\r'; p
++) {
387 if (!quoteCh
&& !wasScreen
&& *p
== '#') break; /* comment follows; drop it */
391 if (!doCompress
) EMIT('\\');
393 wasScreen
= !wasScreen
;
397 if (!doCompress
) EMIT('\\');
399 if (!wasScreen
) quoteCh
= (quoteCh
==*p
)?0:*p
;
404 if (!wasScreen
) quoteCh
= (quoteCh
==*p
)?0:*p
;
408 if (!dontStrip
&& *((unsigned char *)p
) <= ' ') {
409 if (wasScreen
|| !dropSpaces
|| quoteCh
) EMIT(*p
);
410 dropSpaces
= !quoteCh
;
419 /* terminate output */
422 /* strip ending whitespace */
424 while (e
>= outbuf
&& *((unsigned char *)e
) <= ' ') --e
;
426 /* drop empty line */
427 if (!outbuf
[0]) continue;
433 fprintf(fout
, "%s\\n\",\n", outbuf
);
436 fprintf(fout
, "%s", buf
);
/*
 * Program entry point: mkjambase OUTPUT INPUT...
 *
 * Visible behaviour:
 *   - prints a usage message to stderr (the condition is not visible);
 *   - opens argv[1] for binary writing as the output file;
 *   - sets doDotC when the output name ends in .c;
 *   - writes a generated-file banner and the opening of either the packed
 *     byte array jambasepk or the string array jambase;
 *   - runs processFile on every remaining argument;
 *   - when text was accumulated in outdata, compresses it with lzCompress
 *     and writes the result as hexadecimal byte constants;
 *   - closes the array and, when compressing, emits the jbpksize accessor.
 *
 * NOTE(review): many lines are not visible in this excerpt, including the
 * declarations of fout, p, dest, destlen and cnt, the conditions that select
 * between the alternative fprintf calls, the error exits, and the final
 * close and return.
 * NOTE(review): the loop index f is an int compared against destlen, which
 * is passed to lzCompress through a size_t pointer parameter; this is a
 * signed versus unsigned comparison if destlen is a size_t.
 */
447 int main (int argc
, char *argv
[]) {
452 fprintf(stderr
, "usage: %s jambase.c Jambase ...\n", argv
[0]);
456 if (!(fout
= fopen(argv
[1], "wb"))) {
461 /* if the file ends in .c generate a C source file */
462 if ((p
= strrchr(argv
[1], '.')) && !strcmp(p
, ".c")) ++doDotC
;
464 /* now process the files */
/* skip the program name and the output file name */
465 argc
-= 2, argv
+= 2;
468 fprintf(fout
, "/* Generated by mkjambase from Jambase */\n");
470 fprintf(fout
, "char **jambase = 0;\n");
471 fprintf(fout
, "unsigned char jambasepk[] = {");
473 fprintf(fout
, "const char *jambase[] = {\n");
/* every remaining argument is an input file */
477 for (; argc
--; ++argv
) processFile(fout
, *argv
);
/* something was accumulated for compression */
479 if (outdatalen
> 0) {
485 if (lzCompress(outdata
, outdatalen
, &dest
, &destlen
) != 0) {
486 perror("compression error!");
489 //fwrite(dest, destlen, 1, fout);
/* dump the compressed bytes as comma-separated hexadecimal constants */
490 for (int f
= 0; f
< destlen
; ++f
) {
496 fprintf(fout
, "0x%02x,", dest
[f
]);
498 if (cnt
> 0) fputc('\n', fout
);
502 //fwrite(outdata, outdatalen, 1, fout);
/* in string-array mode a literal 0 terminates the array */
507 if (!doCompress
) fputc('0', fout
);
508 fprintf(fout
, "};\n");
509 if (doCompress
) fprintf(fout
, "\nint jbpksize (void) { return sizeof(jambasepk); }\n");