Update README.md
[sm64pc.git] / tools / libmio0.c
blob2c7a493fc0fab19ddc5cb823ffbe8cf6fbd6ec28
1 #include <stdio.h>
2 #include <string.h>
3 #include <stdlib.h>
4 #if defined(_WIN32) || defined(_WIN64)
5 #include <io.h>
6 #include <fcntl.h>
7 #endif
9 #include "libmio0.h"
10 #include "utils.h"
12 // defines
14 #define MIO0_VERSION "0.1"
16 #define GET_BIT(buf, bit) ((buf)[(bit) / 8] & (1 << (7 - ((bit) % 8))))
18 // types
19 typedef struct
21 int *indexes;
22 int allocated;
23 int count;
24 int start;
25 } lookback;
27 // functions
28 #define LOOKBACK_COUNT 256
29 #define LOOKBACK_INIT_SIZE 128
30 static lookback *lookback_init(void)
32 lookback *lb = malloc(LOOKBACK_COUNT * sizeof(*lb));
33 for (int i = 0; i < LOOKBACK_COUNT; i++) {
34 lb[i].allocated = LOOKBACK_INIT_SIZE;
35 lb[i].indexes = malloc(lb[i].allocated * sizeof(*lb[i].indexes));
36 lb[i].count = 0;
37 lb[i].start = 0;
39 return lb;
42 static void lookback_free(lookback *lb)
44 for (int i = 0; i < LOOKBACK_COUNT; i++) {
45 free(lb[i].indexes);
47 free(lb);
50 static inline void lookback_push(lookback *lkbk, unsigned char val, int index)
52 lookback *lb = &lkbk[val];
53 if (lb->count == lb->allocated) {
54 lb->allocated *= 4;
55 lb->indexes = realloc(lb->indexes, lb->allocated * sizeof(*lb->indexes));
57 lb->indexes[lb->count++] = index;
60 static void PUT_BIT(unsigned char *buf, int bit, int val)
62 unsigned char mask = 1 << (7 - (bit % 8));
63 unsigned int offset = bit / 8;
64 buf[offset] = (buf[offset] & ~(mask)) | (val ? mask : 0);
67 // used to find longest matching stream in buffer
68 // buf: buffer
69 // start_offset: offset in buf to look back from
70 // max_search: max number of bytes to find
71 // found_offset: returned offset found (0 if none found)
72 // returns max length of matching stream (0 if none found)
73 static int find_longest(const unsigned char *buf, int start_offset, int max_search, int *found_offset, lookback *lkbk)
75 int best_length = 0;
76 int best_offset = 0;
77 int cur_length;
78 int search_len;
79 int farthest, off, i;
80 int lb_idx;
81 const unsigned char first = buf[start_offset];
82 lookback *lb = &lkbk[first];
84 // buf
85 // | off start max
86 // V |+i-> |+i-> |
87 // |--------------raw-data-----------------|
88 // |+i-> | |+i->
89 // +cur_length
91 // check at most the past 4096 values
92 farthest = MAX(start_offset - 4096, 0);
93 // find starting index
94 for (lb_idx = lb->start; lb_idx < lb->count && lb->indexes[lb_idx] < farthest; lb_idx++) {}
95 lb->start = lb_idx;
96 for ( ; lb_idx < lb->count && lb->indexes[lb_idx] < start_offset; lb_idx++) {
97 off = lb->indexes[lb_idx];
98 // check at most requested max or up until start
99 search_len = MIN(max_search, start_offset - off);
100 for (i = 0; i < search_len; i++) {
101 if (buf[start_offset + i] != buf[off + i]) {
102 break;
105 cur_length = i;
106 // if matched up until start, continue matching in already matched parts
107 if (cur_length == search_len) {
108 // check at most requested max less current length
109 search_len = max_search - cur_length;
110 for (i = 0; i < search_len; i++) {
111 if (buf[start_offset + cur_length + i] != buf[off + i]) {
112 break;
115 cur_length += i;
117 if (cur_length > best_length) {
118 best_offset = start_offset - off;
119 best_length = cur_length;
123 // return best reverse offset and length (may be 0)
124 *found_offset = best_offset;
125 return best_length;
128 // decode MIO0 header
129 // returns 1 if valid header, 0 otherwise
130 int mio0_decode_header(const unsigned char *buf, mio0_header_t *head)
132 if (!memcmp(buf, "MIO0", 4)) {
133 head->dest_size = read_u32_be(&buf[4]);
134 head->comp_offset = read_u32_be(&buf[8]);
135 head->uncomp_offset = read_u32_be(&buf[12]);
136 return 1;
138 return 0;
141 void mio0_encode_header(unsigned char *buf, const mio0_header_t *head)
143 memcpy(buf, "MIO0", 4);
144 write_u32_be(&buf[4], head->dest_size);
145 write_u32_be(&buf[8], head->comp_offset);
146 write_u32_be(&buf[12], head->uncomp_offset);
149 int mio0_decode(const unsigned char *in, unsigned char *out, unsigned int *end)
151 mio0_header_t head;
152 unsigned int bytes_written = 0;
153 int bit_idx = 0;
154 int comp_idx = 0;
155 int uncomp_idx = 0;
156 int valid;
158 // extract header
159 valid = mio0_decode_header(in, &head);
160 // verify MIO0 header
161 if (!valid) {
162 return -2;
165 // decode data
166 while (bytes_written < head.dest_size) {
167 if (GET_BIT(&in[MIO0_HEADER_LENGTH], bit_idx)) {
168 // 1 - pull uncompressed data
169 out[bytes_written] = in[head.uncomp_offset + uncomp_idx];
170 bytes_written++;
171 uncomp_idx++;
172 } else {
173 // 0 - read compressed data
174 int idx;
175 int length;
176 int i;
177 const unsigned char *vals = &in[head.comp_offset + comp_idx];
178 comp_idx += 2;
179 length = ((vals[0] & 0xF0) >> 4) + 3;
180 idx = ((vals[0] & 0x0F) << 8) + vals[1] + 1;
181 for (i = 0; i < length; i++) {
182 out[bytes_written] = out[bytes_written - idx];
183 bytes_written++;
186 bit_idx++;
189 if (end) {
190 *end = head.uncomp_offset + uncomp_idx;
193 return bytes_written;
196 int mio0_encode(const unsigned char *in, unsigned int length, unsigned char *out)
198 unsigned char *bit_buf;
199 unsigned char *comp_buf;
200 unsigned char *uncomp_buf;
201 unsigned int bit_length;
202 unsigned int comp_offset;
203 unsigned int uncomp_offset;
204 unsigned int bytes_proc = 0;
205 int bytes_written;
206 int bit_idx = 0;
207 int comp_idx = 0;
208 int uncomp_idx = 0;
209 lookback *lookbacks;
211 // initialize lookback buffer
212 lookbacks = lookback_init();
214 // allocate some temporary buffers worst case size
215 bit_buf = malloc((length + 7) / 8); // 1-bit/byte
216 comp_buf = malloc(length); // 16-bits/2bytes
217 uncomp_buf = malloc(length); // all uncompressed
218 memset(bit_buf, 0, (length + 7) / 8);
220 // encode data
221 // special case for first byte
222 lookback_push(lookbacks, in[0], 0);
223 uncomp_buf[uncomp_idx] = in[0];
224 uncomp_idx += 1;
225 bytes_proc += 1;
226 PUT_BIT(bit_buf, bit_idx++, 1);
227 while (bytes_proc < length) {
228 int offset;
229 int max_length = MIN(length - bytes_proc, 18);
230 int longest_match = find_longest(in, bytes_proc, max_length, &offset, lookbacks);
231 // push current byte before checking next longer match
232 lookback_push(lookbacks, in[bytes_proc], bytes_proc);
233 if (longest_match > 2) {
234 int lookahead_offset;
235 // lookahead to next byte to see if longer match
236 int lookahead_length = MIN(length - bytes_proc - 1, 18);
237 int lookahead_match = find_longest(in, bytes_proc + 1, lookahead_length, &lookahead_offset, lookbacks);
238 // better match found, use uncompressed + lookahead compressed
239 if ((longest_match + 1) < lookahead_match) {
240 // uncompressed byte
241 uncomp_buf[uncomp_idx] = in[bytes_proc];
242 uncomp_idx++;
243 PUT_BIT(bit_buf, bit_idx, 1);
244 bytes_proc++;
245 longest_match = lookahead_match;
246 offset = lookahead_offset;
247 bit_idx++;
248 lookback_push(lookbacks, in[bytes_proc], bytes_proc);
250 // first byte already pushed above
251 for (int i = 1; i < longest_match; i++) {
252 lookback_push(lookbacks, in[bytes_proc + i], bytes_proc + i);
254 // compressed block
255 comp_buf[comp_idx] = (((longest_match - 3) & 0x0F) << 4) |
256 (((offset - 1) >> 8) & 0x0F);
257 comp_buf[comp_idx + 1] = (offset - 1) & 0xFF;
258 comp_idx += 2;
259 PUT_BIT(bit_buf, bit_idx, 0);
260 bytes_proc += longest_match;
261 } else {
262 // uncompressed byte
263 uncomp_buf[uncomp_idx] = in[bytes_proc];
264 uncomp_idx++;
265 PUT_BIT(bit_buf, bit_idx, 1);
266 bytes_proc++;
268 bit_idx++;
271 // compute final sizes and offsets
272 // +7 so int division accounts for all bits
273 bit_length = ((bit_idx + 7) / 8);
274 // compressed data after control bits and aligned to 4-byte boundary
275 comp_offset = ALIGN(MIO0_HEADER_LENGTH + bit_length, 4);
276 uncomp_offset = comp_offset + comp_idx;
277 bytes_written = uncomp_offset + uncomp_idx;
279 // output header
280 memcpy(out, "MIO0", 4);
281 write_u32_be(&out[4], length);
282 write_u32_be(&out[8], comp_offset);
283 write_u32_be(&out[12], uncomp_offset);
284 // output data
285 memcpy(&out[MIO0_HEADER_LENGTH], bit_buf, bit_length);
286 memcpy(&out[comp_offset], comp_buf, comp_idx);
287 memcpy(&out[uncomp_offset], uncomp_buf, uncomp_idx);
289 // free allocated buffers
290 free(bit_buf);
291 free(comp_buf);
292 free(uncomp_buf);
293 lookback_free(lookbacks);
295 return bytes_written;
298 static FILE *mio0_open_out_file(const char *out_file) {
299 if (strcmp(out_file, "-") == 0) {
300 #if defined(_WIN32) || defined(_WIN64)
301 _setmode(_fileno(stdout), _O_BINARY);
302 #endif
303 return stdout;
304 } else {
305 return fopen(out_file, "wb");
309 int mio0_decode_file(const char *in_file, unsigned long offset, const char *out_file)
311 mio0_header_t head;
312 FILE *in;
313 FILE *out;
314 unsigned char *in_buf = NULL;
315 unsigned char *out_buf = NULL;
316 long file_size;
317 int ret_val = 0;
318 size_t bytes_read;
319 int bytes_decoded;
320 int bytes_written;
321 int valid;
323 in = fopen(in_file, "rb");
324 if (in == NULL) {
325 return 1;
328 // allocate buffer to read from offset to end of file
329 fseek(in, 0, SEEK_END);
330 file_size = ftell(in);
331 in_buf = malloc(file_size - offset);
332 fseek(in, offset, SEEK_SET);
334 // read bytes
335 bytes_read = fread(in_buf, 1, file_size - offset, in);
336 if (bytes_read != file_size - offset) {
337 ret_val = 2;
338 goto free_all;
341 // verify header
342 valid = mio0_decode_header(in_buf, &head);
343 if (!valid) {
344 ret_val = 3;
345 goto free_all;
347 out_buf = malloc(head.dest_size);
349 // decompress MIO0 encoded data
350 bytes_decoded = mio0_decode(in_buf, out_buf, NULL);
351 if (bytes_decoded < 0) {
352 ret_val = 3;
353 goto free_all;
356 // open output file
357 out = mio0_open_out_file(out_file);
358 if (out == NULL) {
359 ret_val = 4;
360 goto free_all;
363 // write data to file
364 bytes_written = fwrite(out_buf, 1, bytes_decoded, out);
365 if (bytes_written != bytes_decoded) {
366 ret_val = 5;
369 // clean up
370 if (out != stdout) {
371 fclose(out);
373 free_all:
374 if (out_buf) {
375 free(out_buf);
377 if (in_buf) {
378 free(in_buf);
380 fclose(in);
382 return ret_val;
385 int mio0_encode_file(const char *in_file, const char *out_file)
387 FILE *in;
388 FILE *out;
389 unsigned char *in_buf = NULL;
390 unsigned char *out_buf = NULL;
391 size_t file_size;
392 size_t bytes_read;
393 int bytes_encoded;
394 int bytes_written;
395 int ret_val = 0;
397 in = fopen(in_file, "rb");
398 if (in == NULL) {
399 return 1;
402 // allocate buffer to read entire contents of files
403 fseek(in, 0, SEEK_END);
404 file_size = ftell(in);
405 fseek(in, 0, SEEK_SET);
406 in_buf = malloc(file_size);
408 // read bytes
409 bytes_read = fread(in_buf, 1, file_size, in);
410 if (bytes_read != file_size) {
411 ret_val = 2;
412 goto free_all;
415 // allocate worst case length
416 out_buf = malloc(MIO0_HEADER_LENGTH + ((file_size+7)/8) + file_size);
418 // compress data in MIO0 format
419 bytes_encoded = mio0_encode(in_buf, file_size, out_buf);
421 // open output file
422 out = mio0_open_out_file(out_file);
423 if (out == NULL) {
424 ret_val = 4;
425 goto free_all;
428 // write data to file
429 bytes_written = fwrite(out_buf, 1, bytes_encoded, out);
430 if (bytes_written != bytes_encoded) {
431 ret_val = 5;
434 // clean up
435 if (out != stdout) {
436 fclose(out);
438 free_all:
439 if (out_buf) {
440 free(out_buf);
442 if (in_buf) {
443 free(in_buf);
445 fclose(in);
447 return ret_val;
450 // mio0 standalone executable
451 #ifdef MIO0_STANDALONE
452 typedef struct
454 char *in_filename;
455 char *out_filename;
456 unsigned int offset;
457 int compress;
458 } arg_config;
460 static arg_config default_config =
462 NULL,
463 NULL,
468 static void print_usage(void)
470 ERROR("Usage: mio0 [-c / -d] [-o OFFSET] FILE [OUTPUT]\n"
471 "\n"
472 "mio0 v" MIO0_VERSION ": MIO0 compression and decompression tool\n"
473 "\n"
474 "Optional arguments:\n"
475 " -c compress raw data into MIO0 (default: compress)\n"
476 " -d decompress MIO0 into raw data\n"
477 " -o OFFSET starting offset in FILE (default: 0)\n"
478 "\n"
479 "File arguments:\n"
480 " FILE input file\n"
481 " [OUTPUT] output file (default: FILE.out), \"-\" for stdout\n");
482 exit(1);
485 // parse command line arguments
486 static void parse_arguments(int argc, char *argv[], arg_config *config)
488 int i;
489 int file_count = 0;
490 if (argc < 2) {
491 print_usage();
492 exit(1);
494 for (i = 1; i < argc; i++) {
495 if (argv[i][0] == '-' && argv[i][1] != '\0') {
496 switch (argv[i][1]) {
497 case 'c':
498 config->compress = 1;
499 break;
500 case 'd':
501 config->compress = 0;
502 break;
503 case 'o':
504 if (++i >= argc) {
505 print_usage();
507 config->offset = strtoul(argv[i], NULL, 0);
508 break;
509 default:
510 print_usage();
511 break;
513 } else {
514 switch (file_count) {
515 case 0:
516 config->in_filename = argv[i];
517 break;
518 case 1:
519 config->out_filename = argv[i];
520 break;
521 default: // too many
522 print_usage();
523 break;
525 file_count++;
528 if (file_count < 1) {
529 print_usage();
533 int main(int argc, char *argv[])
535 char out_filename[FILENAME_MAX];
536 arg_config config;
537 int ret_val;
539 // get configuration from arguments
540 config = default_config;
541 parse_arguments(argc, argv, &config);
542 if (config.out_filename == NULL) {
543 config.out_filename = out_filename;
544 sprintf(config.out_filename, "%s.out", config.in_filename);
547 // operation
548 if (config.compress) {
549 ret_val = mio0_encode_file(config.in_filename, config.out_filename);
550 } else {
551 ret_val = mio0_decode_file(config.in_filename, config.offset, config.out_filename);
554 switch (ret_val) {
555 case 1:
556 ERROR("Error opening input file \"%s\"\n", config.in_filename);
557 break;
558 case 2:
559 ERROR("Error reading from input file \"%s\"\n", config.in_filename);
560 break;
561 case 3:
562 ERROR("Error decoding MIO0 data. Wrong offset (0x%X)?\n", config.offset);
563 break;
564 case 4:
565 ERROR("Error opening output file \"%s\"\n", config.out_filename);
566 break;
567 case 5:
568 ERROR("Error writing bytes to output file \"%s\"\n", config.out_filename);
569 break;
572 return ret_val;
574 #endif // MIO0_STANDALONE