add completion callback facility to the interpreter
[philodendron.git] / runtime / bc_read_stream.c
blob2ad55b03c7755cf52926c4ec080367880db7e8c5
2 #include "bc_read_stream.h"
/* Operand encodings: the values of the 3-bit encoding field that
 * read_abbrev_op() reads for every non-literal abbreviation operand. */
#define OP_ENCODING_FIXED 1
#define OP_ENCODING_VBR   2
#define OP_ENCODING_ARRAY 3
#define OP_ENCODING_CHAR6 4

/* Abbreviation ids with a built-in meaning.  bc_rs_next_record() treats every
 * id from 4 upwards as a user-defined abbreviation. */
#define ABBREV_ID_END_BLOCK       0
#define ABBREV_ID_ENTER_SUBBLOCK  1
#define ABBREV_ID_DEFINE_ABBREV   2
#define ABBREV_ID_UNABBREV_RECORD 3

/* Block id whose contents bc_rs_next_data_record() consumes itself in order
 * to collect per-block abbreviations. */
#define STDBLOCK_BLOCKINFO 0

/* Record id, inside a BLOCKINFO block, that selects the block id the
 * following abbreviation definitions belong to. */
#define BLOCKINFO_BLOCK_SETBID 1
/* Grow the heap array `ptr` (capacity `size`, counted in elements) until it
 * can hold at least `desired_size` elements.
 *
 * The capacity is doubled repeatedly rather than just once, so a request for
 * more than twice the current capacity still ends up with enough room.  The
 * process is aborted if the allocation fails: no caller has a way to recover,
 * and carrying on with a NULL buffer would only crash later.
 *
 * `ptr` and `size` must be modifiable lvalues; all three arguments are
 * evaluated more than once, so they must be free of side effects.  The
 * expansion is a single statement and must be followed by a semicolon. */
#define RESIZE_ARRAY_IF_NECESSARY(ptr, size, desired_size) \
  do \
  { \
    if((size) < (desired_size)) \
    { \
      void *resized_array_; \
      while((size) < (desired_size)) \
        (size) = ((size) > 0) ? (size) * 2 : 1; \
      resized_array_ = realloc((ptr), (size)*sizeof(*(ptr))); \
      if(resized_array_ == NULL) \
        abort(); \
      (ptr) = resized_array_; \
    } \
  } while(0)
25 #include <stdio.h>
26 #include <stdlib.h>
/* A single abbreviation that a BLOCKINFO block registered for some block id:
 * an owned array of `num_operands` operand descriptions. */
struct blockinfo_abbrev {
    int num_operands;
    struct abbrev_operand *operands;
};

/* Every abbreviation registered through BLOCKINFO for one block id.
 * `abbreviations` is a growable array with `size_abbreviations` slots, of
 * which the first `num_abbreviations` are in use. */
struct blockinfo {
    uint32_t block_id;
    int num_abbreviations;
    int size_abbreviations;
    struct blockinfo_abbrev *abbreviations;
};
/* What a BlockMetadata stack entry remembers about an open block. */
struct block_metadata {
    int abbrev_len;
    int block_id;
    int block_offset;
    int block_len;
};

/* Discriminator for the union inside struct stream_stack_entry. */
enum EntryType {
    BlockMetadata,
    Abbreviation
};

/* One slot of the stream stack.  A slot is either the frame of an open block
 * or an abbreviation defined inside that block; `type` says which member of
 * `e` is meaningful. */
struct stream_stack_entry
{
    union {
        struct block_metadata block_metadata;

        /* The abbreviation's operands live in the stream's shared
         * abbrev_operands array, starting at first_operand_offset. */
        struct {
            int first_operand_offset;
            int num_operands;
        } abbrev;
    } e;

    enum EntryType type;
};
/* Discriminator for the union inside struct abbrev_operand. */
enum OperandType {
    Literal,
    EncodingInfo
};

/* One operand of an abbreviation definition.  It is either a literal value
 * that is part of the definition itself, or a description of how the value is
 * encoded in the stream (an OP_ENCODING_* code plus its parameter). */
struct abbrev_operand
{
    union {
        long long literal_value;
        struct {
            unsigned char encoding;
            int value;
        } encoding_info;
    } o;

    enum OperandType type;
};
/* Complete reader state for one open bitcode file: the bit buffer, the block
 * currently being read, the record most recently parsed, and the growable
 * arrays backing the block/abbreviation stack and the BLOCKINFO data. */
struct bc_read_stream
{
    /* Values for the stream */
    FILE *infile;
    uint32_t next_bits;     /* bits fetched from infile and not yet consumed; consumed from the low end */
    int num_next_bits;      /* how many bits of next_bits are still unconsumed */
    int stream_err;         /* BITCODE_ERR_* flags OR-ed together; never cleared in this file */
    int stream_offset;      /* advanced by 4 on every successful refill; re-based by skip/rewind */

    /* Frame of the block closed by the most recent END_BLOCK; read by
     * bc_rs_rewind_block() */
    struct stream_stack_entry *old_block_metadata;

    /* Values for the current block */
    int abbrev_len;         /* width in bits of the abbreviation ids in this block */
    int num_abbrevs;        /* abbreviations defined locally in this block */
    struct stream_stack_entry *block_metadata;  /* this block's frame inside stream_stack */
    struct blockinfo *blockinfo;                /* BLOCKINFO data for this block id, if any */

    /* Values for the current record */
    enum RecordType record_type;

    /* - for data records */
    int record_id;
    int current_record_size;    /* number of values stored in record_buf */
    int current_record_offset;  /* cursor used by the bc_rs_read_next_* getters */
    int record_buf_size;        /* allocated capacity of record_buf, in values */
    uint64_t *record_buf;

    /* - for StartBlock records */
    int block_id;
    int block_len;

    /* - for DefineAbbrev records */
    int record_size_abbrev;     /* allocated capacity of record_abbrev_operands */
    int record_num_abbrev;      /* operands in the definition just read */
    struct abbrev_operand *record_abbrev_operands;

    /* The stream stack */
    int stream_stack_size;      /* allocated capacity, in entries */
    int stream_stack_len;       /* entries in use */
    struct stream_stack_entry *stream_stack;

    /* Operand storage shared by every Abbreviation entry on the stream stack */
    int abbrev_operands_size;
    int abbrev_operands_len;
    struct abbrev_operand *abbrev_operands;

    /* Data about blockinfo records we have encountered */
    int blockinfo_size;
    int blockinfo_len;
    struct blockinfo *blockinfos;
};
129 void print_abbrev(struct abbrev_operand *operands, int num_operands)
131 printf("Abbrev: num_operands=%d\n", num_operands);
132 for(int i = 0; i < num_operands; i++)
134 struct abbrev_operand *o = &operands[i];
135 if(o->type == Literal)
137 printf(" Literal value: %llu\n", o->o.literal_value);
139 else if(o->type == EncodingInfo)
141 printf(" EncodingInfo: encoding=%u, value=%d\n", o->o.encoding_info.encoding,
142 o->o.encoding_info.value);
147 void dump_stack(struct bc_read_stream *s)
149 printf("Stream stack: %d entries\n", s->stream_stack_len);
150 for(int i = 0; i < s->stream_stack_len; i++)
152 printf("- ");
153 struct stream_stack_entry *e = &s->stream_stack[i];
154 if(e->type == Abbreviation)
156 print_abbrev(s->abbrev_operands + e->e.abbrev.first_operand_offset, e->e.abbrev.num_operands);
158 else if(e->type == BlockMetadata)
160 printf("BlockMetadata: abbrev_len=%d, block_id=%d\n", e->e.block_metadata.abbrev_len,
161 e->e.block_metadata.block_id);
166 void dump_blockinfo(struct blockinfo *bi)
168 if(bi)
170 printf("Blockinfo! BlockID: %u, Abbrevs:\n", bi->block_id);
171 for(int i = 0; i < bi->num_abbreviations; i++)
172 print_abbrev(bi->abbreviations[i].operands, bi->abbreviations[i].num_operands);
177 static int refill_next_bits(struct bc_read_stream *stream);
179 struct bc_read_stream *bc_rs_open_file(const char *filename)
181 FILE *infile = fopen(filename, "r");
183 if(infile == NULL)
185 return NULL;
188 char magic[4];
189 int ret = fread(magic, 4, 1, infile);
190 if(ret < 1 || magic[0] != 'B' || magic[1] != 'C')
192 fclose(infile);
193 return NULL;
196 /* TODO: give the application a way to get the app-specific magic number */
198 struct bc_read_stream *stream = malloc(sizeof(*stream));
199 stream->infile = infile;
200 stream->stream_err = 0;
202 stream->next_bits = 0;
203 stream->num_next_bits = 0;
204 stream->stream_offset = 0;
205 refill_next_bits(stream);
207 stream->abbrev_len = 2; /* its initial value according to the spec */
208 stream->num_abbrevs = 0;
210 stream->stream_stack_size = 8; /* enough for a few levels of nesting and a few abbrevs */
211 stream->stream_stack = malloc(stream->stream_stack_size*sizeof(*stream->stream_stack));
213 /* we create an outermose stack frame -- this exists mostly to store
214 * the abbrev length of the outermost scope, and to store a bogus
215 * block_id so that we'll never find a blockinfo for the outer scope */
216 stream->stream_stack_len = 1;
217 stream->block_metadata = &stream->stream_stack[0];
218 stream->block_metadata->type = BlockMetadata;
219 stream->block_metadata->e.block_metadata.abbrev_len = stream->abbrev_len;
220 stream->block_metadata->e.block_metadata.block_id = -1;
222 stream->record_type = DataRecord; /* anything besides Eof */
224 stream->abbrev_operands_size = 8;
225 stream->abbrev_operands_len = 0;
226 stream->abbrev_operands = malloc(stream->abbrev_operands_size*sizeof(*stream->abbrev_operands));
228 stream->blockinfo_size = 8;
229 stream->blockinfo_len = 0;
230 stream->blockinfos = malloc(stream->blockinfo_size*sizeof(*stream->blockinfos));
232 stream->record_buf_size = 8;
233 stream->record_buf = malloc(stream->record_buf_size*sizeof(*stream->record_buf));
235 stream->record_size_abbrev = 8;
236 stream->record_abbrev_operands = malloc(stream->record_size_abbrev*sizeof(*stream->record_abbrev_operands));
238 return stream;
241 void bc_rs_close_stream(struct bc_read_stream *stream)
243 free(stream->record_abbrev_operands);
244 free(stream->record_buf);
245 free(stream->abbrev_operands);
246 free(stream->stream_stack);
248 for(int i = 0; i < stream->blockinfo_len; i++)
250 for(int j = 0; j < stream->blockinfos[i].num_abbreviations; j++)
252 free(stream->blockinfos[i].abbreviations[j].operands);
254 free(stream->blockinfos[i].abbreviations);
256 free(stream->blockinfos);
258 fclose(stream->infile);
259 free(stream);
262 uint64_t bc_rs_read_64(struct bc_read_stream *stream, int i)
264 if(i > stream->current_record_size)
266 stream->stream_err |= BITCODE_ERR_NO_SUCH_VALUE;
267 return 0;
269 else
271 return stream->record_buf[i];
276 #define GETTER_FUNC(type, bits) \
277 type bc_rs_read_ ## bits (struct bc_read_stream *stream, int i) \
279 uint64_t val = bc_rs_read_64(stream, i); \
280 if(stream->record_buf[i] > ((1ULL << bits) - 1)) \
282 stream->stream_err |= BITCODE_ERR_VALUE_TOO_LARGE; \
283 return 0; \
285 else \
287 return (type)val; \
291 GETTER_FUNC(uint8_t, 8)
292 GETTER_FUNC(uint16_t, 16)
293 GETTER_FUNC(uint32_t, 32)
295 #define NEXT_GETTER_FUNC(type, bits) \
296 type bc_rs_read_next_ ## bits (struct bc_read_stream *stream) \
298 return bc_rs_read_ ## bits(stream, stream->current_record_offset++); \
301 NEXT_GETTER_FUNC(uint8_t, 8)
302 NEXT_GETTER_FUNC(uint16_t, 16)
303 NEXT_GETTER_FUNC(uint32_t, 32)
304 NEXT_GETTER_FUNC(uint64_t, 64)
306 static int refill_next_bits(struct bc_read_stream *stream)
308 unsigned char buf[4];
309 int ret = fread(buf, 4, 1, stream->infile);
310 if(ret < 1)
312 //if(feof(stream->infile))
313 // stream->record_type = Eof;
315 if(ferror(stream->infile))
316 stream->stream_err |= BITCODE_ERR_IO;
318 return -1;
321 stream->next_bits = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
322 stream->num_next_bits = 32;
323 stream->stream_offset += 4;
325 return 0;
328 #define LOW_BITS(bitfield, num_bits) (bitfield & (~0U >> (32-num_bits)))
330 static uint32_t read_fixed(struct bc_read_stream *stream, int num_bits)
332 uint32_t ret;
334 if(stream->num_next_bits >= num_bits)
336 /* next_bits already contains all the bits we need -- take them */
337 ret = LOW_BITS(stream->next_bits, num_bits);
338 stream->next_bits >>= num_bits;
339 stream->num_next_bits -= num_bits;
341 else
343 /* we need all of next_bits, and then some */
344 ret = stream->next_bits;
345 int bits_filled = stream->num_next_bits;
346 int bits_left = num_bits - bits_filled;
348 if(refill_next_bits(stream) < 0) return 0;
350 /* take bits_left bits from the next_bits */
351 ret |= LOW_BITS(stream->next_bits, bits_left) << bits_filled;
353 if(bits_left != 32)
354 stream->next_bits >>= bits_left;
355 else
356 stream->next_bits = 0;
358 stream->num_next_bits -= bits_left;
361 if(stream->num_next_bits == 0)
363 /* We could defer this, but doing it now makes our stream_offset more accurate */
364 refill_next_bits(stream);
367 return ret;
/* Read a fixed-width field of `num_bits` bits.  Fields wider than 32 bits are
 * read as a full 32-bit low word followed by the remaining high bits. */
static uint64_t read_fixed_64(struct bc_read_stream *stream, int num_bits)
{
    if(num_bits > 32)
    {
        uint64_t low_word = read_fixed(stream, 32);
        uint64_t high_word = read_fixed(stream, num_bits-32);
        return low_word | (high_word << 32);
    }

    return read_fixed(stream, num_bits);
}
383 static uint64_t read_vbr_64(struct bc_read_stream *stream, int bits)
385 uint64_t val = 0;
386 int read_bits = 0;
387 int continuation_bit = 1 << (bits-1);
388 int value_bits = continuation_bit - 1;
389 int continues = 0;
391 do {
392 uint32_t next_bits = read_fixed(stream, bits);
393 continues = next_bits & continuation_bit;
394 val |= (next_bits & value_bits) << read_bits;
395 read_bits += bits-1;
396 } while(continues);
398 return val;
401 static uint32_t read_vbr(struct bc_read_stream *stream, int bits)
403 uint64_t val = read_vbr_64(stream, bits);
404 if(val >> 32)
406 stream->stream_err |= BITCODE_ERR_CORRUPT_INPUT;
407 return 0;
409 else
411 return (uint32_t)val;
/* Map a char6 code to its character: 0-25 -> 'a'-'z', 26-51 -> 'A'-'Z',
 * 52-61 -> '0'-'9', 62 -> '.', and anything from 63 upwards -> '_'. */
static uint8_t decode_char6(int num)
{
    if(num >= 63) return '_';
    if(num == 62) return '.';
    if(num >= 52) return '0' + (num-52);
    if(num >= 26) return 'A' + (num-26);
    return 'a' + num;
}
424 /* This can handle any abbreviated type except for arrays */
425 static uint64_t read_abbrev_value(struct bc_read_stream *stream, struct abbrev_operand *op)
427 if(op->type == Literal)
428 return op->o.literal_value;
429 else
431 switch(op->o.encoding_info.encoding) {
432 case OP_ENCODING_FIXED:
433 return read_fixed_64(stream, op->o.encoding_info.value);
434 case OP_ENCODING_VBR:
435 return read_vbr_64(stream, op->o.encoding_info.value);
436 case OP_ENCODING_CHAR6:
437 return decode_char6(read_fixed(stream, 6));
438 default:
439 stream->stream_err |= BITCODE_ERR_INTERNAL;
440 return 0;
445 static void append_value(struct bc_read_stream *stream, uint64_t val)
447 RESIZE_ARRAY_IF_NECESSARY(stream->record_buf, stream->record_buf_size, stream->current_record_size+1);
448 stream->record_buf[stream->current_record_size++] = val;
451 static void read_user_abbreviated_record(struct bc_read_stream *stream,
452 struct abbrev_operand *ops,
453 int num_operands)
455 stream->current_record_size = 0;
457 for(int i = 0; i < num_operands; i++)
459 struct abbrev_operand *op = &ops[i];
461 if(op->type == EncodingInfo && op->o.encoding_info.encoding == OP_ENCODING_ARRAY)
463 int num_elements = read_vbr(stream, 6);
464 i += 1;
465 for(int j = 0; j < num_elements; j++)
466 append_value(stream, read_abbrev_value(stream, &ops[i]));
468 else
470 uint64_t val = read_abbrev_value(stream, &ops[i]);
471 if(i == 0)
473 stream->record_id = val;
475 else
477 append_value(stream, val);
483 static int read_abbrev_op(struct bc_read_stream *stream, struct abbrev_operand *o, int array_ok)
485 int is_literal = read_fixed(stream, 1);
486 if(is_literal)
488 o->type = Literal;
489 o->o.literal_value = read_vbr(stream, 8);
491 else
493 o->type = EncodingInfo;
494 o->o.encoding_info.encoding = read_fixed(stream, 3);
495 switch(o->o.encoding_info.encoding)
497 case OP_ENCODING_FIXED:
498 case OP_ENCODING_VBR:
499 o->o.encoding_info.value = read_vbr(stream, 5);
500 break;
502 case OP_ENCODING_ARRAY:
503 if(!array_ok) return -1;
504 break;
506 case OP_ENCODING_CHAR6:
507 break;
510 return 0;
514 void align_32_bits(struct bc_read_stream *stream)
516 if(stream->num_next_bits != 32)
517 refill_next_bits(stream);
520 struct blockinfo *find_blockinfo(struct bc_read_stream *stream, int block_id)
522 for(int i = 0; i < stream->blockinfo_len; i++)
523 if(stream->blockinfos[i].block_id == block_id)
524 return &stream->blockinfos[i];
526 return NULL;
529 struct blockinfo *find_or_create_blockinfo(struct bc_read_stream *stream, int block_id)
531 struct blockinfo *bi = find_blockinfo(stream, block_id);
533 if(bi)
535 return bi;
537 else
539 RESIZE_ARRAY_IF_NECESSARY(stream->blockinfos, stream->blockinfo_size, stream->blockinfo_len+1);
541 struct blockinfo *new_bi = &stream->blockinfos[stream->blockinfo_len++];
543 new_bi->block_id = block_id;
544 new_bi->num_abbreviations = 0;
545 new_bi->size_abbreviations = 8;
546 new_bi->abbreviations = malloc(new_bi->size_abbreviations * sizeof(*new_bi->abbreviations));
548 return new_bi;
552 static void pop_stack_frame(struct bc_read_stream *stream)
554 stream->stream_stack_len = stream->block_metadata - stream->stream_stack;
555 if(stream->stream_stack_len == 0)
557 stream->record_type = Eof;
558 return;
561 stream->num_abbrevs = 0;
562 stream->block_metadata--;
563 while(stream->block_metadata->type == Abbreviation)
565 stream->num_abbrevs++;
566 stream->block_metadata--;
569 stream->abbrev_len = stream->block_metadata->e.block_metadata.abbrev_len;
570 stream->block_id = stream->block_metadata->e.block_metadata.block_id;
571 stream->blockinfo = find_blockinfo(stream, stream->block_id);
575 void bc_rs_next_record(struct bc_read_stream *stream)
577 /* don't attempt to read past eof */
578 if(stream->record_type == Eof) return;
580 int abbrev_id = read_fixed(stream, stream->abbrev_len);
581 stream->current_record_offset = 0;
583 switch(abbrev_id) {
584 case ABBREV_ID_END_BLOCK:
585 stream->record_type = EndBlock;
586 stream->old_block_metadata = stream->block_metadata;
588 align_32_bits(stream);
589 pop_stack_frame(stream);
591 break;
593 case ABBREV_ID_ENTER_SUBBLOCK:
594 stream->block_id = read_vbr(stream, 8);
595 stream->abbrev_len = read_vbr(stream, 4);
596 align_32_bits(stream);
597 stream->block_len = read_fixed(stream, 32);
598 stream->record_type = StartBlock;
600 RESIZE_ARRAY_IF_NECESSARY(stream->stream_stack, stream->stream_stack_size,
601 stream->stream_stack_len+1);
603 stream->block_metadata = &stream->stream_stack[stream->stream_stack_len++];
604 stream->block_metadata->type = BlockMetadata;
605 stream->block_metadata->e.block_metadata.block_id = stream->block_id;
606 stream->block_metadata->e.block_metadata.abbrev_len = stream->abbrev_len;
607 stream->block_metadata->e.block_metadata.block_offset = stream->stream_offset;
608 stream->block_metadata->e.block_metadata.block_len = stream->block_len;
610 //printf("++ Entering block id=%d, offset=%d\n", stream->block_id, stream->stream_offset);
612 stream->blockinfo = find_or_create_blockinfo(stream, stream->block_id);
613 break;
615 case ABBREV_ID_DEFINE_ABBREV:
616 stream->record_type = DefineAbbrev;
617 stream->record_num_abbrev = read_vbr(stream, 5);
619 RESIZE_ARRAY_IF_NECESSARY(stream->record_abbrev_operands, stream->record_size_abbrev,
620 stream->record_num_abbrev);
622 for(int i = 0; i < stream->record_num_abbrev; i++)
624 read_abbrev_op(stream, &stream->record_abbrev_operands[i], 0);
627 break;
629 case ABBREV_ID_UNABBREV_RECORD:
630 stream->record_type = DataRecord;
631 stream->record_id = read_vbr(stream, 6);
633 stream->current_record_size = read_vbr(stream, 6);
635 RESIZE_ARRAY_IF_NECESSARY(stream->record_buf, stream->record_buf_size,
636 stream->current_record_size+1);
638 for(int i = 0; i < stream->current_record_size; i++)
639 stream->record_buf[i] = read_vbr(stream, 6);
640 break;
642 default:
644 /* This must be a user-defined abbreviation. It could come from the
645 * blockinfo-defined abbreviations or abbreviations defined in this
646 * block. */
647 stream->record_type = DataRecord;
648 int user_abbrev_id = abbrev_id - 4;
649 int num_blockinfo_abbrevs = stream->blockinfo ? stream->blockinfo->num_abbreviations : 0;
650 int block_abbrev_id = user_abbrev_id - num_blockinfo_abbrevs;
651 if(user_abbrev_id < num_blockinfo_abbrevs)
653 struct blockinfo_abbrev *a = &stream->blockinfo->abbreviations[user_abbrev_id];
654 read_user_abbreviated_record(stream, a->operands, a->num_operands);
656 else if(block_abbrev_id < stream->num_abbrevs)
658 struct stream_stack_entry *e = stream->block_metadata + block_abbrev_id + 1;
659 struct abbrev_operand *o = stream->abbrev_operands + e->e.abbrev.first_operand_offset;
660 read_user_abbreviated_record(stream, o, e->e.abbrev.num_operands);
662 else
664 stream->stream_err |= BITCODE_ERR_CORRUPT_INPUT;
666 break;
671 struct record_info bc_rs_next_data_record(struct bc_read_stream *stream)
673 while(1)
675 bc_rs_next_record(stream);
677 if(stream->record_type == DefineAbbrev)
679 int num_ops = stream->record_num_abbrev;
681 RESIZE_ARRAY_IF_NECESSARY(stream->stream_stack, stream->stream_stack_size,
682 stream->stream_stack_len+1);
683 RESIZE_ARRAY_IF_NECESSARY(stream->abbrev_operands, stream->abbrev_operands_size,
684 stream->abbrev_operands_len+num_ops+1);
686 struct stream_stack_entry *e = &stream->stream_stack[stream->stream_stack_len++];
687 e->type = Abbreviation;
688 e->e.abbrev.first_operand_offset = stream->abbrev_operands_len;
689 e->e.abbrev.num_operands = num_ops;
690 struct abbrev_operand *abbrev_operands = &stream->abbrev_operands[stream->abbrev_operands_len];
691 stream->abbrev_operands_len += num_ops;
693 for(int i = 0; i < num_ops; i++)
694 abbrev_operands[i] = stream->record_abbrev_operands[i];
696 stream->num_abbrevs++;
698 else if(stream->record_type == StartBlock && stream->block_id == STDBLOCK_BLOCKINFO)
700 /* The first record must be a SETBID record */
701 bc_rs_next_record(stream);
702 struct blockinfo *bi = NULL;
704 while(1)
706 if(stream->record_type == EndBlock)
708 break;
710 else if(stream->record_type == Err || stream->record_type == Eof)
712 struct record_info ri;
713 ri.record_type = stream->record_type;
714 ri.id = 0;
715 return ri;
717 else if(stream->record_type == DataRecord)
719 if(stream->record_id == BLOCKINFO_BLOCK_SETBID)
721 if(stream->current_record_size != 1)
723 /* TODO */
724 stream->stream_err |= BITCODE_ERR_CORRUPT_INPUT;
726 bi = find_or_create_blockinfo(stream, stream->record_buf[0]);
729 else if(stream->record_type == DefineAbbrev)
732 if(bi == NULL)
734 /* TODO */
735 stream->stream_err |= BITCODE_ERR_CORRUPT_INPUT;
738 RESIZE_ARRAY_IF_NECESSARY(bi->abbreviations,
739 bi->size_abbreviations, bi->num_abbreviations+1);
741 struct blockinfo_abbrev *abbrev = &bi->abbreviations[bi->num_abbreviations++];
742 abbrev->num_operands = stream->record_num_abbrev;
743 abbrev->operands = malloc(sizeof(*abbrev->operands) * abbrev->num_operands);
744 for(int i = 0; i < abbrev->num_operands; i++)
745 abbrev->operands[i] = stream->record_abbrev_operands[i];
748 bc_rs_next_record(stream);
752 else
754 struct record_info ri;
755 ri.record_type = stream->record_type;
756 ri.id = 0;
758 if(ri.record_type == StartBlock) ri.id = stream->block_id;
759 else if(ri.record_type == DataRecord) ri.id = stream->record_id;
761 return ri;
766 int bc_rs_get_error(struct bc_read_stream *stream)
768 return stream->stream_err;
771 int bc_rs_get_record_size(struct bc_read_stream *stream)
773 return stream->current_record_size;
776 int bc_rs_get_remaining_record_size(struct bc_read_stream *stream)
778 return stream->current_record_size - stream->current_record_offset;
781 void bc_rs_skip_block(struct bc_read_stream *stream)
783 int offset = stream->block_metadata->e.block_metadata.block_offset +
784 (stream->block_metadata->e.block_metadata.block_len * 4);
786 fseek(stream->infile, offset, SEEK_SET);
787 stream->stream_offset = offset-4;
788 refill_next_bits(stream);
789 pop_stack_frame(stream);
792 void bc_rs_rewind_block(struct bc_read_stream *stream)
794 if(stream->record_type == EndBlock)
796 stream->num_abbrevs = stream->old_block_metadata - stream->block_metadata - 1;
797 stream->block_metadata = stream->old_block_metadata;
798 stream->abbrev_len = stream->block_metadata->e.block_metadata.abbrev_len;
799 stream->block_id = stream->block_metadata->e.block_metadata.block_id;
800 stream->blockinfo = find_or_create_blockinfo(stream, stream->block_id);
801 stream->stream_stack_len = stream->block_metadata - stream->stream_stack + 1;
804 int offset = stream->block_metadata->e.block_metadata.block_offset;
805 fseek(stream->infile, offset, SEEK_SET);
806 stream->stream_offset = offset-4;
807 refill_next_bits(stream);
808 align_32_bits(stream);
812 * Local Variables:
813 * c-file-style: "bsd"
814 * c-basic-offset: 4
815 * indent-tabs-mode: nil
816 * End:
817 * vim:et:sts=4:sw=4