2 #include "bc_read_stream.h"
4 #define OP_ENCODING_FIXED 1
5 #define OP_ENCODING_VBR 2
6 #define OP_ENCODING_ARRAY 3
7 #define OP_ENCODING_CHAR6 4
9 #define ABBREV_ID_END_BLOCK 0
10 #define ABBREV_ID_ENTER_SUBBLOCK 1
11 #define ABBREV_ID_DEFINE_ABBREV 2
12 #define ABBREV_ID_UNABBREV_RECORD 3
14 #define STDBLOCK_BLOCKINFO 0
16 #define BLOCKINFO_BLOCK_SETBID 1
/*
 * Grow a heap array so that it can hold at least desired_size elements.
 *
 *   ptr          - lvalue pointing at the array (reassigned when it grows)
 *   size         - lvalue holding the current capacity in elements (updated)
 *   desired_size - minimum capacity required, in elements
 *
 * The capacity is doubled until it reaches desired_size (and is clamped to
 * desired_size rather than doubled past it, which also avoids overflowing
 * the int), so a single invocation is sufficient no matter how large the
 * request is.  The body is wrapped in do { } while(0) so the macro behaves
 * as one statement, including inside an unbraced if/else.  desired_size is
 * evaluated exactly once.
 *
 * NOTE: there is no error channel here; if realloc() fails, ptr is set to
 * NULL and the old allocation is leaked.
 */
#define RESIZE_ARRAY_IF_NECESSARY(ptr, size, desired_size) \
    do \
    { \
        const int resize_wanted_ = (desired_size); \
        if((size) < resize_wanted_) \
        { \
            while((size) < resize_wanted_) \
                (size) = ((size) <= 0) ? 8 : \
                         ((size) > resize_wanted_ / 2) ? resize_wanted_ : (size) * 2; \
            (ptr) = realloc((ptr), (size) * sizeof(*(ptr))); \
        } \
    } while(0)
30 int num_abbreviations
;
31 int size_abbreviations
;
32 struct blockinfo_abbrev
{
34 struct abbrev_operand
*operands
;
38 struct stream_stack_entry
41 struct block_metadata
{
49 int first_operand_offset
;
63 long long literal_value
;
65 unsigned char encoding
;
78 /* Values for the stream */
85 struct stream_stack_entry
*old_block_metadata
;
87 /* Values for the current block */
90 struct stream_stack_entry
*block_metadata
;
91 struct blockinfo
*blockinfo
;
93 /* Values for the current record */
94 enum RecordType record_type
;
96 /* - for data records */
98 int current_record_size
;
99 int current_record_offset
;
101 uint64_t *record_buf
;
103 /* - for StartBlock records */
107 /* - for DefineAbbrev records */
108 int record_size_abbrev
;
109 int record_num_abbrev
;
110 struct abbrev_operand
*record_abbrev_operands
;
113 /* The stream stack */
114 int stream_stack_size
;
115 int stream_stack_len
;
116 struct stream_stack_entry
*stream_stack
;
118 int abbrev_operands_size
;
119 int abbrev_operands_len
;
120 struct abbrev_operand
*abbrev_operands
;
122 /* Data about blockinfo records we have encountered */
125 struct blockinfo
*blockinfos
;
129 void print_abbrev(struct abbrev_operand *operands, int num_operands)
131 printf("Abbrev: num_operands=%d\n", num_operands);
132 for(int i = 0; i < num_operands; i++)
134 struct abbrev_operand *o = &operands[i];
135 if(o->type == Literal)
137 printf(" Literal value: %llu\n", o->o.literal_value);
139 else if(o->type == EncodingInfo)
141 printf(" EncodingInfo: encoding=%u, value=%d\n", o->o.encoding_info.encoding,
142 o->o.encoding_info.value);
147 void dump_stack(struct bc_read_stream *s)
149 printf("Stream stack: %d entries\n", s->stream_stack_len);
150 for(int i = 0; i < s->stream_stack_len; i++)
153 struct stream_stack_entry *e = &s->stream_stack[i];
154 if(e->type == Abbreviation)
156 print_abbrev(s->abbrev_operands + e->e.abbrev.first_operand_offset, e->e.abbrev.num_operands);
158 else if(e->type == BlockMetadata)
160 printf("BlockMetadata: abbrev_len=%d, block_id=%d\n", e->e.block_metadata.abbrev_len,
161 e->e.block_metadata.block_id);
166 void dump_blockinfo(struct blockinfo *bi)
170 printf("Blockinfo! BlockID: %u, Abbrevs:\n", bi->block_id);
171 for(int i = 0; i < bi->num_abbreviations; i++)
172 print_abbrev(bi->abbreviations[i].operands, bi->abbreviations[i].num_operands);
177 static int refill_next_bits(struct bc_read_stream
*stream
);
179 struct bc_read_stream
*bc_rs_open_file(const char *filename
)
181 FILE *infile
= fopen(filename
, "r");
189 int ret
= fread(magic
, 4, 1, infile
);
190 if(ret
< 1 || magic
[0] != 'B' || magic
[1] != 'C')
196 /* TODO: give the application a way to get the app-specific magic number */
198 struct bc_read_stream
*stream
= malloc(sizeof(*stream
));
199 stream
->infile
= infile
;
200 stream
->stream_err
= 0;
202 stream
->next_bits
= 0;
203 stream
->num_next_bits
= 0;
204 stream
->stream_offset
= 0;
205 refill_next_bits(stream
);
207 stream
->abbrev_len
= 2; /* its initial value according to the spec */
208 stream
->num_abbrevs
= 0;
210 stream
->stream_stack_size
= 8; /* enough for a few levels of nesting and a few abbrevs */
211 stream
->stream_stack
= malloc(stream
->stream_stack_size
*sizeof(*stream
->stream_stack
));
213 /* we create an outermose stack frame -- this exists mostly to store
214 * the abbrev length of the outermost scope, and to store a bogus
215 * block_id so that we'll never find a blockinfo for the outer scope */
216 stream
->stream_stack_len
= 1;
217 stream
->block_metadata
= &stream
->stream_stack
[0];
218 stream
->block_metadata
->type
= BlockMetadata
;
219 stream
->block_metadata
->e
.block_metadata
.abbrev_len
= stream
->abbrev_len
;
220 stream
->block_metadata
->e
.block_metadata
.block_id
= -1;
222 stream
->record_type
= DataRecord
; /* anything besides Eof */
224 stream
->abbrev_operands_size
= 8;
225 stream
->abbrev_operands_len
= 0;
226 stream
->abbrev_operands
= malloc(stream
->abbrev_operands_size
*sizeof(*stream
->abbrev_operands
));
228 stream
->blockinfo_size
= 8;
229 stream
->blockinfo_len
= 0;
230 stream
->blockinfos
= malloc(stream
->blockinfo_size
*sizeof(*stream
->blockinfos
));
232 stream
->record_buf_size
= 8;
233 stream
->record_buf
= malloc(stream
->record_buf_size
*sizeof(*stream
->record_buf
));
235 stream
->record_size_abbrev
= 8;
236 stream
->record_abbrev_operands
= malloc(stream
->record_size_abbrev
*sizeof(*stream
->record_abbrev_operands
));
241 void bc_rs_close_stream(struct bc_read_stream
*stream
)
243 free(stream
->record_abbrev_operands
);
244 free(stream
->record_buf
);
245 free(stream
->abbrev_operands
);
246 free(stream
->stream_stack
);
248 for(int i
= 0; i
< stream
->blockinfo_len
; i
++)
250 for(int j
= 0; j
< stream
->blockinfos
[i
].num_abbreviations
; j
++)
252 free(stream
->blockinfos
[i
].abbreviations
[j
].operands
);
254 free(stream
->blockinfos
[i
].abbreviations
);
256 free(stream
->blockinfos
);
258 fclose(stream
->infile
);
262 uint64_t bc_rs_read_64(struct bc_read_stream
*stream
, int i
)
264 if(i
> stream
->current_record_size
)
266 stream
->stream_err
|= BITCODE_ERR_NO_SUCH_VALUE
;
271 return stream
->record_buf
[i
];
/*
 * Define bc_rs_read_<bits>(stream, i): fetch value i of the current record
 * as a <type>.
 *
 * The value is obtained through bc_rs_read_64(), which performs the index
 * check; the range test below is applied to that returned value so the
 * record buffer is never indexed a second time with an unchecked i.  If the
 * value does not fit in <bits> bits, BITCODE_ERR_VALUE_TOO_LARGE is set on
 * the stream and 0 is returned.
 */
#define GETTER_FUNC(type, bits) \
    type bc_rs_read_ ## bits (struct bc_read_stream *stream, int i) \
    { \
        uint64_t val = bc_rs_read_64(stream, i); \
        if(val > ((1ULL << (bits)) - 1)) \
        { \
            stream->stream_err |= BITCODE_ERR_VALUE_TOO_LARGE; \
            return 0; \
        } \
        return (type)val; \
    }
291 GETTER_FUNC(uint8_t, 8)
292 GETTER_FUNC(uint16_t, 16)
293 GETTER_FUNC(uint32_t, 32)
295 #define NEXT_GETTER_FUNC(type, bits) \
296 type bc_rs_read_next_ ## bits (struct bc_read_stream *stream) \
298 return bc_rs_read_ ## bits(stream, stream->current_record_offset++); \
301 NEXT_GETTER_FUNC(uint8_t, 8)
302 NEXT_GETTER_FUNC(uint16_t, 16)
303 NEXT_GETTER_FUNC(uint32_t, 32)
304 NEXT_GETTER_FUNC(uint64_t, 64)
306 static int refill_next_bits(struct bc_read_stream
*stream
)
308 unsigned char buf
[4];
309 int ret
= fread(buf
, 4, 1, stream
->infile
);
312 //if(feof(stream->infile))
313 // stream->record_type = Eof;
315 if(ferror(stream
->infile
))
316 stream
->stream_err
|= BITCODE_ERR_IO
;
321 stream
->next_bits
= buf
[0] | (buf
[1] << 8) | (buf
[2] << 16) | (buf
[3] << 24);
322 stream
->num_next_bits
= 32;
323 stream
->stream_offset
+= 4;
/*
 * Extract the num_bits least significant bits of a 32-bit bitfield.
 *
 * num_bits may be anything from 0 to 32 inclusive.  The mask is built in
 * 64-bit arithmetic so that neither 0 nor 32 produces a shift by the full
 * width of the operand (which would be undefined).  Both arguments are
 * parenthesized and evaluated exactly once.
 */
#define LOW_BITS(bitfield, num_bits) \
    ((uint32_t)(bitfield) & (uint32_t)((1ULL << (num_bits)) - 1))
330 static uint32_t read_fixed(struct bc_read_stream
*stream
, int num_bits
)
334 if(stream
->num_next_bits
>= num_bits
)
336 /* next_bits already contains all the bits we need -- take them */
337 ret
= LOW_BITS(stream
->next_bits
, num_bits
);
338 stream
->next_bits
>>= num_bits
;
339 stream
->num_next_bits
-= num_bits
;
343 /* we need all of next_bits, and then some */
344 ret
= stream
->next_bits
;
345 int bits_filled
= stream
->num_next_bits
;
346 int bits_left
= num_bits
- bits_filled
;
348 if(refill_next_bits(stream
) < 0) return 0;
350 /* take bits_left bits from the next_bits */
351 ret
|= LOW_BITS(stream
->next_bits
, bits_left
) << bits_filled
;
354 stream
->next_bits
>>= bits_left
;
356 stream
->next_bits
= 0;
358 stream
->num_next_bits
-= bits_left
;
361 if(stream
->num_next_bits
== 0)
363 /* We could defer this, but doing it now makes our stream_offset more accurate */
364 refill_next_bits(stream
);
370 static uint64_t read_fixed_64(struct bc_read_stream
*stream
, int num_bits
)
374 return read_fixed(stream
, num_bits
);
378 uint64_t ret
= read_fixed(stream
, 32);
379 return ret
| ((uint64_t)read_fixed(stream
, num_bits
-32) << 32);
383 static uint64_t read_vbr_64(struct bc_read_stream
*stream
, int bits
)
387 int continuation_bit
= 1 << (bits
-1);
388 int value_bits
= continuation_bit
- 1;
392 uint32_t next_bits
= read_fixed(stream
, bits
);
393 continues
= next_bits
& continuation_bit
;
394 val
|= (next_bits
& value_bits
) << read_bits
;
401 static uint32_t read_vbr(struct bc_read_stream
*stream
, int bits
)
403 uint64_t val
= read_vbr_64(stream
, bits
);
406 stream
->stream_err
|= BITCODE_ERR_CORRUPT_INPUT
;
411 return (uint32_t)val
;
/*
 * Translate a 6-bit "char6" code into its character.
 *
 *    0..25 -> 'a'..'z'
 *   26..51 -> 'A'..'Z'
 *   52..61 -> '0'..'9'
 *   62     -> '.'
 *   above  -> '_'
 */
static uint8_t decode_char6(int num)
{
    if(num < 26)
        return 'a' + num;

    num -= 26;  /* now relative to the start of the upper-case range */
    if(num < 26)
        return 'A' + num;

    num -= 26;  /* now relative to the start of the digit range */
    if(num < 10)
        return '0' + num;

    return (num == 10) ? '.' : '_';
}
424 /* This can handle any abbreviated type except for arrays */
425 static uint64_t read_abbrev_value(struct bc_read_stream
*stream
, struct abbrev_operand
*op
)
427 if(op
->type
== Literal
)
428 return op
->o
.literal_value
;
431 switch(op
->o
.encoding_info
.encoding
) {
432 case OP_ENCODING_FIXED
:
433 return read_fixed_64(stream
, op
->o
.encoding_info
.value
);
434 case OP_ENCODING_VBR
:
435 return read_vbr_64(stream
, op
->o
.encoding_info
.value
);
436 case OP_ENCODING_CHAR6
:
437 return decode_char6(read_fixed(stream
, 6));
439 stream
->stream_err
|= BITCODE_ERR_INTERNAL
;
445 static void append_value(struct bc_read_stream
*stream
, uint64_t val
)
447 RESIZE_ARRAY_IF_NECESSARY(stream
->record_buf
, stream
->record_buf_size
, stream
->current_record_size
+1);
448 stream
->record_buf
[stream
->current_record_size
++] = val
;
451 static void read_user_abbreviated_record(struct bc_read_stream
*stream
,
452 struct abbrev_operand
*ops
,
455 stream
->current_record_size
= 0;
457 for(int i
= 0; i
< num_operands
; i
++)
459 struct abbrev_operand
*op
= &ops
[i
];
461 if(op
->type
== EncodingInfo
&& op
->o
.encoding_info
.encoding
== OP_ENCODING_ARRAY
)
463 int num_elements
= read_vbr(stream
, 6);
465 for(int j
= 0; j
< num_elements
; j
++)
466 append_value(stream
, read_abbrev_value(stream
, &ops
[i
]));
470 uint64_t val
= read_abbrev_value(stream
, &ops
[i
]);
473 stream
->record_id
= val
;
477 append_value(stream
, val
);
483 static int read_abbrev_op(struct bc_read_stream
*stream
, struct abbrev_operand
*o
, int array_ok
)
485 int is_literal
= read_fixed(stream
, 1);
489 o
->o
.literal_value
= read_vbr(stream
, 8);
493 o
->type
= EncodingInfo
;
494 o
->o
.encoding_info
.encoding
= read_fixed(stream
, 3);
495 switch(o
->o
.encoding_info
.encoding
)
497 case OP_ENCODING_FIXED
:
498 case OP_ENCODING_VBR
:
499 o
->o
.encoding_info
.value
= read_vbr(stream
, 5);
502 case OP_ENCODING_ARRAY
:
503 if(!array_ok
) return -1;
506 case OP_ENCODING_CHAR6
:
514 void align_32_bits(struct bc_read_stream
*stream
)
516 if(stream
->num_next_bits
!= 32)
517 refill_next_bits(stream
);
520 struct blockinfo
*find_blockinfo(struct bc_read_stream
*stream
, int block_id
)
522 for(int i
= 0; i
< stream
->blockinfo_len
; i
++)
523 if(stream
->blockinfos
[i
].block_id
== block_id
)
524 return &stream
->blockinfos
[i
];
529 struct blockinfo
*find_or_create_blockinfo(struct bc_read_stream
*stream
, int block_id
)
531 struct blockinfo
*bi
= find_blockinfo(stream
, block_id
);
539 RESIZE_ARRAY_IF_NECESSARY(stream
->blockinfos
, stream
->blockinfo_size
, stream
->blockinfo_len
+1);
541 struct blockinfo
*new_bi
= &stream
->blockinfos
[stream
->blockinfo_len
++];
543 new_bi
->block_id
= block_id
;
544 new_bi
->num_abbreviations
= 0;
545 new_bi
->size_abbreviations
= 8;
546 new_bi
->abbreviations
= malloc(new_bi
->size_abbreviations
* sizeof(*new_bi
->abbreviations
));
552 static void pop_stack_frame(struct bc_read_stream
*stream
)
554 stream
->stream_stack_len
= stream
->block_metadata
- stream
->stream_stack
;
555 if(stream
->stream_stack_len
== 0)
557 stream
->record_type
= Eof
;
561 stream
->num_abbrevs
= 0;
562 stream
->block_metadata
--;
563 while(stream
->block_metadata
->type
== Abbreviation
)
565 stream
->num_abbrevs
++;
566 stream
->block_metadata
--;
569 stream
->abbrev_len
= stream
->block_metadata
->e
.block_metadata
.abbrev_len
;
570 stream
->block_id
= stream
->block_metadata
->e
.block_metadata
.block_id
;
571 stream
->blockinfo
= find_blockinfo(stream
, stream
->block_id
);
575 void bc_rs_next_record(struct bc_read_stream
*stream
)
577 /* don't attempt to read past eof */
578 if(stream
->record_type
== Eof
) return;
580 int abbrev_id
= read_fixed(stream
, stream
->abbrev_len
);
581 stream
->current_record_offset
= 0;
584 case ABBREV_ID_END_BLOCK
:
585 stream
->record_type
= EndBlock
;
586 stream
->old_block_metadata
= stream
->block_metadata
;
588 align_32_bits(stream
);
589 pop_stack_frame(stream
);
593 case ABBREV_ID_ENTER_SUBBLOCK
:
594 stream
->block_id
= read_vbr(stream
, 8);
595 stream
->abbrev_len
= read_vbr(stream
, 4);
596 align_32_bits(stream
);
597 stream
->block_len
= read_fixed(stream
, 32);
598 stream
->record_type
= StartBlock
;
600 RESIZE_ARRAY_IF_NECESSARY(stream
->stream_stack
, stream
->stream_stack_size
,
601 stream
->stream_stack_len
+1);
603 stream
->block_metadata
= &stream
->stream_stack
[stream
->stream_stack_len
++];
604 stream
->block_metadata
->type
= BlockMetadata
;
605 stream
->block_metadata
->e
.block_metadata
.block_id
= stream
->block_id
;
606 stream
->block_metadata
->e
.block_metadata
.abbrev_len
= stream
->abbrev_len
;
607 stream
->block_metadata
->e
.block_metadata
.block_offset
= stream
->stream_offset
;
608 stream
->block_metadata
->e
.block_metadata
.block_len
= stream
->block_len
;
610 //printf("++ Entering block id=%d, offset=%d\n", stream->block_id, stream->stream_offset);
612 stream
->blockinfo
= find_or_create_blockinfo(stream
, stream
->block_id
);
615 case ABBREV_ID_DEFINE_ABBREV
:
616 stream
->record_type
= DefineAbbrev
;
617 stream
->record_num_abbrev
= read_vbr(stream
, 5);
619 RESIZE_ARRAY_IF_NECESSARY(stream
->record_abbrev_operands
, stream
->record_size_abbrev
,
620 stream
->record_num_abbrev
);
622 for(int i
= 0; i
< stream
->record_num_abbrev
; i
++)
624 read_abbrev_op(stream
, &stream
->record_abbrev_operands
[i
], 0);
629 case ABBREV_ID_UNABBREV_RECORD
:
630 stream
->record_type
= DataRecord
;
631 stream
->record_id
= read_vbr(stream
, 6);
633 stream
->current_record_size
= read_vbr(stream
, 6);
635 RESIZE_ARRAY_IF_NECESSARY(stream
->record_buf
, stream
->record_buf_size
,
636 stream
->current_record_size
+1);
638 for(int i
= 0; i
< stream
->current_record_size
; i
++)
639 stream
->record_buf
[i
] = read_vbr(stream
, 6);
644 /* This must be a user-defined abbreviation. It could come from the
645 * blockinfo-defined abbreviations or abbreviations defined in this
647 stream
->record_type
= DataRecord
;
648 int user_abbrev_id
= abbrev_id
- 4;
649 int num_blockinfo_abbrevs
= stream
->blockinfo
? stream
->blockinfo
->num_abbreviations
: 0;
650 int block_abbrev_id
= user_abbrev_id
- num_blockinfo_abbrevs
;
651 if(user_abbrev_id
< num_blockinfo_abbrevs
)
653 struct blockinfo_abbrev
*a
= &stream
->blockinfo
->abbreviations
[user_abbrev_id
];
654 read_user_abbreviated_record(stream
, a
->operands
, a
->num_operands
);
656 else if(block_abbrev_id
< stream
->num_abbrevs
)
658 struct stream_stack_entry
*e
= stream
->block_metadata
+ block_abbrev_id
+ 1;
659 struct abbrev_operand
*o
= stream
->abbrev_operands
+ e
->e
.abbrev
.first_operand_offset
;
660 read_user_abbreviated_record(stream
, o
, e
->e
.abbrev
.num_operands
);
664 stream
->stream_err
|= BITCODE_ERR_CORRUPT_INPUT
;
671 struct record_info
bc_rs_next_data_record(struct bc_read_stream
*stream
)
675 bc_rs_next_record(stream
);
677 if(stream
->record_type
== DefineAbbrev
)
679 int num_ops
= stream
->record_num_abbrev
;
681 RESIZE_ARRAY_IF_NECESSARY(stream
->stream_stack
, stream
->stream_stack_size
,
682 stream
->stream_stack_len
+1);
683 RESIZE_ARRAY_IF_NECESSARY(stream
->abbrev_operands
, stream
->abbrev_operands_size
,
684 stream
->abbrev_operands_len
+num_ops
+1);
686 struct stream_stack_entry
*e
= &stream
->stream_stack
[stream
->stream_stack_len
++];
687 e
->type
= Abbreviation
;
688 e
->e
.abbrev
.first_operand_offset
= stream
->abbrev_operands_len
;
689 e
->e
.abbrev
.num_operands
= num_ops
;
690 struct abbrev_operand
*abbrev_operands
= &stream
->abbrev_operands
[stream
->abbrev_operands_len
];
691 stream
->abbrev_operands_len
+= num_ops
;
693 for(int i
= 0; i
< num_ops
; i
++)
694 abbrev_operands
[i
] = stream
->record_abbrev_operands
[i
];
696 stream
->num_abbrevs
++;
698 else if(stream
->record_type
== StartBlock
&& stream
->block_id
== STDBLOCK_BLOCKINFO
)
700 /* The first record must be a SETBID record */
701 bc_rs_next_record(stream
);
702 struct blockinfo
*bi
= NULL
;
706 if(stream
->record_type
== EndBlock
)
710 else if(stream
->record_type
== Err
|| stream
->record_type
== Eof
)
712 struct record_info ri
;
713 ri
.record_type
= stream
->record_type
;
717 else if(stream
->record_type
== DataRecord
)
719 if(stream
->record_id
== BLOCKINFO_BLOCK_SETBID
)
721 if(stream
->current_record_size
!= 1)
724 stream
->stream_err
|= BITCODE_ERR_CORRUPT_INPUT
;
726 bi
= find_or_create_blockinfo(stream
, stream
->record_buf
[0]);
729 else if(stream
->record_type
== DefineAbbrev
)
735 stream
->stream_err
|= BITCODE_ERR_CORRUPT_INPUT
;
738 RESIZE_ARRAY_IF_NECESSARY(bi
->abbreviations
,
739 bi
->size_abbreviations
, bi
->num_abbreviations
+1);
741 struct blockinfo_abbrev
*abbrev
= &bi
->abbreviations
[bi
->num_abbreviations
++];
742 abbrev
->num_operands
= stream
->record_num_abbrev
;
743 abbrev
->operands
= malloc(sizeof(*abbrev
->operands
) * abbrev
->num_operands
);
744 for(int i
= 0; i
< abbrev
->num_operands
; i
++)
745 abbrev
->operands
[i
] = stream
->record_abbrev_operands
[i
];
748 bc_rs_next_record(stream
);
754 struct record_info ri
;
755 ri
.record_type
= stream
->record_type
;
758 if(ri
.record_type
== StartBlock
) ri
.id
= stream
->block_id
;
759 else if(ri
.record_type
== DataRecord
) ri
.id
= stream
->record_id
;
766 int bc_rs_get_error(struct bc_read_stream
*stream
)
768 return stream
->stream_err
;
771 int bc_rs_get_record_size(struct bc_read_stream
*stream
)
773 return stream
->current_record_size
;
776 int bc_rs_get_remaining_record_size(struct bc_read_stream
*stream
)
778 return stream
->current_record_size
- stream
->current_record_offset
;
781 void bc_rs_skip_block(struct bc_read_stream
*stream
)
783 int offset
= stream
->block_metadata
->e
.block_metadata
.block_offset
+
784 (stream
->block_metadata
->e
.block_metadata
.block_len
* 4);
786 fseek(stream
->infile
, offset
, SEEK_SET
);
787 stream
->stream_offset
= offset
-4;
788 refill_next_bits(stream
);
789 pop_stack_frame(stream
);
792 void bc_rs_rewind_block(struct bc_read_stream
*stream
)
794 if(stream
->record_type
== EndBlock
)
796 stream
->num_abbrevs
= stream
->old_block_metadata
- stream
->block_metadata
- 1;
797 stream
->block_metadata
= stream
->old_block_metadata
;
798 stream
->abbrev_len
= stream
->block_metadata
->e
.block_metadata
.abbrev_len
;
799 stream
->block_id
= stream
->block_metadata
->e
.block_metadata
.block_id
;
800 stream
->blockinfo
= find_or_create_blockinfo(stream
, stream
->block_id
);
801 stream
->stream_stack_len
= stream
->block_metadata
- stream
->stream_stack
+ 1;
804 int offset
= stream
->block_metadata
->e
.block_metadata
.block_offset
;
805 fseek(stream
->infile
, offset
, SEEK_SET
);
806 stream
->stream_offset
= offset
-4;
807 refill_next_bits(stream
);
808 align_32_bits(stream
);
813 * c-file-style: "bsd"
815 * indent-tabs-mode: nil