io_scene_fbx/parse_fbx.py

   1 # SPDX-FileCopyrightText: 2006-2012 assimp team
   2 # SPDX-FileCopyrightText: 2013 Blender Foundation
   3 #
   4 # SPDX-License-Identifier: GPL-2.0-or-later
   5
   6 __all__ = (
   7     "parse",
   8     "data_types",
   9     "parse_version",
  10     "FBXElem",
  11     )
  12
  13 from struct import unpack
  14 import array
  15 import zlib
  16 from io import BytesIO
  17
  18 from . import data_types
  19 from .fbx_utils_threading import MultiThreadedTaskConsumer
  20
# At the end of each nested block, there is a NUL record to indicate
# that the sub-scope exists (i.e. to distinguish between P: and P : {}).
# The three names below hold the Ellipsis placeholder until `init_version()` assigns their version-dependent values.
_BLOCK_SENTINEL_LENGTH = ...
_BLOCK_SENTINEL_DATA = ...
read_fbx_elem_start = ...
# True when the host byte order is not little-endian; `sys` is accessed through `__import__`, so no `sys` name is bound
# in this module.
_IS_BIG_ENDIAN = (__import__("sys").byteorder != 'little')
# Byte sequence expected at the very start of a binary FBX file (checked by `parse()` and `parse_version()`).
_HEAD_MAGIC = b'Kaydara FBX Binary\x20\x20\x00\x1a\x00'
from collections import namedtuple
# Parsed element: (id, props, props_type, elems).
FBXElem = namedtuple("FBXElem", ("id", "props", "props_type", "elems"))
# `namedtuple` was only needed to build FBXElem, remove it from the module namespace again.
del namedtuple
  31
  32
  33 def read_uint(read):
  34     return unpack(b'<I', read(4))[0]
  35
  36
  37 def read_ubyte(read):
  38     return unpack(b'B', read(1))[0]
  39
  40
  41 def read_string_ubyte(read):
  42     size = read_ubyte(read)
  43     data = read(size)
  44     return data
  45
  46
  47 def read_array_params(read):
  48     return unpack(b'<III', read(12))
  49
  50
  51 def read_elem_start32(read):
  52     end_offset, prop_count, _prop_length, elem_id_size = unpack(b'<IIIB', read(13))
  53     elem_id = read(elem_id_size) if elem_id_size else b""
  54     return end_offset, prop_count, elem_id
  55
  56
  57 def read_elem_start64(read):
  58     end_offset, prop_count, _prop_length, elem_id_size = unpack(b'<QQQB', read(25))
  59     elem_id = read(elem_id_size) if elem_id_size else b""
  60     return end_offset, prop_count, elem_id
  61
  62
  63 def _create_array(data, length, array_type, array_stride, array_byteswap):
  64     """Create an array from FBX data."""
  65     # If size of the data does not match the expected size of the array, then something is wrong with the code or the
  66     # FBX file.
  67     assert(length * array_stride == len(data))
  68
  69     data_array = array.array(array_type, data)
  70     if array_byteswap and _IS_BIG_ENDIAN:
  71         data_array.byteswap()
  72     return data_array
  73
  74
  75 def _decompress_and_insert_array(elem_props_data, index_to_set, compressed_array_args):
  76     """Decompress array data and insert the created array into the FBX tree being parsed.
  77
  78     This is usually called from a separate thread to the main thread."""
  79     compressed_data, length, array_type, array_stride, array_byteswap = compressed_array_args
  80
  81     # zlib.decompress releases the Global Interpreter Lock, so another thread can run code while waiting for the
  82     # decompression to complete.
  83     data = zlib.decompress(compressed_data, bufsize=length * array_stride)
  84
  85     # Create and insert the array into the parsed FBX hierarchy.
  86     elem_props_data[index_to_set] = _create_array(data, length, array_type, array_stride, array_byteswap)
  87
  88
  89 def unpack_array(read, array_type, array_stride, array_byteswap):
  90     """Unpack an array from an FBX file being parsed.
  91
  92     If the array data is compressed, the compressed data is combined with the other arguments into a tuple to prepare
  93     for decompressing on a separate thread if possible.
  94
  95     If the array data is not compressed, the array is created.
  96
  97     Returns (tuple, True) or (array, False)."""
  98     length, encoding, comp_len = read_array_params(read)
  99
 100     data = read(comp_len)
 101
 102     if encoding == 1:
 103         # Array data requires decompression, which is done in a separate thread if possible.
 104         return (data, length, array_type, array_stride, array_byteswap), True
 105     else:
 106         return _create_array(data, length, array_type, array_stride, array_byteswap), False
 107
 108
 109 read_array_dict = {
 110     b'b'[0]: lambda read: unpack_array(read, data_types.ARRAY_BOOL, 1, False),     # bool
 111     b'c'[0]: lambda read: unpack_array(read, data_types.ARRAY_BYTE, 1, False),     # ubyte
 112     b'i'[0]: lambda read: unpack_array(read, data_types.ARRAY_INT32, 4, True),     # int
 113     b'l'[0]: lambda read: unpack_array(read, data_types.ARRAY_INT64, 8, True),     # long
 114     b'f'[0]: lambda read: unpack_array(read, data_types.ARRAY_FLOAT32, 4, False),  # float
 115     b'd'[0]: lambda read: unpack_array(read, data_types.ARRAY_FLOAT64, 8, False),  # double
 116 }
 117
 118 read_data_dict = {
 119     b'Z'[0]: lambda read: unpack(b'<b', read(1))[0],  # byte
 120     b'Y'[0]: lambda read: unpack(b'<h', read(2))[0],  # 16 bit int
 121     b'B'[0]: lambda read: unpack(b'?', read(1))[0],   # 1 bit bool (yes/no)
 122     b'C'[0]: lambda read: unpack(b'<c', read(1))[0],  # char
 123     b'I'[0]: lambda read: unpack(b'<i', read(4))[0],  # 32 bit int
 124     b'F'[0]: lambda read: unpack(b'<f', read(4))[0],  # 32 bit float
 125     b'D'[0]: lambda read: unpack(b'<d', read(8))[0],  # 64 bit float
 126     b'L'[0]: lambda read: unpack(b'<q', read(8))[0],  # 64 bit int
 127     b'R'[0]: lambda read: read(read_uint(read)),      # binary data
 128     b'S'[0]: lambda read: read(read_uint(read)),      # string data
 129     }
 130
 131
 132 # FBX 7500 (aka FBX2016) introduces incompatible changes at binary level:
 133 #   * The NULL block marking end of nested stuff switches from 13 bytes long to 25 bytes long.
 134 #   * The FBX element metadata (end_offset, prop_count and prop_length) switch from uint32 to uint64.
 135 def init_version(fbx_version):
 136     global _BLOCK_SENTINEL_LENGTH, _BLOCK_SENTINEL_DATA, read_fbx_elem_start
 137
 138     _BLOCK_SENTINEL_LENGTH = ...
 139     _BLOCK_SENTINEL_DATA = ...
 140
 141     if fbx_version < 7500:
 142         _BLOCK_SENTINEL_LENGTH = 13
 143         read_fbx_elem_start = read_elem_start32
 144     else:
 145         _BLOCK_SENTINEL_LENGTH = 25
 146         read_fbx_elem_start = read_elem_start64
 147     _BLOCK_SENTINEL_DATA = (b'\0' * _BLOCK_SENTINEL_LENGTH)
 148
 149
def read_elem(read, tell, use_namedtuple, decompress_array_func, tell_file_offset=0):
    """Read a single element, including its properties and, recursively, all of its sub-elements.

    :param read: function that reads and returns the given number of bytes from the current position.
    :param tell: function that returns the current position of the stream `read` reads from.
    :param use_namedtuple: when true, the element is returned as an FBXElem, otherwise as a plain tuple.
    :param decompress_array_func: called as `decompress_array_func(elem_props_data, i, val)` for every compressed
        array property; it is expected to store the decompressed array in `elem_props_data[i]`, possibly at a later
        time.
    :param tell_file_offset: value subtracted from the end offset stored in the file, to make it comparable to the
        positions returned by `tell`. Zero when reading directly from the opened file.
    :return: None if the end offset of the element is zero, otherwise (id, props, props_type, elems) as an FBXElem
        or a plain tuple.
    :raises IOError: if a nested block cannot be read completely, if the sentinel at the end of a nested block is
        not all zeros, or if the element does not end at its end offset.
    """
    # [0] the offset at which this block ends
    # [1] the number of properties in the scope
    # [2] the length of the property list
    # [3] elem name length
    # [4] elem name of the scope/key
    # read_fbx_elem_start does not return [2] because we don't use it and does not return [3] because it is only used to
    # get [4].
    end_offset, prop_count, elem_id = read_fbx_elem_start(read)
    if end_offset == 0:
        return None

    elem_props_type = bytearray(prop_count)  # elem property types
    elem_props_data = [None] * prop_count    # elem properties (if any)
    elem_subtree = []                        # elem children (if any)

    for i in range(prop_count):
        # Each property starts with a single byte identifying its type.
        data_type = read(1)[0]
        if data_type in read_array_dict:
            val, needs_decompression = read_array_dict[data_type](read)
            if needs_decompression:
                # Array decompression releases the GIL, so can be multithreaded (if possible on the current system) for
                # performance.
                # After decompressing, the array is inserted into elem_props_data[i].
                decompress_array_func(elem_props_data, i, val)
            else:
                elem_props_data[i] = val
        else:
            elem_props_data[i] = read_data_dict[data_type](read)
        elem_props_type[i] = data_type

    pos = tell()
    # The end offset stored in the file, converted to the coordinate system of the current `tell` function.
    local_end_offset = end_offset - tell_file_offset

    # Bytes remaining after the properties mean that this element has a nested block of sub-elements.
    if pos < local_end_offset:
        # The default BufferedReader used when `open()`-ing files in 'rb' mode has to get the raw stream position from
        # the OS every time its tell() function is called. This is about 10 times slower than the tell() function of
        # BytesIO objects, so reading chunks of bytes from the file into memory at once and exposing them through
        # BytesIO can give better performance. We know the total size of each element's subtree so can read entire
        # subtrees into memory at a time.
        # The "Objects" element's subtree, however, usually makes up most of the file, so we specifically avoid reading
        # all its sub-elements into memory at once to reduce memory requirements at the cost of slightly worse
        # performance when memory is not a concern.
        # If we're currently reading directly from the opened file, then tell_file_offset will be zero.
        if tell_file_offset == 0 and elem_id != b"Objects":
            block_bytes_remaining = local_end_offset - pos

            # Read the entire subtree
            sub_elem_bytes = read(block_bytes_remaining)
            num_bytes_read = len(sub_elem_bytes)
            if num_bytes_read != block_bytes_remaining:
                raise IOError("failed to read complete nested block, expected %i bytes, but only got %i"
                              % (block_bytes_remaining, num_bytes_read))

            # BytesIO provides IO API for reading bytes in memory, so we can use the same code as reading bytes directly
            # from a file.
            f = BytesIO(sub_elem_bytes)
            tell = f.tell
            read = f.read
            # The new `tell` function starts at zero and is offset by `pos` bytes from the start of the file.
            start_sub_pos = 0
            tell_file_offset = pos
            sub_tree_end = block_bytes_remaining - _BLOCK_SENTINEL_LENGTH
        else:
            # The `tell` function is unchanged, so starts at the value returned by `tell()`, which is still `pos`
            # because no reads have been made since then.
            start_sub_pos = pos
            sub_tree_end = local_end_offset - _BLOCK_SENTINEL_LENGTH

        # Read sub-elements until only the sentinel is left in the nested block.
        sub_pos = start_sub_pos
        while sub_pos < sub_tree_end:
            elem_subtree.append(read_elem(read, tell, use_namedtuple, decompress_array_func, tell_file_offset))
            sub_pos = tell()

        # At the end of each subtree there should be a sentinel (an empty element with all bytes set to zero).
        if read(_BLOCK_SENTINEL_LENGTH) != _BLOCK_SENTINEL_DATA:
            raise IOError("failed to read nested block sentinel, "
                          "expected all bytes to be 0")

        # Update `pos` for the number of bytes that have been read.
        pos += (sub_pos - start_sub_pos) + _BLOCK_SENTINEL_LENGTH

    # After the properties and the optional nested block, the element must end exactly at its end offset.
    if pos != local_end_offset:
        raise IOError("scope length not reached, something is wrong")

    args = (elem_id, elem_props_data, elem_props_type, elem_subtree)
    return FBXElem(*args) if use_namedtuple else args
 237
 238
 239 def parse_version(fn):
 240     """
 241     Return the FBX version,
 242     if the file isn't a binary FBX return zero.
 243     """
 244     with open(fn, 'rb') as f:
 245         read = f.read
 246
 247         if read(len(_HEAD_MAGIC)) != _HEAD_MAGIC:
 248             return 0
 249
 250         return read_uint(read)
 251
 252
 253 def parse(fn, use_namedtuple=True):
 254     root_elems = []
 255
 256     multithread_decompress_array_cm = MultiThreadedTaskConsumer.new_cpu_bound_cm(_decompress_and_insert_array)
 257     with open(fn, 'rb') as f, multithread_decompress_array_cm as decompress_array_func:
 258         read = f.read
 259         tell = f.tell
 260
 261         if read(len(_HEAD_MAGIC)) != _HEAD_MAGIC:
 262             raise IOError("Invalid header")
 263
 264         fbx_version = read_uint(read)
 265         init_version(fbx_version)
 266
 267         while True:
 268             elem = read_elem(read, tell, use_namedtuple, decompress_array_func)
 269             if elem is None:
 270                 break
 271             root_elems.append(elem)
 272
 273     args = (b'', [], bytearray(0), root_elems)
 274     return FBXElem(*args) if use_namedtuple else args, fbx_version