3 Generate C definitions for parsing Matroska files.
4 Can also be used to directly parse Matroska files and display their contents.
8 # This file is part of MPlayer.
10 # MPlayer is free software; you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation; either version 2 of the License, or
13 # (at your option) any later version.
15 # MPlayer is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
20 # You should have received a copy of the GNU General Public License along
21 # with MPlayer; if not, write to the Free Software Foundation, Inc.,
22 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 'EBML, 1a45dfa3, sub', (
28 'EBMLVersion, 4286, uint',
29 'EBMLReadVersion, 42f7, uint',
30 'EBMLMaxIDLength, 42f2, uint',
31 'EBMLMaxSizeLength, 42f3, uint',
33 'DocTypeVersion, 4287, uint',
34 'DocTypeReadVersion, 4285, uint',
42 'Segment, 18538067, sub', (
44 'SeekHead*, 114d9b74, sub', (
46 'SeekID, 53ab, ebml_id',
47 'SeekPosition, 53ac, uint',
51 'Info*, 1549a966, sub', (
52 'SegmentUID, 73a4, binary',
53 'PrevUID, 3cb923, binary',
54 'NextUID, 3eb923, binary',
55 'TimecodeScale, 2ad7b1, uint',
56 'DateUTC, 4461, sint',
58 'MuxingApp, 4d80, str',
59 'WritingApp, 5741, str',
60 'Duration, 4489, float',
63 'Cluster*, 1f43b675, sub', (
65 'BlockGroup*, a0, sub', (
67 'BlockDuration, 9b, uint',
68 'ReferenceBlock*, fb, sint',
70 'SimpleBlock*, a3, binary',
73 'Tracks*, 1654ae6b, sub', (
74 'TrackEntry*, ae, sub', (
75 'TrackNumber, d7, uint',
76 'TrackUID, 73c5, uint',
77 'TrackType, 83, uint',
78 'FlagEnabled, b9, uint',
79 'FlagDefault, 88, uint',
80 'FlagForced, 55aa, uint',
81 'FlagLacing, 9c, uint',
82 'MinCache, 6de7, uint',
83 'MaxCache, 6df8, uint',
84 'DefaultDuration, 23e383, uint',
85 'TrackTimecodeScale, 23314f, float',
86 'MaxBlockAdditionID, 55ee, uint',
88 'Language, 22b59c, str',
90 'CodecPrivate, 63a2, binary',
91 'CodecDecodeAll, aa, uint',
93 'FlagInterlaced, 9a, uint',
94 'PixelWidth, b0, uint',
95 'PixelHeight, ba, uint',
96 'DisplayWidth, 54b0, uint',
97 'DisplayHeight, 54ba, uint',
98 'DisplayUnit, 54b2, uint',
99 'FrameRate, 2383e3, float',
102 'SamplingFrequency, b5, float',
103 'OutputSamplingFrequency, 78b5, float',
104 'Channels, 9f, uint',
105 'BitDepth, 6264, uint',
107 'ContentEncodings, 6d80, sub', (
108 'ContentEncoding*, 6240, sub', (
109 'ContentEncodingOrder, 5031, uint',
110 'ContentEncodingScope, 5032, uint',
111 'ContentEncodingType, 5033, uint',
112 'ContentCompression, 5034, sub', (
113 'ContentCompAlgo, 4254, uint',
114 'ContentCompSettings, 4255, binary',
121 'Cues, 1c53bb6b, sub', (
122 'CuePoint*, bb, sub', (
124 'CueTrackPositions*, b7, sub', (
125 'CueTrack, f7, uint',
126 'CueClusterPosition, f1, uint',
131 'Attachments, 1941a469, sub', (
132 'AttachedFile*, 61a7, sub', (
133 'FileDescription, 467e, str',
134 'FileName, 466e, str',
135 'FileMimeType, 4660, str',
136 'FileData, 465c, binary',
137 'FileUID, 46ae, uint',
141 'Chapters, 1043a770, sub', (
142 'EditionEntry*, 45b9, sub', (
143 'EditionUID, 45bc, uint',
144 'EditionFlagHidden, 45bd, uint',
145 'EditionFlagDefault, 45db, uint',
146 'EditionFlagOrdered, 45dd, uint',
147 'ChapterAtom*, b6, sub', (
148 'ChapterUID, 73c4, uint',
149 'ChapterTimeStart, 91, uint',
150 'ChapterTimeEnd, 92, uint',
151 'ChapterFlagHidden, 98, uint',
152 'ChapterFlagEnabled, 4598, uint',
153 'ChapterSegmentUID, 6e67, binary',
154 'ChapterSegmentEditionUID, 6ebc, uint',
155 'ChapterDisplay*, 80, sub', (
156 'ChapString, 85, str',
157 'ChapLanguage*, 437c, str',
158 'ChapCountry*, 437e, str',
163 'Tags*, 1254c367, sub', (
165 'Targets, 63c0, sub', (
166 'TargetTypeValue, 68ca, uint',
167 'TargetTrackUID, 63c5, uint',
168 'TargetEditionUID, 63c9, uint',
169 'TargetChapterUID, 63c4, uint',
170 'TargetAttachmentUID, 63c6, uint',
172 'SimpleTag*, 67c8, sub', (
173 'TagName, 45a3, str',
174 'TagLanguage, 447a, str',
175 'TagString, 4487, str'
184 from math
import ldexp
185 from binascii
import hexlify
188 return int(hexlify(s
), 16)
class EOF(Exception):
    """Marker exception that adds nothing to the built-in Exception.

    NOTE(review): the raise/except sites are not among the visible lines;
    presumably this signals that the input stream ran out of data -- confirm.
    """
192 def camelcase_to_words(name
):
195 for i
in range(1, len(name
)):
196 if name
[i
].isupper() and (name
[i
-1].islower() or
197 name
[i
+1:i
+2].islower()):
198 parts
.append(name
[start
:i
])
200 parts
.append(name
[start
:])
201 return '_'.join(parts
).lower()
203 class MatroskaElement(object):
205 def __init__(self
, name
, elid
, valtype
, namespace
):
207 self
.definename
= '{}_ID_{}'.format(namespace
, name
.upper())
208 self
.fieldname
= camelcase_to_words(name
)
209 self
.structname
= 'ebml_' + self
.fieldname
211 self
.valtype
= valtype
213 self
.ebmltype
= 'EBML_TYPE_SUBELEMENTS'
214 self
.valname
= 'struct ' + self
.structname
216 self
.ebmltype
= 'EBML_TYPE_' + valtype
.upper()
218 self
.valname
= {'uint': 'uint64_t', 'str': 'struct bstr',
219 'binary': 'struct bstr', 'ebml_id': 'uint32_t',
220 'float': 'double', 'sint': 'int64_t',
223 raise SyntaxError('Unrecognized value type ' + valtype
)
224 self
.subelements
= ()
def add_subelements(self, subelements):
    """Attach child definitions to this element.

    subelements is an iterable of sequences whose first item exposes an
    ``elid`` attribute (parse_elems() passes ``(element, multiple)`` pairs).
    The iterable itself is stored as ``self.subelements``; the distinct
    child ids are collected into the set ``self.subids``.
    """
    self.subelements = subelements
    # Keep the ids in a set so that membership tests on subids are cheap.
    self.subids = {entry[0].elid for entry in subelements}
232 def parse_elems(l
, namespace
):
235 if isinstance(el
, str):
236 name
, hexid
, eltype
= [x
.strip() for x
in el
.split(',')]
237 multiple
= name
.endswith('*')
238 name
= name
.strip('*')
239 new
= MatroskaElement(name
, hexid
, eltype
, namespace
)
240 elementd
[hexid
] = new
241 elementlist
.append(new
)
242 subelements
.append((new
, multiple
))
244 new
.add_subelements(parse_elems(el
, namespace
))
# Process both definition tables at import time. parse_elems() records each
# element in elementd and elementlist as a side effect, so the returned
# lists are discarded here. The second argument becomes the prefix of every
# element's definename (e.g. 'EBML_ID_...' / 'MATROSKA_ID_...').
# NOTE(review): elements_ebml and elements_matroska are assigned outside the
# lines visible here -- presumably the nested tuples near the top of the file.
parse_elems(elements_ebml, 'EBML')
parse_elems(elements_matroska, 'MATROSKA')
250 def generate_C_header():
251 print('// Generated by TOOLS/matroska.py, do not edit manually')
254 for el
in elementlist
:
255 print('#define {0.definename:40} 0x{0.elid}'.format(el
))
259 for el
in reversed(elementlist
):
260 if not el
.subelements
:
263 print('struct {0.structname} {{'.format(el
))
264 l
= max(len(subel
.valname
) for subel
, multiple
in el
.subelements
)+1
265 for subel
, multiple
in el
.subelements
:
266 print(' {e.valname:{l}} {star}{e.fieldname};'.format(
267 e
=subel
, l
=l
, star
=' *'[multiple
]))
269 for subel
, multiple
in el
.subelements
:
270 print(' int n_{0.fieldname};'.format(subel
))
273 for el
in elementlist
:
274 if not el
.subelements
:
276 print('extern const struct ebml_elem_desc {0.structname}_desc;'.format(
280 print('#define MAX_EBML_SUBELEMENTS', max(len(el
.subelements
)
281 for el
in elementlist
))
285 def generate_C_definitions():
286 print('// Generated by TOOLS/matroska.py, do not edit manually')
288 for el
in reversed(elementlist
):
291 print('#define N', el
.fieldname
)
292 print('E_S("{}", {})'.format(el
.name
, len(el
.subelements
)))
293 for subel
, multiple
in el
.subelements
:
294 print('F({0.definename}, {0.fieldname}, {1})'.format(
295 subel
, int(multiple
)))
299 print('E("{0.name}", {0.fieldname}, {0.ebmltype})'.format(el
))
313 while not ord(t
) & mask
:
325 while not ord(t
) & mask
:
328 t
= bytes((ord(t
) & (mask
- 1),))
330 return i
+1, byte2num(t
)
def read_str(s, length):
    """Return whatever read(s, length) yields, without decoding it.

    Thin wrapper around read(); parse_one() decodes or hexlifies the
    result itself.
    """
    raw = read(s, length)
    return raw
335 def read_uint(s
, length
):
339 def read_sint(s
, length
):
340 i
= read_uint(s
, length
)
341 mask
= 1 << (length
* 8 - 1)
346 def read_float(s
, length
):
350 f
= ldexp((i
& 0x7fffff) + (1 << 23), (i
>> 23 & 0xff) - 150)
354 f
= ldexp((i
& ((1 << 52) - 1)) + (1 << 52), (i
>> 52 & 0x7ff) - 1075)
361 def parse_one(s
, depth
, parent
, maxlen
):
362 elid
= hexlify(read_id(s
)).decode('ascii')
363 elem
= elementd
.get(elid
)
364 if parent
is not None and elid
not in parent
.subids
and elid
not in ('ec', 'bf'):
365 print('Unexpected:', elid
)
367 raise NotImplementedError
368 size
, length
= read_vint(s
)
369 this_length
= len(elid
) / 2 + size
+ length
371 if elem
.valtype
!= 'skip':
372 print(depth
, elid
, elem
.name
, 'size:', length
, 'value:', end
=' ')
373 if elem
.valtype
== 'sub':
374 print('subelements:')
376 length
-= parse_one(s
, depth
+ 1, elem
, length
)
379 elif elem
.valtype
== 'str':
380 print('string', repr(read_str(s
, length
).decode('utf8', 'replace')))
381 elif elem
.valtype
in ('binary', 'ebml_id'):
382 t
= read_str(s
, length
)
384 if elem
.valtype
== 'ebml_id':
385 idelem
= elementd
.get(hexlify(t
).decode('ascii'))
389 dec
= '({0.name})'.format(idelem
)
391 t
= hexlify(t
).decode('ascii')
393 t
= '<skipped {} bytes>'.format(len(t
))
394 print('binary', t
, dec
)
395 elif elem
.valtype
== 'uint':
396 print('uint', read_uint(s
, length
))
397 elif elem
.valtype
== 'sint':
398 print('sint', read_sint(s
, length
))
399 elif elem
.valtype
== 'float':
400 print('float', read_float(s
, length
))
401 elif elem
.valtype
== 'skip':
404 raise NotImplementedError
406 print(depth
, 'Unknown element:', elid
, 'size:', length
)
def parse_toplevel(s):
    """Run parse_one() on stream `s` as a top-level element.

    The call uses depth 0, no parent element and a maxlen of 1 << 63.
    NOTE(review): the large maxlen presumably means "no practical size
    limit" -- how parse_one() uses it is not visible here.
    """
    parse_one(s, depth=0, parent=None, maxlen=1 << 63)
413 if sys
.argv
[1] == '--generate-header':
415 elif sys
.argv
[1] == '--generate-definitions':
416 generate_C_definitions()
418 s
= open(sys
.argv
[1], "rb")
424 if s
.tell() != start
:
425 raise Exception("Unexpected end of file")