1 # Copyright 2006 Joe Wreschnig
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License version 2 as
5 # published by the Free Software Foundation.
7 # $Id: m4a.py 4231 2007-12-15 08:13:53Z luks $
9 """Read and write MPEG-4 audio files with iTunes metadata.
This module will read MPEG-4 audio information and metadata,
as found in Apple's M4A (aka MP4, M4B, M4P) files.

There is no official specification for this format. The source code
for TagLib, FAAD, and various MPEG specifications at
http://developer.apple.com/documentation/QuickTime/QTFF/,
http://www.geocities.com/xhelmboyx/quicktime/formats/mp4-layout.txt,
and http://wiki.multimedia.cx/index.php?title=Apple_QuickTime were all
consulted.

This module does not support 64 bit atom sizes, and so will not
work on metadata over 4GB.
"""
28 from cStringIO
import StringIO
30 from mutagen
import FileType
, Metadata
31 from mutagen
._constants
import GENRES
32 from mutagen
._util
import cdata
, insert_bytes
, delete_bytes
, DictProxy
class error(IOError):
    """Base exception of this module; the M4A* errors derive from it."""


class M4AMetadataError(error):
    """Error concerning the tag metadata of a file."""


class M4AStreamInfoError(error):
    """Error concerning the audio stream information of a file."""


class M4AMetadataValueError(ValueError, M4AMetadataError):
    """A tag value that cannot be rendered; also a ValueError."""
41 "mutagen.m4a is deprecated; use mutagen.mp4 instead.", DeprecationWarning)
# Container atoms whose children this module needs to look at.  The
# list is deliberately not exhaustive.
_CONTAINERS = [
    "moov", "udta", "trak", "mdia", "meta", "ilst",
    "stbl", "minf", "stsd",
]

# Number of extra bytes to skip after a container atom's 8-byte header
# before its first child, keyed by atom name (default: none).
_SKIP_SIZE = {"meta": 4}
# Public names of this module.
__all__ = [
    "M4A",
    "Open",
    "delete",
    "M4ACover",
]
55 imageformat -- format of the image (either FORMAT_JPEG or FORMAT_PNG)
60 def __new__(cls
, data
, imageformat
=None):
61 self
= str.__new
__(cls
, data
)
62 if imageformat
is None: imageformat
= M4ACover
.FORMAT_JPEG
63 self
.imageformat
= imageformat
65 except AttributeError:
66 self
.format
= imageformat
70 """An individual atom.
73 children -- list child atoms (or None for non-container atoms)
74 length -- length of this atom, including length and name
75 name -- four byte name of the atom, as a str
76 offset -- location in the constructor-given fileobj of this atom
78 This structure should only be used internally by Mutagen.
83 def __init__(self
, fileobj
):
84 self
.offset
= fileobj
.tell()
85 self
.length
, self
.name
= struct
.unpack(">I4s", fileobj
.read(8))
87 raise error("64 bit atom sizes are not supported")
91 if self
.name
in _CONTAINERS
:
93 fileobj
.seek(_SKIP_SIZE
.get(self
.name
, 0), 1)
94 while fileobj
.tell() < self
.offset
+ self
.length
:
95 self
.children
.append(Atom(fileobj
))
97 fileobj
.seek(self
.offset
+ self
.length
, 0)
99 def render(name
, data
):
100 """Render raw atom data."""
101 # this raises OverflowError if Py_ssize_t can't handle the atom data
103 if size
<= 0xFFFFFFFF:
104 return struct
.pack(">I4s", size
, name
) + data
106 return struct
.pack(">I4sQ", 1, name
, size
+ 8) + data
107 render
= staticmethod(render
)
109 def __getitem__(self
, remaining
):
110 """Look up a child atom, potentially recursively.
112 e.g. atom['udta', 'meta'] => <Atom name='meta' ...>
116 elif self
.children
is None:
117 raise KeyError("%r is not a container" % self
.name
)
118 for child
in self
.children
:
119 if child
.name
== remaining
[0]:
120 return child
[remaining
[1:]]
122 raise KeyError, "%r not found" % remaining
[0]
125 klass
= self
.__class
__.__name
__
126 if self
.children
is None:
127 return "<%s name=%r length=%r offset=%r>" % (
128 klass
, self
.name
, self
.length
, self
.offset
)
130 children
= "\n".join([" " + line
for child
in self
.children
131 for line
in repr(child
).splitlines()])
132 return "<%s name=%r length=%r offset=%r\n%s>" % (
133 klass
, self
.name
, self
.length
, self
.offset
, children
)
136 """Root atoms in a given file.
139 atoms -- a list of top-level atoms as Atom objects
141 This structure should only be used internally by Mutagen.
143 def __init__(self
, fileobj
):
148 while fileobj
.tell() < end
:
149 self
.atoms
.append(Atom(fileobj
))
151 def path(self
, *names
):
152 """Look up and return the complete path of an atom.
154 For example, atoms.path('moov', 'udta', 'meta') will return a
155 list of three atoms, corresponding to the moov, udta, and meta
160 path
.append(path
[-1][name
,])
def __getitem__(self, names):
    """Look up a child atom.

    'names' may be a list of atoms (['moov', 'udta']) or a string
    specifying the complete path ('moov.udta').

    The first top-level atom named names[0] is indexed with the
    remaining names and the result returned.  KeyError is raised when
    no top-level atom has that name.
    """
    path = names
    if isinstance(path, basestring):
        path = path.split(".")
    for top in self.atoms:
        if top.name != path[0]:
            continue
        # Delegate the rest of the path to the matching atom.
        return top[path[1:]]
    raise KeyError("%s not found" % path[0])
178 return "\n".join([repr(child
) for child
in self
.atoms
])
180 class M4ATags(DictProxy
, Metadata
):
181 """Dictionary containing Apple iTunes metadata list key/values.
183 Keys are four byte identifiers, except for freeform ('----')
184 keys. Values are usually unicode strings, but some atoms have a
187 trkn, disk -- tuple of 16 bit ints (current, total)
189 covr -- list of M4ACover objects (which are tagged strs)
190 gnre -- not supported. Use '\\xa9gen' instead.
192 The freeform '----' frames use a key in the format '----:mean:name'
193 where 'mean' is usually 'com.apple.iTunes' and 'name' is a unique
194 identifier for this frame. The value is a str, but is probably
195 text that can be decoded as UTF-8.
197 M4A tag data cannot exist outside of the structure of an M4A file,
198 so this class should not be manually instantiated.
200 Unknown non-text tags are removed.
def load(self, atoms, fileobj):
    """Fill the tags from the 'moov.udta.meta.ilst' atom of a file.

    For every child of the ilst atom, the bytes following its 8-byte
    header are read from fileobj and handed to the parse function
    registered for the child's name; names without a registered
    handler are parsed as text.

    Raises M4AMetadataError if the ilst atom cannot be looked up.
    """
    try:
        ilst = atoms["moov.udta.meta.ilst"]
    except KeyError as key:
        raise M4AMetadataError(key)

    # Handlers are stored as tuples with the parser in first position.
    fallback = (M4ATags.__parse_text,)
    for atom in ilst.children:
        fileobj.seek(atom.offset + 8)
        payload = fileobj.read(atom.length - 8)
        handlers = self.__atoms.get(atom.name, fallback)
        handlers[0](self, atom, payload)
213 def __key_sort(item1
, item2
):
216 # iTunes always writes the tags in order of "relevance", try
217 # to copy it as closely as possible.
218 order
= ["\xa9nam", "\xa9ART", "\xa9wrt", "\xa9alb",
219 "\xa9gen", "gnre", "trkn", "disk",
220 "\xa9day", "cpil", "tmpo", "\xa9too",
221 "----", "covr", "\xa9lyr"]
222 order
= dict(zip(order
, range(len(order
))))
        # If there's no key-based way to distinguish, order by length.
        # If there's still no way, go by string comparison on the
        # values, so we at least have something deterministic.
227 return (cmp(order
.get(key1
[:4], last
), order
.get(key2
[:4], last
)) or
228 cmp(len(v1
), len(v2
)) or cmp(v1
, v2
))
229 __key_sort
= staticmethod(__key_sort
)
231 def save(self
, filename
):
232 """Save the metadata to the given filename."""
235 items
.sort(self
.__key
_sort
)
236 for key
, value
in items
:
237 render
= self
.__atoms
.get(
238 key
[:4], (None, M4ATags
.__render
_text
))[1]
239 values
.append(render(self
, key
, value
))
240 data
= Atom
.render("ilst", "".join(values
))
242 # Find the old atoms.
243 fileobj
= open(filename
, "rb+")
245 atoms
= Atoms(fileobj
)
249 if moov
!= atoms
.atoms
[-1]:
250 # "Free" the old moov block. Something in the mdat
251 # block is not happy when its offset changes and it
252 # won't play back. So, rather than try to figure that
253 # out, just move the moov atom to the end of the file.
254 offset
= self
.__move
_moov
(fileobj
, moov
)
259 path
= atoms
.path("moov", "udta", "meta", "ilst")
261 self
.__save
_new
(fileobj
, atoms
, data
, offset
)
263 self
.__save
_existing
(fileobj
, atoms
, path
, data
, offset
)
267 def __move_moov(self
, fileobj
, moov
):
268 fileobj
.seek(moov
.offset
)
269 data
= fileobj
.read(moov
.length
)
270 fileobj
.seek(moov
.offset
)
271 free
= Atom
.render("free", "\x00" * (moov
.length
- 8))
274 # Figure out how far we have to shift all our successive
275 # seek calls, relative to what the atoms say.
276 old_end
= fileobj
.tell()
278 return old_end
- moov
.offset
280 def __save_new(self
, fileobj
, atoms
, ilst
, offset
):
281 hdlr
= Atom
.render("hdlr", "\x00" * 8 + "mdirappl" + "\x00" * 9)
282 meta
= Atom
.render("meta", "\x00\x00\x00\x00" + hdlr
+ ilst
)
283 moov
, udta
= atoms
.path("moov", "udta")
284 insert_bytes(fileobj
, len(meta
), udta
.offset
+ offset
+ 8)
285 fileobj
.seek(udta
.offset
+ offset
+ 8)
287 self
.__update
_parents
(fileobj
, [moov
, udta
], len(meta
), offset
)
289 def __save_existing(self
, fileobj
, atoms
, path
, data
, offset
):
290 # Replace the old ilst atom.
292 delta
= len(data
) - ilst
.length
293 fileobj
.seek(ilst
.offset
+ offset
)
295 insert_bytes(fileobj
, delta
, ilst
.offset
+ offset
)
297 delete_bytes(fileobj
, -delta
, ilst
.offset
+ offset
)
298 fileobj
.seek(ilst
.offset
+ offset
)
300 self
.__update
_parents
(fileobj
, path
, delta
, offset
)
302 def __update_parents(self
, fileobj
, path
, delta
, offset
):
303 # Update all parent atoms with the new size.
305 fileobj
.seek(atom
.offset
+ offset
)
306 size
= cdata
.uint_be(fileobj
.read(4)) + delta
307 fileobj
.seek(atom
.offset
+ offset
)
308 fileobj
.write(cdata
.to_uint_be(size
))
def __render_data(self, key, flags, data):
    """Render an atom named key that holds a single 'data' atom.

    The 'data' payload is the given data preceded by two big-endian
    32-bit integers: flags, then zero.
    """
    prefix = struct.pack(">2I", flags, 0)
    inner = Atom.render("data", prefix + data)
    return Atom.render(key, inner)
314 def __parse_freeform(self
, atom
, data
):
316 fileobj
= StringIO(data
)
317 mean_length
= cdata
.uint_be(fileobj
.read(4))
318 # skip over 8 bytes of atom name, flags
319 mean
= fileobj
.read(mean_length
- 4)[8:]
320 name_length
= cdata
.uint_be(fileobj
.read(4))
321 name
= fileobj
.read(name_length
- 4)[8:]
322 value_length
= cdata
.uint_be(fileobj
.read(4))
323 # Name, flags, and reserved bytes
324 value
= fileobj
.read(value_length
- 4)[12:]
326 # Some ---- atoms have no data atom, I have no clue why
327 # they actually end up in the file.
330 self
["%s:%s:%s" % (atom
.name
, mean
, name
)] = value
def __render_freeform(self, key, value):
    """Render a freeform tag as a '----' atom.

    key -- tag key in the form '----:mean:name'; it is split on the
           first two colons and the leading part is ignored
    value -- raw payload for the 'data' sub-atom

    The rendered atom contains a 'mean', a 'name' and a 'data'
    sub-atom, in that order.  Each starts with its total length as a
    32-bit big-endian integer followed by its four-byte name.

    Raises ValueError if key contains fewer than two colons.
    """
    dummy, mean, name = key.split(":", 2)
    # Header sizes: length (4) + name (4) + one zero field (4) = 12.
    mean = struct.pack(">I4sI", len(mean) + 12, "mean", 0) + mean
    name = struct.pack(">I4sI", len(name) + 12, "name", 0) + name
    # The data atom carries two 32-bit fields (0x1 and 0), hence 16.
    value = struct.pack(">I4s2I", len(value) + 16, "data", 0x1, 0) + value
    return Atom.render("----", mean + name + value)
def __parse_pair(self, atom, data):
    """Store the number pair of a 'trkn' or 'disk' atom.

    Two unsigned 16-bit big-endian integers are read from bytes 18-21
    of data and stored as a tuple under the atom's name.
    """
    current, total = struct.unpack(">2H", data[18:22])
    self[atom.name] = (current, total)
341 def __render_pair(self
, key
, value
):
343 if 0 <= track
< 1 << 16 and 0 <= total
< 1 << 16:
344 data
= struct
.pack(">4H", 0, track
, total
, 0)
345 return self
.__render
_data
(key
, 0, data
)
347 raise M4AMetadataValueError("invalid numeric pair %r" % (value
,))
349 def __render_pair_no_trailing(self
, key
, value
):
351 if 0 <= track
< 1 << 16 and 0 <= total
< 1 << 16:
352 data
= struct
.pack(">3H", 0, track
, total
)
353 return self
.__render
_data
(key
, 0, data
)
355 raise M4AMetadataValueError("invalid numeric pair %r" % (value
,))
def __parse_genre(self, atom, data):
    """Translate a numeric 'gnre' atom into a freeform '\\xa9gen' tag.

    The genre number is decoded from data[16:18] and used as a
    one-based index into GENRES.  Nothing is stored when a '\\xa9gen'
    tag is already present or when the number does not name an entry
    of GENRES.
    """
    # Translate to a freeform genre.
    genre = cdata.short_be(data[16:18])
    if "\xa9gen" in self:
        return
    # Check the range explicitly instead of relying on IndexError: a
    # number of zero or below would be accepted as a negative list
    # index and silently select a genre from the end of the table.
    if 1 <= genre <= len(GENRES):
        self["\xa9gen"] = GENRES[genre - 1]
def __parse_tempo(self, atom, data):
    """Store the big-endian short at data[16:18] under the atom's name."""
    tempo = cdata.short_be(data[16:18])
    self[atom.name] = tempo
def __render_tempo(self, key, value):
    """Render value as an unsigned 16-bit big-endian data atom.

    Raises M4AMetadataValueError unless 0 <= value < 65536.
    """
    if not (0 <= value < 1 << 16):
        raise M4AMetadataValueError("invalid short integer %r" % value)
    return self.__render_data(key, 0x15, cdata.to_ushort_be(value))
def __parse_compilation(self, atom, data):
    """Store the truth value of the byte at data[16] under atom.name.

    When that byte is absent the stored value is False.
    """
    try:
        flag = bool(ord(data[16:17]))
    except TypeError:
        # ord() rejects the empty slice of a payload that is too short.
        flag = False
    self[atom.name] = flag
def __render_compilation(self, key, value):
    """Render the truth value of value as a one-byte data atom."""
    flag = chr(bool(value))
    return self.__render_data(key, 0x15, flag)
379 def __parse_cover(self
, atom
, data
):
380 length
, name
, imageformat
= struct
.unpack(">I4sI", data
[:12])
382 raise M4AMetadataError(
383 "unexpected atom %r inside 'covr'" % name
)
384 if imageformat
not in (M4ACover
.FORMAT_JPEG
, M4ACover
.FORMAT_PNG
):
385 imageformat
= M4ACover
.FORMAT_JPEG
386 self
[atom
.name
]= M4ACover(data
[16:length
], imageformat
)
def __render_cover(self, key, value):
    """Render cover art as an atom named key holding one 'data' atom.

    The image format is taken from value.imageformat; values without
    that attribute are rendered as M4ACover.FORMAT_JPEG.
    """
    imageformat = getattr(value, "imageformat", M4ACover.FORMAT_JPEG)
    header = struct.pack(">2I", imageformat, 0)
    return Atom.render(key, Atom.render("data", header + value))
393 def __parse_text(self
, atom
, data
):
394 flags
= cdata
.uint_be(data
[8:12])
396 self
[atom
.name
] = data
[16:].decode('utf-8', 'replace')
def __render_text(self, key, value):
    """Render value, encoded as UTF-8, as a data atom with flags 0x1."""
    encoded = value.encode('utf-8')
    return self.__render_data(key, 0x1, encoded)
400 def delete(self
, filename
):
405 "----": (__parse_freeform
, __render_freeform
),
406 "trkn": (__parse_pair
, __render_pair
),
407 "disk": (__parse_pair
, __render_pair_no_trailing
),
408 "gnre": (__parse_genre
, None),
409 "tmpo": (__parse_tempo
, __render_tempo
),
410 "cpil": (__parse_compilation
, __render_compilation
),
411 "covr": (__parse_cover
, __render_cover
),
416 for key
, value
in self
.iteritems():
417 key
= key
.decode('latin1')
418 try: values
.append("%s=%s" % (key
, value
))
419 except UnicodeDecodeError:
420 values
.append("%s=[%d bytes of data]" % (key
, len(value
)))
421 return "\n".join(values
)
423 class M4AInfo(object):
424 """MPEG-4 stream information.
427 bitrate -- bitrate in bits per second, as an int
428 length -- file length in seconds, as a float
433 def __init__(self
, atoms
, fileobj
):
434 hdlr
= atoms
["moov.trak.mdia.hdlr"]
435 fileobj
.seek(hdlr
.offset
)
436 if "soun" not in fileobj
.read(hdlr
.length
):
437 raise M4AStreamInfoError("track has no audio data")
439 mdhd
= atoms
["moov.trak.mdia.mdhd"]
440 fileobj
.seek(mdhd
.offset
)
441 data
= fileobj
.read(mdhd
.length
)
442 if ord(data
[8]) == 0:
448 end
= offset
+ struct
.calcsize(fmt
)
449 unit
, length
= struct
.unpack(fmt
, data
[offset
:end
])
450 self
.length
= float(length
) / unit
453 atom
= atoms
["moov.trak.mdia.minf.stbl.stsd"]
454 fileobj
.seek(atom
.offset
)
455 data
= fileobj
.read(atom
.length
)
456 self
.bitrate
= cdata
.uint_be(data
[-17:-13])
457 except (ValueError, KeyError):
458 # Bitrate values are optional.
462 return "MPEG-4 audio, %.2f seconds, %d bps" % (
463 self
.length
, self
.bitrate
)
466 """An MPEG-4 audio file, probably containing AAC.
468 If more than one track is present in the file, the first is used.
469 Only audio ('soun') tracks will be read.
472 _mimes
= ["audio/mp4", "audio/x-m4a", "audio/mpeg4", "audio/aac"]
474 def load(self
, filename
):
475 self
.filename
= filename
476 fileobj
= open(filename
, "rb")
478 atoms
= Atoms(fileobj
)
479 try: self
.info
= M4AInfo(atoms
, fileobj
)
480 except StandardError, err
:
481 raise M4AStreamInfoError
, err
, sys
.exc_info()[2]
482 try: self
.tags
= M4ATags(atoms
, fileobj
)
483 except M4AMetadataError
:
485 except StandardError, err
:
486 raise M4AMetadataError
, err
, sys
.exc_info()[2]
491 self
.tags
= M4ATags()
def score(filename, fileobj, header):
    """Return how many of the markers 'ftyp' and 'mp4' occur in header."""
    hits = 0
    for marker in ("ftyp", "mp4"):
        if marker in header:
            hits += 1
    return hits
score = staticmethod(score)
def delete(filename):
    """Remove tags from a file."""
    audio = M4A(filename)
    audio.delete()