Removed hard coding of subtitles that are embedded in a video
[pyTivo/wmcbrine/lucasnz.git] / mutagen / apev2.py
blobd0e808b972a27ff4c8f2a861caec971e18a9ecee
1 # An APEv2 tag reader
3 # Copyright 2005 Joe Wreschnig
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License version 2 as
7 # published by the Free Software Foundation.
9 # $Id: apev2.py 4008 2007-04-21 04:02:07Z piman $
11 """APEv2 reading and writing.
13 The APEv2 format is most commonly used with Musepack files, but is
14 also the format of choice for WavPack and other formats. Some MP3s
15 also have APEv2 tags, but this can cause problems with many MP3
16 decoders and taggers.
18 APEv2 tags, like Vorbis comments, are freeform key=value pairs. APEv2
19 keys can be any ASCII string with characters from 0x20 to 0x7E,
20 between 2 and 255 characters long. Keys are case-sensitive, but
21 readers are recommended to be case insensitive, and it is forbidden to
22 have multiple keys which differ only in case. Keys are usually stored
23 title-cased (e.g. 'Artist' rather than 'artist').
25 APEv2 values are slightly more structured than Vorbis comments; values
26 are flagged as one of text, binary, or an external reference (usually
27 a URI).
29 Based off the format specification found at
30 http://wiki.hydrogenaudio.org/index.php?title=APEv2_specification.
31 """
33 __all__ = ["APEv2", "APEv2File", "Open", "delete"]
35 import struct
36 from cStringIO import StringIO
def is_valid_apev2_key(key):
    """Return whether `key` may be used as an APEv2 item key.

    A key qualifies when it is 2 to 255 characters long, every
    character lies between ' ' and '~' inclusive, and it is not one of
    the names "OggS", "TAG", "ID3" or "MP+".
    """
    # Length first: min()/max() below would fail on an empty key.
    if not 2 <= len(key) <= 255:
        return False
    if min(key) < ' ' or max(key) > '~':
        return False
    return key not in ["OggS", "TAG", "ID3", "MP+"]
# There are three different kinds of APE tag values.
# "0: Item contains text information coded in UTF-8
#  1: Item contains binary information
#  2: Item is a locator of external stored information [e.g. URL]
#  3: reserved"
TEXT, BINARY, EXTERNAL = range(3)

# Bit masks for the 32-bit flags word stored in a tag header/footer.
# Plain integer literals are used instead of the Python-2-only "1L"
# form: the shift result is promoted automatically where needed
# (Python 2.4 and later), and the values are identical.
HAS_HEADER = 1 << 31
HAS_NO_FOOTER = 1 << 30
IS_HEADER = 1 << 29
class error(IOError):
    """Base class of the APEv2 errors defined in this module."""


class APENoHeaderError(error, ValueError):
    """Raised with "No APE tag found" when a file has no usable tag."""


class APEUnsupportedVersionError(error, ValueError):
    """Error for an unsupported tag version.

    NOTE(review): nothing in this module raises it; the meaning is
    inferred from the name only.
    """


class APEBadItemError(error, ValueError):
    """Raised when a tag item carries the reserved value type 3."""
58 from mutagen import Metadata, FileType
59 from mutagen._util import DictMixin, cdata, utf8, delete_bytes
class _APEv2Data(object):
    """Locate an APEv2 tag in an open file and record its layout.

    After construction the instance holds the byte offsets of the tag
    parts (start, header, data, footer, end), the fields read from the
    32-byte header/footer (version, size, items, flags) and the bytes
    read from the data offset (tag). When no tag is found, `metadata`
    stays None and the other attributes keep their class defaults.
    """

    # Offsets of the important parts of the file.
    start = None
    header = None
    data = None
    footer = None
    end = None
    # Footer or header; seek here and read 32 to get version/size/items/flags
    metadata = None
    # Actual tag data
    tag = None

    version = None
    size = None
    items = None
    flags = 0

    # The tag is at the start rather than the end. A tag at both
    # the start and end of the file (i.e. the tag is the whole file)
    # is not considered to be at the start.
    is_at_start = False

    def __init__(self, fileobj):
        """Scan `fileobj` (a seekable file object) for an APEv2 tag."""
        self.__find_metadata(fileobj)
        # Either offset may still be None here; this relies on Python 2
        # ordering None below every integer.
        self.metadata = max(self.header, self.footer)
        if self.metadata is not None:
            self.__fill_missing(fileobj)
            self.__fix_brokenness(fileobj)
            if self.data is not None:
                fileobj.seek(self.data)
                self.tag = fileobj.read(self.size)

    def __find_metadata(self, fileobj):
        """Try to find a header or footer and record its offset."""

        def rewind_if_magic():
            # Read 8 bytes; when they are the tag preamble, step back so
            # the file position is on its first byte.
            if fileobj.read(8) == "APETAGEX":
                fileobj.seek(-8, 1)
                return True
            return False

        # Check for a simple footer.
        try:
            fileobj.seek(-32, 2)
        except IOError:
            fileobj.seek(0, 2)
            return
        if rewind_if_magic():
            self.footer = self.metadata = fileobj.tell()
            return

        # Check for an APEv2 tag followed by an ID3v1 tag at the end.
        try:
            fileobj.seek(-128, 2)
            if fileobj.read(3) == "TAG":

                fileobj.seek(-35, 1)  # "TAG" + header length
                if rewind_if_magic():
                    self.footer = fileobj.tell()
                    return

                # ID3v1 tag at the end, maybe preceded by Lyrics3v2.
                # (http://www.id3.org/lyrics3200.html)
                # (header length - "APETAGEX") - "LYRICS200"
                fileobj.seek(15, 1)
                if fileobj.read(9) == 'LYRICS200':
                    fileobj.seek(-15, 1)  # "LYRICS200" + size tag
                    try:
                        lyrics_size = int(fileobj.read(6))
                    except ValueError:
                        # A malformed size field is treated like a
                        # failed seek and handled by the outer except.
                        raise IOError

                    fileobj.seek(-32 - lyrics_size - 6, 1)
                    if rewind_if_magic():
                        self.footer = fileobj.tell()
                        return

        except IOError:
            pass

        # Check for a tag at the start.
        fileobj.seek(0, 0)
        if fileobj.read(8) == "APETAGEX":
            self.is_at_start = True
            self.header = 0

    def __fill_missing(self, fileobj):
        """Read the header/footer fields and derive the other offsets.

        Raises APENoHeaderError when neither a header nor a footer
        offset is known.
        """
        # Skip the 8-byte preamble; four 4-byte fields follow. The
        # version is kept as raw bytes, the rest go through
        # cdata.uint_le (presumably little-endian unsigned ints).
        fileobj.seek(self.metadata + 8)
        self.version = fileobj.read(4)
        self.size = cdata.uint_le(fileobj.read(4))
        self.items = cdata.uint_le(fileobj.read(4))
        self.flags = cdata.uint_le(fileobj.read(4))

        if self.header is None and self.footer is None:
            raise APENoHeaderError("No APE tag found")

        if self.header is not None:
            self.data = self.header + 32
            # If we're reading the header, the size is the header
            # offset + the size, which includes the footer.
            self.end = self.data + self.size
            fileobj.seek(self.end - 32, 0)
            if fileobj.read(8) == "APETAGEX":
                self.footer = self.end - 32
        else:
            self.end = self.footer + 32
            self.data = self.end - self.size
            if self.flags & HAS_HEADER:
                self.header = self.data - 32
            else:
                self.header = self.data

    def __fix_brokenness(self, fileobj):
        """Fix broken tags written with PyMusepack; set `self.start`."""
        start = self.header
        if start is None:
            start = self.data
        fileobj.seek(start)

        while start > 0:
            # Clean up broken writing from pre-Mutagen PyMusepack.
            # It didn't remove the first 24 bytes of header.
            try:
                fileobj.seek(-24, 1)
            except IOError:
                break
            if fileobj.read(8) != "APETAGEX":
                break
            fileobj.seek(-8, 1)
            start = fileobj.tell()
        self.start = start
class APEv2(DictMixin, Metadata):
    """A file with an APEv2 tag.

    ID3v1 tags are silently ignored and overwritten.
    """

    filename = None

    def __init__(self, *args, **kwargs):
        # Internally all names are stored as lowercase, but the case
        # they were set with is remembered and used when saving. This
        # is roughly in line with the standard, which says that keys
        # are case-sensitive but two keys differing only in case are
        # not allowed, and recommends case-insensitive
        # implementations.
        self.__casemap = {}
        self.__dict = {}
        super(APEv2, self).__init__(*args, **kwargs)

    def pprint(self):
        """Return tag key=value pairs in a human-readable format."""
        items = self.items()
        items.sort()
        return "\n".join(["%s=%s" % (k, v.pprint()) for k, v in items])

    def load(self, filename):
        """Load tags from a filename.

        Raises APENoHeaderError when no tag data is found in the file.
        """
        self.filename = filename
        fileobj = open(filename, "rb")
        try:
            data = _APEv2Data(fileobj)
        finally:
            fileobj.close()
        if data.tag:
            self.clear()
            self.__casemap.clear()
            self.__parse_tag(data.tag, data.items)
        else:
            raise APENoHeaderError("No APE tag found")

    def __parse_tag(self, tag, count):
        """Store the `count` items packed in the string `tag`.

        Raises APEBadItemError for an item with the reserved type 3.
        """
        fileobj = StringIO(tag)

        for i in range(count):
            size = cdata.uint_le(fileobj.read(4))
            flags = cdata.uint_le(fileobj.read(4))

            # Bits 1 and 2 hold the value type, 0-3.
            # Bit 0 is the read/write flag, ignored.
            kind = (flags & 6) >> 1
            if kind == 3:
                raise APEBadItemError("value type must be 0, 1, or 2")
            # The key is null-terminated: read one byte at a time until
            # the terminator or the end of the data.
            key = value = fileobj.read(1)
            while key[-1:] != '\x00' and value:
                value = fileobj.read(1)
                key += value
            if key[-1:] == "\x00":
                key = key[:-1]
            value = fileobj.read(size)
            self[key] = APEValue(value, kind)

    def __getitem__(self, key):
        if not is_valid_apev2_key(key):
            raise KeyError("%r is not a valid APEv2 key" % key)
        return self.__dict[key.lower()]

    def __delitem__(self, key):
        if not is_valid_apev2_key(key):
            raise KeyError("%r is not a valid APEv2 key" % key)
        lowered = key.lower()
        # Raises KeyError for a missing key before anything is changed.
        del(self.__dict[lowered])
        # Also forget the remembered spelling so the case map does not
        # keep entries for keys that no longer exist.
        self.__casemap.pop(lowered, None)

    def __setitem__(self, key, value):
        """'Magic' value setter.

        This function tries to guess at what kind of value you want to
        store. If you pass in a valid UTF-8 or Unicode string, it
        treats it as a text value. If you pass in a list, it treats it
        as a list of string/Unicode values. If you pass in a string
        that is not valid UTF-8, it assumes it is a binary value.

        If you need to force a specific type of value (e.g. binary
        data that also happens to be valid UTF-8, or an external
        reference), use the APEValue factory and set the value to the
        result of that:
            from mutagen.apev2 import APEValue, EXTERNAL
            tag['Website'] = APEValue('http://example.org', EXTERNAL)
        """
        if not is_valid_apev2_key(key):
            raise KeyError("%r is not a valid APEv2 key" % key)

        if not isinstance(value, _APEValue):
            # let's guess at the content if we're not already a value...
            if isinstance(value, unicode):
                # unicode? we've got to be text.
                value = APEValue(utf8(value), TEXT)
            elif isinstance(value, list):
                # list? text.
                value = APEValue("\0".join(map(utf8, value)), TEXT)
            else:
                # Decoding is only a validity probe; the result is
                # discarded.
                try:
                    value.decode("utf-8")
                except UnicodeError:
                    # invalid UTF8 text, probably binary
                    value = APEValue(value, BINARY)
                else:
                    # valid UTF8, probably text
                    value = APEValue(value, TEXT)
        self.__casemap[key.lower()] = key
        self.__dict[key.lower()] = value

    def keys(self):
        """Return the keys, spelled the way they were last set."""
        return [self.__casemap.get(key, key) for key in self.__dict.keys()]

    def save(self, filename=None):
        """Save changes to a file.

        If no filename is given, the one most recently loaded is used.

        Tags are always written at the end of the file, and include
        a header and a footer.
        """
        filename = filename or self.filename
        # Fall back to creating the file when it cannot be opened for
        # in-place update.
        try:
            fileobj = open(filename, "r+b")
        except IOError:
            fileobj = open(filename, "w+b")
        # Close the file even when locating or writing the tag fails,
        # matching what load() and delete() already do.
        try:
            data = _APEv2Data(fileobj)

            if data.is_at_start:
                delete_bytes(fileobj, data.end - data.start, data.start)
            elif data.start is not None:
                fileobj.seek(data.start)
                # Delete an ID3v1 tag if present, too.
                fileobj.truncate()
            fileobj.seek(0, 2)

            # "APE tags items should be sorted ascending by size... This is
            # not a MUST, but STRONGLY recommended. Actually the items should
            # be sorted by importance/byte, but this is not feasible."
            tags = [v._internal(k) for k, v in self.items()]
            tags.sort(lambda a, b: cmp(len(a), len(b)))
            num_tags = len(tags)
            tags = "".join(tags)

            header = "APETAGEX%s%s" % (
                # version, tag size, item count, flags
                struct.pack("<4I", 2000, len(tags) + 32, num_tags,
                            HAS_HEADER | IS_HEADER),
                "\0" * 8)
            fileobj.write(header)

            fileobj.write(tags)

            footer = "APETAGEX%s%s" % (
                # version, tag size, item count, flags
                struct.pack("<4I", 2000, len(tags) + 32, num_tags,
                            HAS_HEADER),
                "\0" * 8)
            fileobj.write(footer)
        finally:
            fileobj.close()

    def delete(self, filename=None):
        """Remove tags from a file."""
        filename = filename or self.filename
        fileobj = open(filename, "r+b")
        try:
            data = _APEv2Data(fileobj)
            if data.start is not None and data.size is not None:
                delete_bytes(fileobj, data.end - data.start, data.start)
        finally:
            fileobj.close()
        self.clear()


Open = APEv2
def delete(filename):
    """Remove tags from a file.

    An APENoHeaderError raised while opening the tag or deleting it is
    swallowed, so a file without an APEv2 tag is not an error here.
    """
    try:
        tag = APEv2(filename)
        tag.delete()
    except APENoHeaderError:
        pass
def APEValue(value, kind):
    """APEv2 tag value factory.

    Use this if you need to specify the value's type manually. Binary
    and text data are automatically detected by APEv2.__setitem__.

    Returns an APETextValue, APEBinaryValue or APEExtValue according to
    `kind`; raises ValueError for any other kind.
    """
    value_classes = (
        (TEXT, APETextValue),
        (BINARY, APEBinaryValue),
        (EXTERNAL, APEExtValue),
    )
    for code, value_class in value_classes:
        if kind == code:
            return value_class(value, kind)
    raise ValueError("kind must be TEXT, BINARY, or EXTERNAL")
373 class _APEValue(object):
374 def __init__(self, value, kind):
375 self.kind = kind
376 self.value = value
378 def __len__(self):
379 return len(self.value)
380 def __str__(self):
381 return self.value
383 # Packed format for an item:
384 # 4B: Value length
385 # 4B: Value type
386 # Key name
387 # 1B: Null
388 # Key value
389 def _internal(self, key):
390 return "%s%s\0%s" %(
391 struct.pack("<2I", len(self.value), self.kind << 1),
392 key, self.value)
394 def __repr__(self):
395 return "%s(%r, %d)" % (type(self).__name__, self.value, self.kind)
class APETextValue(_APEValue):
    """An APEv2 text value.

    Text values are Unicode/UTF-8 strings. They can be accessed like
    strings (with a null separating the values), or arrays of strings.
    The raw `value` attribute is kept as a UTF-8 encoded byte string.
    """

    def __unicode__(self):
        return unicode(str(self), "utf-8")

    def __iter__(self):
        """Iterate over the strings of the value (not the characters)"""
        return iter(unicode(self).split("\0"))

    def __getitem__(self, index):
        return unicode(self).split("\0")[index]

    def __len__(self):
        # Number of null-separated strings, not the byte length.
        return self.value.count("\0") + 1

    def __cmp__(self, other):
        return cmp(unicode(self), other)

    # Defining __cmp__ should not change hashing; reuse the base hash.
    __hash__ = _APEValue.__hash__

    def __setitem__(self, index, value):
        """Replace the string at `index` with `value`.

        `value` may be a unicode string or a UTF-8 encoded byte string.
        """
        values = list(self)
        # Work in unicode throughout and encode once at the end. The
        # list holds unicode strings, so inserting encoded bytes would
        # force an implicit ASCII decode when joining and fail for any
        # non-ASCII text.
        if not isinstance(value, unicode):
            value = value.decode("utf-8")
        values[index] = value
        self.value = u"\0".join(values).encode("utf-8")

    def pprint(self):
        return " / ".join(self)
class APEBinaryValue(_APEValue):
    """An APEv2 binary value."""

    def pprint(self):
        """Return a placeholder showing only the length of the value."""
        byte_count = len(self)
        return "[%d bytes]" % byte_count
class APEExtValue(_APEValue):
    """An APEv2 external value.

    External values are usually URI or IRI strings.
    """

    def pprint(self):
        """Return the value as unicode text prefixed with "[External] "."""
        # Decode explicitly as UTF-8. This class defines no __unicode__,
        # so unicode(self) would decode str(self) with the default
        # (ASCII) codec and fail for IRIs with non-ASCII characters.
        return "[External] %s" % unicode(str(self), "utf-8")
class APEv2File(FileType):
    """A file of unknown format that may carry an APEv2 tag."""

    class _Info(object):
        """Placeholder stream info: nothing is read from the file."""
        length = 0
        bitrate = 0

        def __init__(self, fileobj):
            pass

        pprint = staticmethod(lambda: "Unknown format with APEv2 tag.")

    def load(self, filename):
        """Load stream info and tags from `filename`.

        `tags` is set to None when reading the tag raises this
        module's `error`.
        """
        self.filename = filename
        # Close the handle once the info object has been built instead
        # of leaving it open. NOTE(review): assumes an _Info
        # implementation does not keep reading from fileobj after its
        # constructor returns -- confirm for subclasses overriding _Info.
        fileobj = open(filename, "rb")
        try:
            self.info = self._Info(fileobj)
        finally:
            fileobj.close()
        try:
            self.tags = APEv2(filename)
        except error:
            self.tags = None

    def add_tags(self):
        """Attach an empty APEv2 tag; ValueError if tags already exist."""
        if self.tags is not None:
            raise ValueError("%r already has tags: %r" % (self, self.tags))
        self.tags = APEv2()

    def score(filename, fileobj, header):
        """Return a score for how likely the file is handled by this type.

        The score is 1 when "APETAGEX" occurs in the last 160 bytes of
        `fileobj` (or in the whole file when that seek fails), minus 1
        when `header` starts with "ID3". `filename` is not used.
        """
        try:
            fileobj.seek(-160, 2)
        except IOError:
            fileobj.seek(0)
        footer = fileobj.read()
        return (("APETAGEX" in footer) - header.startswith("ID3"))
    score = staticmethod(score)