Other code tidy ups and alignment with wmcbrine's fork
[pyTivo/wmcbrine/lucasnz.git] / mutagen / ogg.py
blob2a0c4e00cf60c6b39a1c915c5853e8f1f72daf51
1 # Copyright 2006 Joe Wreschnig
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License version 2 as
5 # published by the Free Software Foundation.
7 # $Id: ogg.py 3975 2007-01-13 21:51:17Z piman $
9 """Read and write Ogg bitstreams and pages.
11 This module reads and writes a subset of the Ogg bitstream format
12 version 0. It does *not* read or write Ogg Vorbis files! For that,
13 you should use mutagen.oggvorbis.
15 This implementation is based on the RFC 3533 standard found at
16 http://www.xiph.org/ogg/doc/rfc3533.txt.
17 """
19 import struct
20 import sys
21 import zlib
23 from cStringIO import StringIO
25 from mutagen import FileType
26 from mutagen._util import cdata, insert_bytes, delete_bytes
class error(IOError):
    """Raised when an Ogg bitstream cannot be parsed.

    Derives from IOError so that callers handling generic I/O failures
    also catch malformed or truncated Ogg data.
    """
class OggPage(object):
    """A single Ogg page (not necessarily a single encoded packet).

    A page is a 27 byte header (whose final byte is the number of
    lacing values), followed by the lacing values describing the length
    of the data, followed by the data.

    The constructor is given a file-like object pointing to the start
    of an Ogg page. After the constructor is finished it is pointing
    to the start of the next page.

    Attributes:
    version -- stream structure version (currently always 0)
    position -- absolute stream position (default 0)
    serial -- logical stream serial number (default 0)
    sequence -- page sequence number within logical stream (default 0)
    offset -- offset this page was read from (default None)
    complete -- if the last packet on this page is complete (default True)
    packets -- list of raw packet data (default [])

    Note that if 'complete' is false, the next page's 'continued'
    property must be true (so set both when constructing pages).

    If a file-like object is supplied to the constructor, the above
    attributes will be filled in based on it.
    """

    version = 0
    __type_flags = 0
    # A plain int is enough here; Python promotes to long on demand.
    position = 0
    serial = 0
    sequence = 0
    offset = None
    complete = True

    def __init__(self, fileobj=None):
        """Create an empty page, or read one page from fileobj.

        Raises EOFError if fileobj is already at its end, and error if
        the header is truncated, does not start with "OggS", has an
        unsupported version, or the lacing values / packet data cannot
        be read in full.
        """
        self.packets = []

        if fileobj is None:
            return

        self.offset = fileobj.tell()

        header = fileobj.read(27)
        if not header:
            raise EOFError

        try:
            # The stored CRC is unpacked but not verified.
            (oggs, self.version, self.__type_flags, self.position,
             self.serial, self.sequence, crc, segments) = struct.unpack(
                "<4sBBqIIiB", header)
        except struct.error:
            raise error("unable to read full header; got %r" % header)

        if oggs != "OggS":
            raise error("read %r, expected %r, at 0x%x" % (
                oggs, "OggS", fileobj.tell() - 27))

        if self.version != 0:
            raise error("version %r unsupported" % self.version)

        lacing_bytes = fileobj.read(segments)
        if len(lacing_bytes) != segments:
            raise error("unable to read %r lacing bytes" % segments)

        # Consecutive lacing values are summed until one below 255
        # terminates the packet.
        total = 0
        lacings = []
        for c in map(ord, lacing_bytes):
            total += c
            if c < 255:
                lacings.append(total)
                total = 0
        if total:
            # The final run was never terminated, so the last packet
            # continues on the next page.
            lacings.append(total)
            self.complete = False

        self.packets = [fileobj.read(length) for length in lacings]
        if [len(packet) for packet in self.packets] != lacings:
            raise error("unable to read full data")

    def __eq__(self, other):
        """Two Ogg pages are the same if they write the same data."""
        try:
            return (self.write() == other.write())
        except AttributeError:
            return False

    def __ne__(self, other):
        """Inverse of __eq__.

        Python 2 does not derive != from ==, so without this method
        two equal pages would still compare as unequal.
        """
        return not self.__eq__(other)

    __hash__ = object.__hash__

    def __repr__(self):
        attrs = ['version', 'position', 'serial', 'sequence', 'offset',
                 'complete', 'continued', 'first', 'last']
        values = ["%s=%r" % (attr, getattr(self, attr)) for attr in attrs]
        return "<%s %s, %d bytes in %d packets>" % (
            type(self).__name__, " ".join(values),
            sum(map(len, self.packets)), len(self.packets))

    def write(self):
        """Return a string encoding of the page header and data.

        A ValueError is raised if the data is too big to fit in a
        single page (more than 255 lacing values are needed, so the
        count cannot be stored in one byte).
        """
        data = [
            # The CRC field is packed as zero and patched in below.
            struct.pack("<4sBBqIIi", "OggS", self.version,
                        self.__type_flags, self.position, self.serial,
                        self.sequence, 0)
            ]

        lacing_data = []
        for datum in self.packets:
            quot, rem = divmod(len(datum), 255)
            lacing_data.append("\xff" * quot + chr(rem))
        lacing_data = "".join(lacing_data)
        if not self.complete and lacing_data.endswith("\x00"):
            # An unterminated packet has no closing lacing value.
            lacing_data = lacing_data[:-1]
        data.append(chr(len(lacing_data)))
        data.append(lacing_data)
        data.extend(self.packets)
        data = "".join(data)

        # Python's CRC is swapped relative to Ogg's needs.
        crc = ~zlib.crc32(data.translate(cdata.bitswap), -1)
        # Although we're using to_int_be, this actually makes the CRC
        # a proper le integer, since Python's CRC is byteswapped.
        crc = cdata.to_int_be(crc).translate(cdata.bitswap)
        data = data[:22] + crc + data[26:]
        return data

    def __size(self):
        size = 27  # Initial header size
        # rem stays None when there are no packets, so the adjustment
        # below is skipped instead of hitting an unbound local.
        rem = None
        for datum in self.packets:
            quot, rem = divmod(len(datum), 255)
            size += quot + 1
        if not self.complete and rem == 0:
            # Packet contains a multiple of 255 bytes and is not
            # terminated, so we don't have a \x00 at the end.
            size -= 1
        size += sum(map(len, self.packets))
        return size

    size = property(__size, doc="Total frame size.")

    def __set_flag(self, bit, val):
        mask = 1 << bit
        if val:
            self.__type_flags |= mask
        else:
            self.__type_flags &= ~mask

    continued = property(
        lambda self: cdata.test_bit(self.__type_flags, 0),
        lambda self, v: self.__set_flag(0, v),
        doc="The first packet is continued from the previous page.")

    first = property(
        lambda self: cdata.test_bit(self.__type_flags, 1),
        lambda self, v: self.__set_flag(1, v),
        doc="This is the first page of a logical bitstream.")

    last = property(
        lambda self: cdata.test_bit(self.__type_flags, 2),
        lambda self, v: self.__set_flag(2, v),
        doc="This is the last page of a logical bitstream.")

    def renumber(klass, fileobj, serial, start):
        """Renumber pages belonging to a specified logical stream.

        fileobj must be opened with mode r+b or w+b.

        Starting at page number 'start', renumber all pages belonging
        to logical stream 'serial'. Other pages will be ignored.

        fileobj must point to the start of a valid Ogg page; any
        occurring after it and part of the specified logical stream
        will be numbered. No adjustment will be made to the data in
        the pages nor the granule position; only the page number, and
        so also the CRC.

        If an error occurs (e.g. non-Ogg data is found), fileobj will
        be left pointing to the place in the stream the error occurred,
        but the invalid data will be left intact (since this function
        does not change the total file size).
        """
        number = start
        while True:
            try:
                page = klass(fileobj)
            except EOFError:
                break
            if page.serial != serial:
                # Wrong stream, skip this page.
                continue
            # Changing the number can't change the page size,
            # so seeking back based on the current size is safe.
            fileobj.seek(-page.size, 1)
            page.sequence = number
            fileobj.write(page.write())
            fileobj.seek(page.offset + page.size, 0)
            number += 1
    renumber = classmethod(renumber)

    def to_packets(klass, pages, strict=False):
        """Construct a list of packet data from a list of Ogg pages.

        If strict is true, the first page must start a new packet,
        and the last page must end the last packet.

        An empty list of pages yields an empty list of packets, and
        pages that carry no packets contribute nothing. ValueError is
        raised if the pages do not share one serial number, if their
        sequence numbers are not consecutive, or if a strict check
        fails.
        """
        if not pages:
            return []

        serial = pages[0].serial
        sequence = pages[0].sequence
        # Each packet is gathered as a list of fragments and joined
        # once at the end, so long continued packets stay linear.
        packets = []

        if strict:
            if pages[0].continued:
                raise ValueError("first packet is continued")
            if not pages[-1].complete:
                raise ValueError("last packet does not complete")
        elif pages[0].continued:
            # Placeholder for the packet begun on an unseen page.
            packets.append([""])

        for page in pages:
            if serial != page.serial:
                raise ValueError("invalid serial number in %r" % page)
            elif sequence != page.sequence:
                raise ValueError("bad sequence number in %r" % page)
            else:
                sequence += 1

            if not page.packets:
                continue
            if page.continued and packets:
                packets[-1].append(page.packets[0])
            else:
                packets.append([page.packets[0]])
            packets.extend([[packet] for packet in page.packets[1:]])

        return ["".join(fragments) for fragments in packets]
    to_packets = classmethod(to_packets)

    def from_packets(klass, packets, sequence=0,
                     default_size=4096, wiggle_room=2048):
        """Construct a list of Ogg pages from a list of packet data.

        The algorithm will generate pages of approximately
        default_size in size (rounded down to the nearest multiple of
        255). However, it will also allow pages to increase to
        approximately default_size + wiggle_room if allowing the
        wiggle room would finish a packet (only one packet will be
        finished in this way per page; if the next packet would fit
        into the wiggle room, it still starts on a new page).

        This method reduces packet fragmentation when packet sizes are
        slightly larger than the default page size, while still
        ensuring most pages are of the average size.

        Pages are numbered starting at 'sequence'; other information
        is uninitialized.
        """
        chunk_size = (default_size // 255) * 255

        pages = []

        page = klass()
        page.sequence = sequence

        for packet in packets:
            page.packets.append("")
            while packet:
                data, packet = packet[:chunk_size], packet[chunk_size:]
                if page.size < default_size and len(page.packets) < 255:
                    page.packets[-1] += data
                else:
                    # If we've put any packet data into this page yet,
                    # we need to mark it incomplete. However, we can
                    # also have just started this packet on an already
                    # full page, in which case, just start the new
                    # page with this packet.
                    if page.packets[-1]:
                        page.complete = False
                        if len(page.packets) == 1:
                            # No packet finishes on this page.
                            page.position = -1
                    else:
                        page.packets.pop(-1)
                    pages.append(page)
                    page = klass()
                    page.continued = not pages[-1].complete
                    page.sequence = pages[-1].sequence + 1
                    page.packets.append(data)

                if len(packet) < wiggle_room:
                    page.packets[-1] += packet
                    packet = ""

        if page.packets:
            pages.append(page)

        return pages
    from_packets = classmethod(from_packets)

    def replace(klass, fileobj, old_pages, new_pages):
        """Replace old_pages with new_pages within fileobj.

        old_pages must have come from reading fileobj originally.
        new_pages are assumed to have the 'same' data as old_pages,
        and so the serial and sequence numbers will be copied, as will
        the flags for the first and last pages.

        fileobj will be resized and pages renumbered as necessary. As
        such, it must be opened r+b or w+b.

        new_pages are modified in place; old_pages is left untouched.
        """
        # Number the new pages starting from the first old page.
        first = old_pages[0].sequence
        for index, page in enumerate(new_pages):
            page.sequence = first + index
            page.serial = old_pages[0].serial

        new_pages[0].first = old_pages[0].first
        new_pages[0].last = old_pages[0].last
        new_pages[0].continued = old_pages[0].continued

        new_pages[-1].first = old_pages[-1].first
        new_pages[-1].last = old_pages[-1].last
        new_pages[-1].complete = old_pages[-1].complete
        if not new_pages[-1].complete and len(new_pages[-1].packets) == 1:
            new_pages[-1].position = -1

        new_data = "".join([page.write() for page in new_pages])

        # Make room in the file for the new data.
        delta = len(new_data)
        fileobj.seek(old_pages[0].offset, 0)
        insert_bytes(fileobj, delta, old_pages[0].offset)
        fileobj.seek(old_pages[0].offset, 0)
        fileobj.write(new_data)
        new_data_end = old_pages[0].offset + delta

        # Go through the old pages and delete them. Since we shifted
        # the data down the file, we need to adjust their offsets. We
        # also need to go backwards, so we don't adjust the deltas of
        # the other pages. Iterate over a reversed copy so the
        # caller's list keeps its order.
        for old_page in old_pages[::-1]:
            adj_offset = old_page.offset + delta
            delete_bytes(fileobj, old_page.size, adj_offset)

        # Finally, if there's any discrepancy in length, we need to
        # renumber the pages for the logical stream.
        if len(old_pages) != len(new_pages):
            fileobj.seek(new_data_end, 0)
            serial = new_pages[-1].serial
            sequence = new_pages[-1].sequence + 1
            klass.renumber(fileobj, serial, sequence)
    replace = classmethod(replace)

    def find_last(klass, fileobj, serial):
        """Find the last page of the stream 'serial'.

        If the file is not multiplexed this function is fast. If it is,
        it must read the whole stream.

        This finds the last page in the actual file object, or the last
        page in the stream (with eos set), whichever comes first.

        Returns None if no page of the stream is found. Raises error
        if the tail of the file contains no "OggS" marker at all.
        """
        # For non-muxed streams, look at the last page.
        try:
            fileobj.seek(-256*256, 2)
        except IOError:
            # The file is less than 64k in length.
            fileobj.seek(0)
        data = fileobj.read()
        try:
            index = data.rindex("OggS")
        except ValueError:
            raise error("unable to find final Ogg header")
        stringobj = StringIO(data[index:])
        best_page = None
        try:
            page = klass(stringobj)
        except error:
            pass
        else:
            if page.serial == serial:
                if page.last:
                    return page
                best_page = page

        # The stream is muxed, so use the slow way. Every page,
        # including the very first one, must belong to the requested
        # stream before it can be remembered or end the scan.
        fileobj.seek(0)
        try:
            page = klass(fileobj)
            while True:
                if page.serial == serial:
                    best_page = page
                    if page.last:
                        break
                page = klass(fileobj)
            return best_page
        except (error, EOFError):
            # Ran out of readable pages; use the best seen so far.
            return best_page
    find_last = classmethod(find_last)
428 class OggFileType(FileType):
429 """An generic Ogg file."""
431 _Info = None
432 _Tags = None
433 _Error = None
434 _mimes = ["application/ogg", "application/x-ogg"]
436 def load(self, filename):
437 """Load file information from a filename."""
439 self.filename = filename
440 fileobj = open(filename, "rb")
441 try:
442 try:
443 self.info = self._Info(fileobj)
444 self.tags = self._Tags(fileobj, self.info)
446 if self.info.length:
447 # The streaminfo gave us real length information,
448 # don't waste time scanning the Ogg.
449 return
451 last_page = OggPage.find_last(fileobj, self.info.serial)
452 samples = last_page.position
453 try:
454 denom = self.info.sample_rate
455 except AttributeError:
456 denom = self.info.fps
457 self.info.length = samples / float(denom)
459 except error, e:
460 raise self._Error, e, sys.exc_info()[2]
461 except EOFError:
462 raise self._Error, "no appropriate stream found"
463 finally:
464 fileobj.close()
466 def delete(self, filename=None):
467 """Remove tags from a file.
469 If no filename is given, the one most recently loaded is used.
471 if filename is None:
472 filename = self.filename
474 self.tags.clear()
475 fileobj = open(filename, "rb+")
476 try:
477 try: self.tags._inject(fileobj)
478 except error, e:
479 raise self._Error, e, sys.exc_info()[2]
480 except EOFError:
481 raise self._Error, "no appropriate stream found"
482 finally:
483 fileobj.close()
485 def save(self, filename=None):
486 """Save a tag to a file.
488 If no filename is given, the one most recently loaded is used.
490 if filename is None:
491 filename = self.filename
492 fileobj = open(filename, "rb+")
493 try:
494 try: self.tags._inject(fileobj)
495 except error, e:
496 raise self._Error, e, sys.exc_info()[2]
497 except EOFError:
498 raise self._Error, "no appropriate stream found"
499 finally:
500 fileobj.close()