Finally validates AND works with the strict doctype; moved some of the
[pyTivo/TheBayer.git] / mutagen / ogg.py
blobf77b686609d476cf173f7fd5fc13bac3d95327a5
1 # Copyright 2006 Joe Wreschnig <piman@sacredchao.net>
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License version 2 as
5 # published by the Free Software Foundation.
7 # $Id: ogg.py 3975 2007-01-13 21:51:17Z piman $
9 """Read and write Ogg bitstreams and pages.
11 This module reads and writes a subset of the Ogg bitstream format
12 version 0. It does *not* read or write Ogg Vorbis files! For that,
13 you should use mutagen.oggvorbis.
15 This implementation is based on the RFC 3533 standard found at
16 http://www.xiph.org/ogg/doc/rfc3533.txt.
17 """
19 import struct
20 import sys
21 import zlib
23 from cStringIO import StringIO
25 from mutagen import FileType
26 from mutagen._util import cdata, insert_bytes, delete_bytes
class error(IOError):
    """Raised when an Ogg stream cannot be parsed.

    Derives from IOError so callers that already guard file access
    with 'except IOError' also catch malformed Ogg data.
    """
class OggPage(object):
    """A single Ogg page (not necessarily a single encoded packet).

    A page is a header of 26 bytes, followed by one byte giving the
    number of lacing values, followed by the lacing values, followed
    by the packet data.

    The constructor is given a file-like object pointing to the start
    of an Ogg page. After the constructor is finished it is pointing
    to the start of the next page.

    Attributes:
    version -- stream structure version (currently always 0)
    position -- absolute stream position (default 0)
    serial -- logical stream serial number (default 0)
    sequence -- page sequence number within logical stream (default 0)
    offset -- offset this page was read from (default None)
    complete -- if the last packet on this page is complete (default True)
    packets -- list of raw packet data (default [])

    Note that if 'complete' is false, the next page's 'continued'
    property must be true (so set both when constructing pages).

    If a file-like object is supplied to the constructor, the above
    attributes will be filled in based on it.
    """

    version = 0
    __type_flags = 0
    position = 0
    serial = 0
    sequence = 0
    offset = None
    complete = True

    def __init__(self, fileobj=None):
        """Create an empty page, or parse one page from fileobj.

        Raises EOFError if fileobj is already at end of file, and
        error if the data found there is not a complete version 0
        Ogg page.
        """
        self.packets = []

        if fileobj is None:
            return

        self.offset = fileobj.tell()

        header = fileobj.read(27)
        if len(header) == 0:
            raise EOFError

        try:
            # The stored CRC is unpacked but not verified.
            (oggs, self.version, self.__type_flags, self.position,
             self.serial, self.sequence, crc, segments) = struct.unpack(
                "<4sBBqIIiB", header)
        except struct.error:
            raise error("unable to read full header; got %r" % header)

        if oggs != "OggS":
            raise error("read %r, expected %r, at 0x%x" % (
                oggs, "OggS", fileobj.tell() - 27))

        if self.version != 0:
            raise error("version %r unsupported" % self.version)

        # Turn the lacing values into packet lengths: a packet's length
        # is the sum of a run of 255s plus the first value below 255.
        total = 0
        lacings = []
        lacing_bytes = fileobj.read(segments)
        if len(lacing_bytes) != segments:
            raise error("unable to read %r lacing bytes" % segments)
        for c in map(ord, lacing_bytes):
            total += c
            if c < 255:
                lacings.append(total)
                total = 0
        if total:
            # The final run was never terminated by a value below 255,
            # so the last packet continues on the next page.
            lacings.append(total)
            self.complete = False

        self.packets = [fileobj.read(length) for length in lacings]
        if [len(packet) for packet in self.packets] != lacings:
            raise error("unable to read full data")

    def __eq__(self, other):
        """Two Ogg pages are the same if they write the same data."""
        try:
            return (self.write() == other.write())
        except AttributeError:
            return False

    def __ne__(self, other):
        """Inverse of __eq__; Python 2 does not derive != from ==."""
        return not self.__eq__(other)

    # Pages are mutable and compared by content; keep identity hashing
    # explicit so defining __eq__ never makes pages unhashable.
    __hash__ = object.__hash__

    def __repr__(self):
        attrs = ['version', 'position', 'serial', 'sequence', 'offset',
                 'complete', 'continued', 'first', 'last']
        values = ["%s=%r" % (attr, getattr(self, attr)) for attr in attrs]
        return "<%s %s, %d bytes in %d packets>" % (
            type(self).__name__, " ".join(values),
            sum(map(len, self.packets)), len(self.packets))

    def write(self):
        """Return a string encoding of the page header and data.

        A ValueError is raised if the data is too big to fit in a
        single page.
        """

        # The CRC field is packed as 0 here and patched in below.
        data = [
            struct.pack("<4sBBqIIi", "OggS", self.version, self.__type_flags,
                        self.position, self.serial, self.sequence, 0)
            ]

        lacing_data = []
        for datum in self.packets:
            quot, rem = divmod(len(datum), 255)
            lacing_data.append("\xff" * quot + chr(rem))
        lacing_data = "".join(lacing_data)
        if not self.complete and lacing_data.endswith("\x00"):
            # An unterminated final packet has no closing lacing value.
            lacing_data = lacing_data[:-1]
        data.append(chr(len(lacing_data)))
        data.append(lacing_data)
        data.extend(self.packets)
        data = "".join(data)

        # Python's CRC is swapped relative to Ogg's needs.
        crc = ~zlib.crc32(data.translate(cdata.bitswap), -1)
        # Although we're using to_int_be, this actually makes the CRC
        # a proper le integer, since Python's CRC is byteswapped.
        crc = cdata.to_int_be(crc).translate(cdata.bitswap)
        data = data[:22] + crc + data[26:]
        return data

    def __size(self):
        """Return the number of bytes write() would produce."""
        size = 27  # Initial header size
        # rem stays None when there are no packets, so the adjustment
        # below is skipped instead of failing on an unbound name.
        rem = None
        for datum in self.packets:
            quot, rem = divmod(len(datum), 255)
            size += quot + 1
        if not self.complete and rem == 0:
            # Packet contains a multiple of 255 bytes and is not
            # terminated, so we don't have a \x00 at the end.
            size -= 1
        size += sum(map(len, self.packets))
        return size

    size = property(__size, doc="Total frame size.")

    def __set_flag(self, bit, val):
        """Set or clear one bit of the header type flags."""
        mask = 1 << bit
        if val:
            self.__type_flags |= mask
        else:
            self.__type_flags &= ~mask

    continued = property(
        lambda self: cdata.test_bit(self.__type_flags, 0),
        lambda self, v: self.__set_flag(0, v),
        doc="The first packet is continued from the previous page.")

    first = property(
        lambda self: cdata.test_bit(self.__type_flags, 1),
        lambda self, v: self.__set_flag(1, v),
        doc="This is the first page of a logical bitstream.")

    last = property(
        lambda self: cdata.test_bit(self.__type_flags, 2),
        lambda self, v: self.__set_flag(2, v),
        doc="This is the last page of a logical bitstream.")

    def renumber(klass, fileobj, serial, start):
        """Renumber pages belonging to a specified logical stream.

        fileobj must be opened with mode r+b or w+b.

        Starting at page number 'start', renumber all pages belonging
        to logical stream 'serial'. Other pages will be ignored.

        fileobj must point to the start of a valid Ogg page; any
        occurring after it and part of the specified logical stream
        will be numbered. No adjustment will be made to the data in
        the pages nor the granule position; only the page number, and
        so also the CRC.

        If an error occurs (e.g. non-Ogg data is found), fileobj will
        be left pointing to the place in the stream the error occurred,
        but the invalid data will be left intact (since this function
        does not change the total file size).
        """

        number = start
        while True:
            try:
                page = OggPage(fileobj)
            except EOFError:
                break
            else:
                if page.serial != serial:
                    # Wrong stream, skip this page.
                    continue
                # Changing the number can't change the page size,
                # so seeking back based on the current size is safe.
                fileobj.seek(-page.size, 1)
            page.sequence = number
            fileobj.write(page.write())
            fileobj.seek(page.offset + page.size, 0)
            number += 1
    renumber = classmethod(renumber)

    def to_packets(klass, pages, strict=False):
        """Construct a list of packet data from a list of Ogg pages.

        If strict is true, the first page must start a new packet,
        and the last page must end the last packet.

        An empty list of pages yields an empty list of packets.
        A ValueError is raised if the pages do not share one serial
        number, if their sequence numbers are not consecutive, or if
        a strict check fails.
        """

        if not pages:
            return []

        serial = pages[0].serial
        sequence = pages[0].sequence
        # Each packet is collected as a list of fragments and joined
        # once at the end, rather than concatenated page by page.
        packets = []

        if strict:
            if pages[0].continued:
                raise ValueError("first packet is continued")
            if not pages[-1].complete:
                raise ValueError("last packet does not complete")
        elif pages[0].continued:
            # Seed an empty packet for the continuation to extend.
            packets.append([""])

        for page in pages:
            if serial != page.serial:
                raise ValueError("invalid serial number in %r" % page)
            elif sequence != page.sequence:
                raise ValueError("bad sequence number in %r" % page)
            else:
                sequence += 1

            # A page may carry no packets at all; it then contributes
            # nothing beyond its sequence number.
            if page.packets:
                if page.continued:
                    packets[-1].append(page.packets[0])
                else:
                    packets.append([page.packets[0]])
                packets.extend([[data] for data in page.packets[1:]])

        return ["".join(fragments) for fragments in packets]
    to_packets = classmethod(to_packets)

    def from_packets(klass, packets, sequence=0,
                     default_size=4096, wiggle_room=2048):
        """Construct a list of Ogg pages from a list of packet data.

        The algorithm will generate pages of approximately
        default_size in size (rounded down to the nearest multiple of
        255). However, it will also allow pages to increase to
        approximately default_size + wiggle_room if allowing the
        wiggle room would finish a packet (only one packet will be
        finished in this way per page; if the next packet would fit
        into the wiggle room, it still starts on a new page).

        This method reduces packet fragmentation when packet sizes are
        slightly larger than the default page size, while still
        ensuring most pages are of the average size.

        Pages are numbered started at 'sequence'; other information is
        uninitialized.
        """

        chunk_size = (default_size // 255) * 255

        pages = []

        page = OggPage()
        page.sequence = sequence

        for packet in packets:
            page.packets.append("")
            while packet:
                data, packet = packet[:chunk_size], packet[chunk_size:]
                if page.size < default_size and len(page.packets) < 255:
                    page.packets[-1] += data
                else:
                    # If we've put any packet data into this page yet,
                    # we need to mark it incomplete. However, we can
                    # also have just started this packet on an already
                    # full page, in which case, just start the new
                    # page with this packet.
                    if page.packets[-1]:
                        page.complete = False
                        if len(page.packets) == 1:
                            # No packet finishes on this page.
                            page.position = -1
                    else:
                        page.packets.pop(-1)
                    pages.append(page)
                    page = OggPage()
                    page.continued = not pages[-1].complete
                    page.sequence = pages[-1].sequence + 1
                    page.packets.append(data)

                if len(packet) < wiggle_room:
                    page.packets[-1] += packet
                    packet = ""

        if page.packets:
            pages.append(page)

        return pages
    from_packets = classmethod(from_packets)

    def replace(klass, fileobj, old_pages, new_pages):
        """Replace old_pages with new_pages within fileobj.

        old_pages must have come from reading fileobj originally.
        new_pages are assumed to have the 'same' data as old_pages,
        and so the serial and sequence numbers will be copied, as will
        the flags for the first and last pages.

        fileobj will be resized and pages renumbered as necessary. As
        such, it must be opened r+b or w+b.

        The pages in new_pages are modified in place; the old_pages
        list itself is left in its original order.
        """

        # Number the new pages starting from the first old page.
        first = old_pages[0].sequence
        for index, page in enumerate(new_pages):
            page.sequence = first + index
            page.serial = old_pages[0].serial

        new_pages[0].first = old_pages[0].first
        new_pages[0].last = old_pages[0].last
        new_pages[0].continued = old_pages[0].continued

        new_pages[-1].first = old_pages[-1].first
        new_pages[-1].last = old_pages[-1].last
        new_pages[-1].complete = old_pages[-1].complete
        if not new_pages[-1].complete and len(new_pages[-1].packets) == 1:
            new_pages[-1].position = -1

        new_data = "".join([klass.write(page) for page in new_pages])

        # Make room in the file for the new data.
        delta = len(new_data)
        fileobj.seek(old_pages[0].offset, 0)
        insert_bytes(fileobj, delta, old_pages[0].offset)
        fileobj.seek(old_pages[0].offset, 0)
        fileobj.write(new_data)
        new_data_end = old_pages[0].offset + delta

        # Go through the old pages and delete them. Since we shifted
        # the data down the file, we need to adjust their offsets. We
        # also need to go backwards, so we don't adjust the deltas of
        # the other pages. A reversed copy is walked so the caller's
        # list is not reordered.
        for old_page in old_pages[::-1]:
            adj_offset = old_page.offset + delta
            delete_bytes(fileobj, old_page.size, adj_offset)

        # Finally, if there's any discrepancy in length, we need to
        # renumber the pages for the logical stream.
        if len(old_pages) != len(new_pages):
            fileobj.seek(new_data_end, 0)
            serial = new_pages[-1].serial
            sequence = new_pages[-1].sequence + 1
            klass.renumber(fileobj, serial, sequence)
    replace = classmethod(replace)

    def find_last(klass, fileobj, serial):
        """Find the last page of the stream 'serial'.

        If the file is not multiplexed this function is fast. If it is,
        it must read the whole stream.

        This finds the last page in the actual file object, or the last
        page in the stream (with eos set), whichever comes first.

        Returns None if no page of the stream could be read, and
        raises error if the end of the file contains no Ogg header.
        """

        # For non-muxed streams, look at the last page.
        try:
            fileobj.seek(-256*256, 2)
        except IOError:
            # The file is less than 64k in length.
            fileobj.seek(0)
        tail_start = fileobj.tell()
        data = fileobj.read()
        try:
            index = data.rindex("OggS")
        except ValueError:
            raise error("unable to find final Ogg header")
        stringobj = StringIO(data[index:])
        best_page = None
        try:
            page = OggPage(stringobj)
        except error:
            pass
        else:
            # The page was parsed from a copy of the file's tail, so
            # translate its offset back into a position in fileobj.
            page.offset = tail_start + index
            if page.serial == serial:
                if page.last:
                    return page
                best_page = page

        # The stream is muxed, so use the slow way: scan from the
        # start, remembering the most recent page of our stream and
        # stopping at the one flagged as its last. Pages belonging to
        # other streams, including the very first page read, are
        # skipped.
        fileobj.seek(0)
        try:
            page = OggPage(fileobj)
            while True:
                if page.serial == serial:
                    best_page = page
                    if page.last:
                        break
                page = OggPage(fileobj)
            return best_page
        except (error, EOFError):
            # Ran out of readable pages; report the best one seen.
            return best_page
    find_last = classmethod(find_last)
426 class OggFileType(FileType):
427 """An generic Ogg file."""
429 _Info = None
430 _Tags = None
431 _Error = None
432 _mimes = ["application/ogg", "application/x-ogg"]
434 def load(self, filename):
435 """Load file information from a filename."""
437 self.filename = filename
438 fileobj = file(filename, "rb")
439 try:
440 try:
441 self.info = self._Info(fileobj)
442 self.tags = self._Tags(fileobj, self.info)
444 if self.info.length:
445 # The streaminfo gave us real length information,
446 # don't waste time scanning the Ogg.
447 return
449 last_page = OggPage.find_last(fileobj, self.info.serial)
450 samples = last_page.position
451 try:
452 denom = self.info.sample_rate
453 except AttributeError:
454 denom = self.info.fps
455 self.info.length = samples / float(denom)
457 except error, e:
458 raise self._Error, e, sys.exc_info()[2]
459 except EOFError:
460 raise self._Error, "no appropriate stream found"
461 finally:
462 fileobj.close()
464 def delete(self, filename=None):
465 """Remove tags from a file.
467 If no filename is given, the one most recently loaded is used.
469 if filename is None:
470 filename = self.filename
472 self.tags.clear()
473 fileobj = file(filename, "rb+")
474 try:
475 try: self.tags._inject(fileobj)
476 except error, e:
477 raise self._Error, e, sys.exc_info()[2]
478 except EOFError:
479 raise self._Error, "no appropriate stream found"
480 finally:
481 fileobj.close()
483 def save(self, filename=None):
484 """Save a tag to a file.
486 If no filename is given, the one most recently loaded is used.
488 if filename is None:
489 filename = self.filename
490 fileobj = file(filename, "rb+")
491 try:
492 try: self.tags._inject(fileobj)
493 except error, e:
494 raise self._Error, e, sys.exc_info()[2]
495 except EOFError:
496 raise self._Error, "no appropriate stream found"
497 finally:
498 fileobj.close()