mutagen/ogg.py

   1 # Copyright 2006 Joe Wreschnig
   2 #
   3 # This program is free software; you can redistribute it and/or modify
   4 # it under the terms of the GNU General Public License version 2 as
   5 # published by the Free Software Foundation.
   6 #
   7 # $Id: ogg.py 3975 2007-01-13 21:51:17Z piman $
   8
   9 """Read and write Ogg bitstreams and pages.
  10
  11 This module reads and writes a subset of the Ogg bitstream format
  12 version 0. It does *not* read or write Ogg Vorbis files! For that,
  13 you should use mutagen.oggvorbis.
  14
  15 This implementation is based on the RFC 3533 standard found at
  16 http://www.xiph.org/ogg/doc/rfc3533.txt.
  17 """
  18
  19 import struct
  20 import sys
  21 import zlib
  22
  23 from cStringIO import StringIO
  24
  25 from mutagen import FileType
  26 from mutagen._util import cdata, insert_bytes, delete_bytes
  27
  28 class error(IOError):
  29     """Ogg stream parsing errors."""
  30     pass
  31
  32 class OggPage(object):
  33     """A single Ogg page (not necessarily a single encoded packet).
  34
  35     A page is a header of 26 bytes, followed by the length of the
  36     data, followed by the data.
  37
  38     The constructor is givin a file-like object pointing to the start
  39     of an Ogg page. After the constructor is finished it is pointing
  40     to the start of the next page.
  41
  42     Attributes:
  43     version -- stream structure version (currently always 0)
  44     position -- absolute stream position (default -1)
  45     serial -- logical stream serial number (default 0)
  46     sequence -- page sequence number within logical stream (default 0)
  47     offset -- offset this page was read from (default None)
  48     complete -- if the last packet on this page is complete (default True)
  49     packets -- list of raw packet data (default [])
  50
  51     Note that if 'complete' is false, the next page's 'continued'
  52     property must be true (so set both when constructing pages).
  53
  54     If a file-like object is supplied to the constructor, the above
  55     attributes will be filled in based on it.
  56     """
  57
  58     version = 0
  59     __type_flags = 0
  60     position = 0L
  61     serial = 0
  62     sequence = 0
  63     offset = None
  64     complete = True
  65
  66     def __init__(self, fileobj=None):
  67         self.packets = []
  68
  69         if fileobj is None:
  70             return
  71
  72         self.offset = fileobj.tell()
  73
  74         header = fileobj.read(27)
  75         if len(header) == 0:
  76             raise EOFError
  77
  78         try:
  79             (oggs, self.version, self.__type_flags, self.position,
  80              self.serial, self.sequence, crc, segments) = struct.unpack(
  81                 "<4sBBqIIiB", header)
  82         except struct.error:
  83             raise error("unable to read full header; got %r" % header)
  84
  85         if oggs != "OggS":
  86             raise error("read %r, expected %r, at 0x%x" % (
  87                 oggs, "OggS", fileobj.tell() - 27))
  88
  89         if self.version != 0:
  90             raise error("version %r unsupported" % self.version)
  91
  92         total = 0
  93         lacings = []
  94         lacing_bytes = fileobj.read(segments)
  95         if len(lacing_bytes) != segments:
  96             raise error("unable to read %r lacing bytes" % segments)
  97         for c in map(ord, lacing_bytes):
  98             total += c
  99             if c < 255:
 100                 lacings.append(total)
 101                 total = 0
 102         if total:
 103             lacings.append(total)
 104             self.complete = False
 105
 106         self.packets = map(fileobj.read, lacings)
 107         if map(len, self.packets) != lacings:
 108             raise error("unable to read full data")
 109
 110     def __eq__(self, other):
 111         """Two Ogg pages are the same if they write the same data."""
 112         try:
 113             return (self.write() == other.write())
 114         except AttributeError:
 115             return False
 116
 117     __hash__ = object.__hash__
 118
 119     def __repr__(self):
 120         attrs = ['version', 'position', 'serial', 'sequence', 'offset',
 121                  'complete', 'continued', 'first', 'last']
 122         values = ["%s=%r" % (attr, getattr(self, attr)) for attr in attrs]
 123         return "<%s %s, %d bytes in %d packets>" % (
 124             type(self).__name__, " ".join(values), sum(map(len, self.packets)),
 125             len(self.packets))
 126
 127     def write(self):
 128         """Return a string encoding of the page header and data.
 129
 130         A ValueError is raised if the data is too big to fit in a
 131         single page.
 132         """
 133
 134         data = [
 135             struct.pack("<4sBBqIIi", "OggS", self.version, self.__type_flags,
 136                         self.position, self.serial, self.sequence, 0)
 137             ]
 138
 139         lacing_data = []
 140         for datum in self.packets:
 141             quot, rem = divmod(len(datum), 255)
 142             lacing_data.append("\xff" * quot + chr(rem))
 143         lacing_data = "".join(lacing_data)
 144         if not self.complete and lacing_data.endswith("\x00"):
 145             lacing_data = lacing_data[:-1]
 146         data.append(chr(len(lacing_data)))
 147         data.append(lacing_data)
 148         data.extend(self.packets)
 149         data = "".join(data)
 150
 151         # Python's CRC is swapped relative to Ogg's needs.
 152         crc = ~zlib.crc32(data.translate(cdata.bitswap), -1)
 153         # Although we're using to_int_be, this actually makes the CRC
 154         # a proper le integer, since Python's CRC is byteswapped.
 155         crc = cdata.to_int_be(crc).translate(cdata.bitswap)
 156         data = data[:22] + crc + data[26:]
 157         return data
 158
 159     def __size(self):
 160         size = 27 # Initial header size
 161         for datum in self.packets:
 162             quot, rem = divmod(len(datum), 255)
 163             size += quot + 1
 164         if not self.complete and rem == 0:
 165             # Packet contains a multiple of 255 bytes and is not
 166             # terminated, so we don't have a \x00 at the end.
 167             size -= 1
 168         size += sum(map(len, self.packets))
 169         return size
 170
 171     size = property(__size, doc="Total frame size.")
 172
 173     def __set_flag(self, bit, val):
 174         mask = 1 << bit
 175         if val: self.__type_flags |= mask
 176         else: self.__type_flags &= ~mask
 177
 178     continued = property(
 179         lambda self: cdata.test_bit(self.__type_flags, 0),
 180         lambda self, v: self.__set_flag(0, v),
 181         doc="The first packet is continued from the previous page.")
 182
 183     first = property(
 184         lambda self: cdata.test_bit(self.__type_flags, 1),
 185         lambda self, v: self.__set_flag(1, v),
 186         doc="This is the first page of a logical bitstream.")
 187
 188     last = property(
 189         lambda self: cdata.test_bit(self.__type_flags, 2),
 190         lambda self, v: self.__set_flag(2, v),
 191         doc="This is the last page of a logical bitstream.")
 192
 193     def renumber(klass, fileobj, serial, start):
 194         """Renumber pages belonging to a specified logical stream.
 195
 196         fileobj must be opened with mode r+b or w+b.
 197
 198         Starting at page number 'start', renumber all pages belonging
 199         to logical stream 'serial'. Other pages will be ignored.
 200
 201         fileobj must point to the start of a valid Ogg page; any
 202         occuring after it and part of the specified logical stream
 203         will be numbered. No adjustment will be made to the data in
 204         the pages nor the granule position; only the page number, and
 205         so also the CRC.
 206
 207         If an error occurs (e.g. non-Ogg data is found), fileobj will
 208         be left pointing to the place in the stream the error occured,
 209         but the invalid data will be left intact (since this function
 210         does not change the total file size).
 211         """
 212
 213         number = start
 214         while True:
 215             try: page = OggPage(fileobj)
 216             except EOFError:
 217                 break
 218             else:
 219                 if page.serial != serial:
 220                     # Wrong stream, skip this page.
 221                     continue
 222                 # Changing the number can't change the page size,
 223                 # so seeking back based on the current size is safe.
 224                 fileobj.seek(-page.size, 1)
 225             page.sequence = number
 226             fileobj.write(page.write())
 227             fileobj.seek(page.offset + page.size, 0)
 228             number += 1
 229     renumber = classmethod(renumber)
 230
 231     def to_packets(klass, pages, strict=False):
 232         """Construct a list of packet data from a list of Ogg pages.
 233
 234         If strict is true, the first page must start a new packet,
 235         and the last page must end the last packet.
 236         """
 237
 238         serial = pages[0].serial
 239         sequence = pages[0].sequence
 240         packets = []
 241
 242         if strict:
 243             if pages[0].continued:
 244                 raise ValueError("first packet is continued")
 245             if not pages[-1].complete:
 246                 raise ValueError("last packet does not complete")
 247         elif pages and pages[0].continued:
 248             packets.append("")
 249
 250         for page in pages:
 251             if serial != page.serial:
 252                 raise ValueError("invalid serial number in %r" % page)
 253             elif sequence != page.sequence:
 254                 raise ValueError("bad sequence number in %r" % page)
 255             else: sequence += 1
 256
 257             if page.continued: packets[-1] += page.packets[0]
 258             else: packets.append(page.packets[0])
 259             packets.extend(page.packets[1:])
 260
 261         return packets
 262     to_packets = classmethod(to_packets)
 263
 264     def from_packets(klass, packets, sequence=0,
 265                      default_size=4096, wiggle_room=2048):
 266         """Construct a list of Ogg pages from a list of packet data.
 267
 268         The algorithm will generate pages of approximately
 269         default_size in size (rounded down to the nearest multiple of
 270         255). However, it will also allow pages to increase to
 271         approximately default_size + wiggle_room if allowing the
 272         wiggle room would finish a packet (only one packet will be
 273         finished in this way per page; if the next packet would fit
 274         into the wiggle room, it still starts on a new page).
 275
 276         This method reduces packet fragmentation when packet sizes are
 277         slightly larger than the default page size, while still
 278         ensuring most pages are of the average size.
 279
 280         Pages are numbered started at 'sequence'; other information is
 281         uninitialized.
 282         """
 283
 284         chunk_size = (default_size // 255) * 255
 285
 286         pages = []
 287
 288         page = OggPage()
 289         page.sequence = sequence
 290
 291         for packet in packets:
 292             page.packets.append("")
 293             while packet:
 294                 data, packet = packet[:chunk_size], packet[chunk_size:]
 295                 if page.size < default_size and len(page.packets) < 255:
 296                     page.packets[-1] += data
 297                 else:
 298                     # If we've put any packet data into this page yet,
 299                     # we need to mark it incomplete. However, we can
 300                     # also have just started this packet on an already
 301                     # full page, in which case, just start the new
 302                     # page with this packet.
 303                     if page.packets[-1]:
 304                         page.complete = False
 305                         if len(page.packets) == 1:
 306                             page.position = -1L
 307                     else:
 308                         page.packets.pop(-1)
 309                     pages.append(page)
 310                     page = OggPage()
 311                     page.continued = not pages[-1].complete
 312                     page.sequence = pages[-1].sequence + 1
 313                     page.packets.append(data)
 314
 315                 if len(packet) < wiggle_room:
 316                     page.packets[-1] += packet
 317                     packet = ""
 318
 319         if page.packets:
 320             pages.append(page)
 321
 322         return pages
 323     from_packets = classmethod(from_packets)
 324
 325     def replace(klass, fileobj, old_pages, new_pages):
 326         """Replace old_pages with new_pages within fileobj.
 327
 328         old_pages must have come from reading fileobj originally.
 329         new_pages are assumed to have the 'same' data as old_pages,
 330         and so the serial and sequence numbers will be copied, as will
 331         the flags for the first and last pages.
 332
 333         fileobj will be resized and pages renumbered as necessary. As
 334         such, it must be opened r+b or w+b.
 335         """
 336
 337         # Number the new pages starting from the first old page.
 338         first = old_pages[0].sequence
 339         for page, seq in zip(new_pages, range(first, first + len(new_pages))):
 340             page.sequence = seq
 341             page.serial = old_pages[0].serial
 342
 343         new_pages[0].first = old_pages[0].first
 344         new_pages[0].last = old_pages[0].last
 345         new_pages[0].continued = old_pages[0].continued
 346
 347         new_pages[-1].first = old_pages[-1].first
 348         new_pages[-1].last = old_pages[-1].last
 349         new_pages[-1].complete = old_pages[-1].complete
 350         if not new_pages[-1].complete and len(new_pages[-1].packets) == 1:
 351             new_pages[-1].position = -1L
 352
 353         new_data = "".join(map(klass.write, new_pages))
 354
 355         # Make room in the file for the new data.
 356         delta = len(new_data)
 357         fileobj.seek(old_pages[0].offset, 0)
 358         insert_bytes(fileobj, delta, old_pages[0].offset)
 359         fileobj.seek(old_pages[0].offset, 0)
 360         fileobj.write(new_data)
 361         new_data_end = old_pages[0].offset + delta
 362
 363         # Go through the old pages and delete them. Since we shifted
 364         # the data down the file, we need to adjust their offsets. We
 365         # also need to go backwards, so we don't adjust the deltas of
 366         # the other pages.
 367         old_pages.reverse()
 368         for old_page in old_pages:
 369             adj_offset = old_page.offset + delta
 370             delete_bytes(fileobj, old_page.size, adj_offset)
 371
 372         # Finally, if there's any discrepency in length, we need to
 373         # renumber the pages for the logical stream.
 374         if len(old_pages) != len(new_pages):
 375             fileobj.seek(new_data_end, 0)
 376             serial = new_pages[-1].serial
 377             sequence = new_pages[-1].sequence + 1
 378             klass.renumber(fileobj, serial, sequence)
 379     replace = classmethod(replace)
 380
 381     def find_last(klass, fileobj, serial):
 382         """Find the last page of the stream 'serial'.
 383
 384         If the file is not multiplexed this function is fast. If it is,
 385         it must read the whole the stream.
 386
 387         This finds the last page in the actual file object, or the last
 388         page in the stream (with eos set), whichever comes first.
 389         """
 390
 391         # For non-muxed streams, look at the last page.
 392         try: fileobj.seek(-256*256, 2)
 393         except IOError:
 394             # The file is less than 64k in length.
 395             fileobj.seek(0)
 396         data = fileobj.read()
 397         try: index = data.rindex("OggS")
 398         except ValueError:
 399             raise error("unable to find final Ogg header")
 400         stringobj = StringIO(data[index:])
 401         best_page = None
 402         try:
 403             page = OggPage(stringobj)
 404         except error:
 405             pass
 406         else:
 407             if page.serial == serial:
 408                 if page.last: return page
 409                 else: best_page = page
 410             else: best_page = None
 411
 412         # The stream is muxed, so use the slow way.
 413         fileobj.seek(0)
 414         try:
 415             page = OggPage(fileobj)
 416             while not page.last:
 417                 page = OggPage(fileobj)
 418                 while page.serial != serial:
 419                     page = OggPage(fileobj)
 420                 best_page = page
 421             return page
 422         except error:
 423             return best_page
 424         except EOFError:
 425             return best_page
 426     find_last = classmethod(find_last)
 427
 428 class OggFileType(FileType):
 429     """An generic Ogg file."""
 430
 431     _Info = None
 432     _Tags = None
 433     _Error = None
 434     _mimes = ["application/ogg", "application/x-ogg"]
 435
 436     def load(self, filename):
 437         """Load file information from a filename."""
 438
 439         self.filename = filename
 440         fileobj = open(filename, "rb")
 441         try:
 442             try:
 443                 self.info = self._Info(fileobj)
 444                 self.tags = self._Tags(fileobj, self.info)
 445
 446                 if self.info.length:
 447                     # The streaminfo gave us real length information,
 448                     # don't waste time scanning the Ogg.
 449                     return
 450
 451                 last_page = OggPage.find_last(fileobj, self.info.serial)
 452                 samples = last_page.position
 453                 try:
 454                     denom = self.info.sample_rate
 455                 except AttributeError:
 456                     denom = self.info.fps
 457                 self.info.length = samples / float(denom)
 458
 459             except error, e:
 460                 raise self._Error, e, sys.exc_info()[2]
 461             except EOFError:
 462                 raise self._Error, "no appropriate stream found"
 463         finally:
 464             fileobj.close()
 465
 466     def delete(self, filename=None):
 467         """Remove tags from a file.
 468
 469         If no filename is given, the one most recently loaded is used.
 470         """
 471         if filename is None:
 472             filename = self.filename
 473
 474         self.tags.clear()
 475         fileobj = open(filename, "rb+")
 476         try:
 477             try: self.tags._inject(fileobj)
 478             except error, e:
 479                 raise self._Error, e, sys.exc_info()[2]
 480             except EOFError:
 481                 raise self._Error, "no appropriate stream found"
 482         finally:
 483             fileobj.close()
 484
 485     def save(self, filename=None):
 486         """Save a tag to a file.
 487
 488         If no filename is given, the one most recently loaded is used.
 489         """
 490         if filename is None:
 491             filename = self.filename
 492         fileobj = open(filename, "rb+")
 493         try:
 494             try: self.tags._inject(fileobj)
 495             except error, e:
 496                 raise self._Error, e, sys.exc_info()[2]
 497             except EOFError:
 498                 raise self._Error, "no appropriate stream found"
 499         finally:
 500             fileobj.close()