mutagen/mp3.py

   1 # MP3 stream header information support for Mutagen.
   2 # Copyright 2006 Joe Wreschnig
   3 #
   4 # This program is free software; you can redistribute it and/or modify
   5 # it under the terms of version 2 of the GNU General Public License as
   6 # published by the Free Software Foundation.
   7
   8 """MPEG audio stream information and tags."""
   9
  10 import os
  11 import struct
  12
  13 from mutagen.id3 import ID3FileType, BitPaddedInt, delete
  14
  15 __all__ = ["MP3", "Open", "delete", "MP3"]
  16
  17 class error(RuntimeError): pass
  18 class HeaderNotFoundError(error, IOError): pass
  19 class InvalidMPEGHeader(error, IOError): pass
  20
  21 # Mode values.
  22 STEREO, JOINTSTEREO, DUALCHANNEL, MONO = range(4)
  23
  24 class MPEGInfo(object):
  25     """MPEG audio stream information
  26
  27     Parse information about an MPEG audio file. This also reads the
  28     Xing VBR header format.
  29
  30     This code was implemented based on the format documentation at
  31     http://www.dv.co.yu/mpgscript/mpeghdr.htm.
  32
  33     Useful attributes:
  34     length -- audio length, in seconds
  35     bitrate -- audio bitrate, in bits per second
  36     sketchy -- if true, the file may not be valid MPEG audio
  37
  38     Useless attributes:
  39     version -- MPEG version (1, 2, 2.5)
  40     layer -- 1, 2, or 3
  41     mode -- One of STEREO, JOINTSTEREO, DUALCHANNEL, or MONO (0-3)
  42     protected -- whether or not the file is "protected"
  43     padding -- whether or not audio frames are padded
  44     sample_rate -- audio sample rate, in Hz
  45     """
  46
  47     # Map (version, layer) tuples to bitrates.
  48     __BITRATE = {
  49         (1, 1): range(0, 480, 32),
  50         (1, 2): [0, 32, 48, 56, 64, 80, 96, 112,128,160,192,224,256,320,384],
  51         (1, 3): [0, 32, 40, 48, 56, 64, 80, 96, 112,128,160,192,224,256,320],
  52         (2, 1): [0, 32, 48, 56, 64, 80, 96, 112,128,144,160,176,192,224,256],
  53         (2, 2): [0,  8, 16, 24, 32, 40, 48,  56, 64, 80, 96,112,128,144,160],
  54         }
  55
  56     __BITRATE[(2, 3)] = __BITRATE[(2, 2)]
  57     for i in range(1, 4): __BITRATE[(2.5, i)] = __BITRATE[(2, i)]
  58
  59     # Map version to sample rates.
  60     __RATES = {
  61         1: [44100, 48000, 32000],
  62         2: [22050, 24000, 16000],
  63         2.5: [11025, 12000, 8000]
  64         }
  65
  66     sketchy = False
  67
  68     def __init__(self, fileobj, offset=None):
  69         """Parse MPEG stream information from a file-like object.
  70
  71         If an offset argument is given, it is used to start looking
  72         for stream information and Xing headers; otherwise, ID3v2 tags
  73         will be skipped automatically. A correct offset can make
  74         loading files significantly faster.
  75         """
  76
  77         try: size = os.path.getsize(fileobj.name)
  78         except (IOError, OSError, AttributeError):
  79             fileobj.seek(0, 2)
  80             size = fileobj.tell()
  81
  82         # If we don't get an offset, try to skip an ID3v2 tag.
  83         if offset is None:
  84             fileobj.seek(0, 0)
  85             idata = fileobj.read(10)
  86             try: id3, insize = struct.unpack('>3sxxx4s', idata)
  87             except struct.error: id3, insize = '', 0
  88             insize = BitPaddedInt(insize)
  89             if id3 == 'ID3' and insize > 0:
  90                 offset = insize
  91             else: offset = 0
  92
  93         # Try to find two valid headers (meaning, very likely MPEG data)
  94         # at the given offset, 30% through the file, 60% through the file,
  95         # and 90% through the file.
  96         for i in [offset, 0.3 * size, 0.6 * size, 0.9 * size]:
  97             try: self.__try(fileobj, int(i), size - offset)
  98             except error, e: pass
  99             else: break
 100         # If we can't find any two consecutive frames, try to find just
 101         # one frame back at the original offset given.
 102         else:
 103             self.__try(fileobj, offset, size - offset, False)
 104             self.sketchy = True
 105
 106     def __try(self, fileobj, offset, real_size, check_second=True):
 107         # This is going to be one really long function; bear with it,
 108         # because there's not really a sane point to cut it up.
 109         fileobj.seek(offset, 0)
 110
 111         # We "know" we have an MPEG file if we find two frames that look like
 112         # valid MPEG data. If we can't find them in 32k of reads, something
 113         # is horribly wrong (the longest frame can only be about 4k). This
 114         # is assuming the offset didn't lie.
 115         data = fileobj.read(32768)
 116
 117         frame_1 = data.find("\xff")
 118         while 0 <= frame_1 <= len(data) - 4:
 119             frame_data = struct.unpack(">I", data[frame_1:frame_1 + 4])[0]
 120             if (frame_data >> 16) & 0xE0 != 0xE0:
 121                 frame_1 = data.find("\xff", frame_1 + 2)
 122             else:
 123                 version = (frame_data >> 19) & 0x3
 124                 layer = (frame_data >> 17) & 0x3
 125                 protection = (frame_data >> 16) & 0x1
 126                 bitrate = (frame_data >> 12) & 0xF
 127                 sample_rate = (frame_data >> 10) & 0x3
 128                 padding = (frame_data >> 9) & 0x1
 129                 private = (frame_data >> 8) & 0x1
 130                 self.mode = (frame_data >> 6) & 0x3
 131                 mode_extension = (frame_data >> 4) & 0x3
 132                 copyright = (frame_data >> 3) & 0x1
 133                 original = (frame_data >> 2) & 0x1
 134                 emphasis = (frame_data >> 0) & 0x3
 135                 if (version == 1 or layer == 0 or sample_rate == 0x3 or
 136                     bitrate == 0 or bitrate == 0xF):
 137                     frame_1 = data.find("\xff", frame_1 + 2)
 138                 else: break
 139         else:
 140             raise HeaderNotFoundError("can't sync to an MPEG frame")
 141
 142         # There is a serious problem here, which is that many flags
 143         # in an MPEG header are backwards.
 144         self.version = [2.5, None, 2, 1][version]
 145         self.layer = 4 - layer
 146         self.protected = not protection
 147         self.padding = bool(padding)
 148
 149         self.bitrate = self.__BITRATE[(self.version, self.layer)][bitrate]
 150         self.bitrate *= 1000
 151         self.sample_rate = self.__RATES[self.version][sample_rate]
 152
 153         if self.layer == 1:
 154             frame_length = (12 * self.bitrate / self.sample_rate + padding) * 4
 155             frame_size = 384
 156         elif self.version >= 2 and self.layer == 3:
 157             frame_length = 72 * self.bitrate / self.sample_rate + padding
 158             frame_size = 576
 159         else:
 160             frame_length = 144 * self.bitrate / self.sample_rate + padding
 161             frame_size = 1152
 162
 163         if check_second:
 164             possible = frame_1 + frame_length
 165             if possible > len(data) + 4:
 166                 raise HeaderNotFoundError("can't sync to second MPEG frame")
 167             try:
 168                 frame_data = struct.unpack(
 169                     ">H", data[possible:possible + 2])[0]
 170             except struct.error:
 171                 raise HeaderNotFoundError("can't sync to second MPEG frame")
 172             if frame_data & 0xFFE0 != 0xFFE0:
 173                 raise HeaderNotFoundError("can't sync to second MPEG frame")
 174
 175         frame_count = real_size / float(frame_length)
 176         samples = frame_size * frame_count
 177         self.length = samples / self.sample_rate
 178
 179         # Try to find/parse the Xing header, which trumps the above length
 180         # and bitrate calculation.
 181         fileobj.seek(offset, 0)
 182         data = fileobj.read(32768)
 183         try:
 184             xing = data[:-4].index("Xing")
 185         except ValueError:
 186             # Try to find/parse the VBRI header, which trumps the above length
 187             # calculation.
 188             try:
 189                 vbri = data[:-24].index("VBRI")
 190             except ValueError: pass
 191             else:
 192                 # If a VBRI header was found, this is definitely MPEG audio.
 193                 self.sketchy = False
 194                 vbri_version = struct.unpack('>H', data[vbri + 4:vbri + 6])[0]
 195                 if vbri_version == 1:
 196                     frame_count = struct.unpack(
 197                         '>I', data[vbri + 14:vbri + 18])[0]
 198                     samples = float(frame_size * frame_count)
 199                     self.length = (samples / self.sample_rate) or self.length
 200         else:
 201             # If a Xing header was found, this is definitely MPEG audio.
 202             self.sketchy = False
 203             flags = struct.unpack('>I', data[xing + 4:xing + 8])[0]
 204             if flags & 0x1:
 205                 frame_count = struct.unpack('>I', data[xing + 8:xing + 12])[0]
 206                 samples = float(frame_size * frame_count)
 207                 self.length = (samples / self.sample_rate) or self.length
 208             if flags & 0x2:
 209                 bytes = struct.unpack('>I', data[xing + 12:xing + 16])[0]
 210                 self.bitrate = int((bytes * 8) // self.length)
 211
 212         # If the bitrate * the length is nowhere near the file
 213         # length, recalculate using the bitrate and file length.
 214         # Don't do this for very small files.
 215         fileobj.seek(2, 0)
 216         size = fileobj.tell()
 217         expected = (self.bitrate / 8) * self.length
 218         if not (size / 2 < expected < size * 2) and size > 2**16:
 219             self.length = size / float(self.bitrate * 8)
 220
 221     def pprint(self):
 222         s = "MPEG %s layer %d, %d bps, %s Hz, %.2f seconds" % (
 223             self.version, self.layer, self.bitrate, self.sample_rate,
 224             self.length)
 225         if self.sketchy: s += " (sketchy)"
 226         return s
 227
 228 class MP3(ID3FileType):
 229     """An MPEG audio (usually MPEG-1 Layer 3) file."""
 230
 231     _Info = MPEGInfo
 232     _mimes = ["audio/mp3", "audio/x-mp3", "audio/mpeg", "audio/mpg",
 233               "audio/x-mpeg"]
 234
 235     def score(filename, fileobj, header):
 236         filename = filename.lower()
 237         return (header.startswith("ID3") * 2 + filename.endswith(".mp3") +
 238                 filename.endswith(".mp2") + filename.endswith(".mpg") +
 239                 filename.endswith(".mpeg"))
 240     score = staticmethod(score)
 241
# Alias for MP3, exported through __all__.
# NOTE(review): presumably the generic entry-point name shared by mutagen's
# format modules -- confirm against the other modules.
Open = MP3
 243
class EasyMP3(MP3):
    """Like MP3, but uses EasyID3 for tags."""
    # Importing inside the class body binds EasyID3 to the class attribute
    # ``ID3``.
    # NOTE(review): presumably ID3FileType looks up ``ID3`` to decide which
    # tag class to instantiate -- confirm in mutagen.id3.
    from mutagen.easyid3 import EasyID3 as ID3
 247