Merge branch 'master' into mutagen-branch
[pyTivo/wmcbrine.git] / mutagen / mp3.py
blob87ec7b987d5cff24daede4f835d97e078144325a
1 # MP3 stream header information support for Mutagen.
2 # Copyright 2006 Joe Wreschnig
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of version 2 of the GNU General Public License as
6 # published by the Free Software Foundation.
8 """MPEG audio stream information and tags."""
10 import os
11 import struct
13 from mutagen.id3 import ID3FileType, BitPaddedInt, delete
class error(RuntimeError):
    """Base class for the MPEG parsing errors defined in this module."""


class HeaderNotFoundError(error, IOError):
    """Raised when no MPEG frame header can be located in the data."""


class InvalidMPEGHeader(error, IOError):
    """MPEG header error that is also an IOError.

    Not raised by the code in this module's visible section.
    """
# Channel mode values; MPEGInfo.mode holds one of these (0-3).
STEREO = 0
JOINTSTEREO = 1
DUALCHANNEL = 2
MONO = 3
class MPEGInfo(object):
    """MPEG audio stream information

    Parse information about an MPEG audio file. This also reads the
    Xing and VBRI VBR header formats.

    This code was implemented based on the format documentation at
    http://www.dv.co.yu/mpgscript/mpeghdr.htm.

    Useful attributes:
    length -- audio length, in seconds
    bitrate -- audio bitrate, in bits per second
    sketchy -- if true, the file may not be valid MPEG audio

    Useless attributes:
    version -- MPEG version (1, 2, 2.5)
    layer -- 1, 2, or 3
    mode -- One of STEREO, JOINTSTEREO, DUALCHANNEL, or MONO (0-3)
    protected -- whether or not the file is "protected"
    padding -- whether or not audio frames are padded
    sample_rate -- audio sample rate, in Hz
    """

    # Map (version, layer) tuples to bitrates in kbps, indexed by the
    # 4-bit bitrate field of the frame header.
    __BITRATE = {
        (1, 1): range(0, 480, 32),
        (1, 2): [0, 32, 48, 56, 64, 80, 96, 112,128,160,192,224,256,320,384],
        (1, 3): [0, 32, 40, 48, 56, 64, 80, 96, 112,128,160,192,224,256,320],
        (2, 1): [0, 32, 48, 56, 64, 80, 96, 112,128,144,160,176,192,224,256],
        (2, 2): [0,  8, 16, 24, 32, 40, 48,  56, 64, 80, 96,112,128,144,160],
        }

    # MPEG-2 layer 3 shares the layer 2 table, and MPEG-2.5 shares all
    # of the MPEG-2 tables.
    __BITRATE[(2, 3)] = __BITRATE[(2, 2)]
    for i in range(1, 4):
        __BITRATE[(2.5, i)] = __BITRATE[(2, i)]
    # Don't leave the loop variable behind as a class attribute.
    del i

    # Map version to sample rates, indexed by the 2-bit sample rate field.
    __RATES = {
        1: [44100, 48000, 32000],
        2: [22050, 24000, 16000],
        2.5: [11025, 12000, 8000]
        }

    sketchy = False

    def __init__(self, fileobj, offset=None):
        """Parse MPEG stream information from a file-like object.

        If an offset argument is given, it is used to start looking
        for stream information and Xing headers; otherwise, ID3v2 tags
        will be skipped automatically. A correct offset can make
        loading files significantly faster.

        Raises HeaderNotFoundError if no MPEG frame can be found at all.
        """

        # Prefer asking the filesystem; fall back to seeking for objects
        # that have no usable name.
        try:
            size = os.path.getsize(fileobj.name)
        except (IOError, OSError, AttributeError):
            fileobj.seek(0, 2)
            size = fileobj.tell()

        # If we don't get an offset, try to skip an ID3v2 tag.
        if offset is None:
            fileobj.seek(0, 0)
            idata = fileobj.read(10)
            try:
                id3, insize = struct.unpack('>3sxxx4s', idata)
            except struct.error:
                # Fewer than 10 bytes available: treat as "no tag".
                id3, insize = '', 0
            insize = BitPaddedInt(insize)
            if id3 == 'ID3' and insize > 0:
                offset = insize
            else:
                offset = 0

        # Try to find two valid headers (meaning, very likely MPEG data)
        # at the given offset, 30% through the file, 60% through the file,
        # and 90% through the file.
        for start in [offset, 0.3 * size, 0.6 * size, 0.9 * size]:
            try:
                self.__try(fileobj, int(start), size - offset)
            except error:
                # No luck at this position; move on to the next probe.
                pass
            else:
                break
        # If we can't find any two consecutive frames, try to find just
        # one frame back at the original offset given.
        else:
            self.__try(fileobj, offset, size - offset, False)
            self.sketchy = True

    def __try(self, fileobj, offset, real_size, check_second=True):
        """Locate a frame header at or after offset and fill in attributes.

        fileobj -- seekable file-like object to read from
        offset -- byte position at which to start scanning
        real_size -- byte count used for the length estimate
        check_second -- if true, also require sync bits where the
                        following frame is expected to start

        Raises HeaderNotFoundError if no plausible header is found in
        the 32k read, or if check_second is set and the following frame
        cannot be confirmed.
        """
        fileobj.seek(offset, 0)

        # We "know" we have an MPEG file if we find two frames that look like
        # valid MPEG data. If we can't find them in 32k of reads, something
        # is horribly wrong (the longest frame can only be about 4k). This
        # is assuming the offset didn't lie.
        data = fileobj.read(32768)

        frame_1 = data.find("\xff")
        while 0 <= frame_1 <= len(data) - 4:
            frame_data = struct.unpack(">I", data[frame_1:frame_1 + 4])[0]
            if (frame_data >> 16) & 0xE0 != 0xE0:
                # The 11 sync bits are not all set; not a frame header.
                frame_1 = data.find("\xff", frame_1 + 2)
            else:
                version = (frame_data >> 19) & 0x3
                layer = (frame_data >> 17) & 0x3
                protection = (frame_data >> 16) & 0x1
                bitrate = (frame_data >> 12) & 0xF
                sample_rate = (frame_data >> 10) & 0x3
                padding = (frame_data >> 9) & 0x1
                # Reject field values that have no entry in the lookup
                # tables used below.
                if (version == 1 or layer == 0 or sample_rate == 0x3 or
                    bitrate == 0 or bitrate == 0xF):
                    frame_1 = data.find("\xff", frame_1 + 2)
                else:
                    break
        else:
            raise HeaderNotFoundError("can't sync to an MPEG frame")

        self.mode = (frame_data >> 6) & 0x3

        # There is a serious problem here, which is that many flags
        # in an MPEG header are backwards.
        self.version = [2.5, None, 2, 1][version]
        self.layer = 4 - layer
        self.protected = not protection
        self.padding = bool(padding)

        self.bitrate = self.__BITRATE[(self.version, self.layer)][bitrate]
        self.bitrate *= 1000
        self.sample_rate = self.__RATES[self.version][sample_rate]

        # frame_length is in bytes, frame_size in samples per frame.
        if self.layer == 1:
            frame_length = (
                12 * self.bitrate // self.sample_rate + padding) * 4
            frame_size = 384
        elif self.layer == 3 and self.version != 1:
            # MPEG-2 and MPEG-2.5 layer 3 frames carry half as many
            # samples as MPEG-1 layer 3 frames.
            frame_length = 72 * self.bitrate // self.sample_rate + padding
            frame_size = 576
        else:
            frame_length = 144 * self.bitrate // self.sample_rate + padding
            frame_size = 1152

        if check_second:
            possible = frame_1 + frame_length
            # Both sync bytes must be inside the buffer; unpacking a
            # short slice would raise struct.error, which callers
            # catching this module's error class would not handle.
            if possible + 2 > len(data):
                raise HeaderNotFoundError("can't sync to second MPEG frame")
            frame_data = struct.unpack(">H", data[possible:possible + 2])[0]
            if frame_data & 0xFFE0 != 0xFFE0:
                raise HeaderNotFoundError("can't sync to second MPEG frame")

        # Estimate assuming every frame is as long as the one just parsed.
        frame_count = real_size / float(frame_length)
        samples = frame_size * frame_count
        self.length = samples / self.sample_rate

        # Try to find/parse the Xing header, which trumps the above length
        # and bitrate calculation. The 32k read at offset is reused.
        try:
            xing = data[:-4].index("Xing")
        except ValueError:
            # Try to find/parse the VBRI header, which trumps the above length
            # calculation.
            try:
                vbri = data[:-24].index("VBRI")
            except ValueError:
                pass
            else:
                # If a VBRI header was found, this is definitely MPEG audio.
                self.sketchy = False
                vbri_version = struct.unpack(
                    '>H', data[vbri + 4:vbri + 6])[0]
                if vbri_version == 1:
                    frame_count = struct.unpack(
                        '>I', data[vbri + 14:vbri + 18])[0]
                    # Float math so the length keeps its fractional part.
                    samples = float(frame_size * frame_count)
                    # A zero frame count keeps the estimate from above.
                    self.length = (
                        (samples / self.sample_rate) or self.length)
        else:
            # If a Xing header was found, this is definitely MPEG audio.
            self.sketchy = False
            flags = struct.unpack('>I', data[xing + 4:xing + 8])[0]
            if flags & 0x1:
                # Frame count field is present.
                frame_count = struct.unpack(
                    '>I', data[xing + 8:xing + 12])[0]
                # Float math so the length keeps its fractional part.
                samples = float(frame_size * frame_count)
                # A zero frame count keeps the estimate from above.
                self.length = (samples / self.sample_rate) or self.length
            if flags & 0x2:
                # Byte count field is present. Skip the average bitrate
                # when there is no positive length to divide by.
                byte_count = struct.unpack(
                    '>I', data[xing + 12:xing + 16])[0]
                if self.length > 0:
                    self.bitrate = int((byte_count * 8) // self.length)

        # If the bitrate * the length is nowhere near the file
        # length, recalculate using the bitrate and file length.
        # Don't do this for very small files.
        #
        # NOTE(review): seek(2, 0) moves to byte 2 from the start, so
        # size is 2 and the size > 2**16 guard keeps this from ever
        # firing. seek(0, 2) looks like what was intended, but enabling
        # the check as written would replace frame-count-based lengths
        # with one derived from a single frame's bitrate, and the
        # replacement formula divides by bitrate * 8 where bytes * 8 /
        # bitrate would be expected. Left unchanged pending a decision.
        fileobj.seek(2, 0)
        size = fileobj.tell()
        expected = (self.bitrate / 8) * self.length
        if not (size / 2 < expected < size * 2) and size > 2**16:
            self.length = size / float(self.bitrate * 8)

    def pprint(self):
        """Return a one-line, human-readable summary of the stream."""
        s = "MPEG %s layer %d, %d bps, %s Hz, %.2f seconds" % (
            self.version, self.layer, self.bitrate, self.sample_rate,
            self.length)
        if self.sketchy:
            s += " (sketchy)"
        return s
class MP3(ID3FileType):
    """An MPEG audio (usually MPEG-1 Layer 3) file."""

    _Info = MPEGInfo
    _mimes = [
        "audio/mp3",
        "audio/x-mp3",
        "audio/mpeg",
        "audio/mpg",
        "audio/x-mpeg",
    ]

    def score(filename, fileobj, header):
        """Return an integer rating of how likely this is an MP3 file.

        Two points when header starts with "ID3", plus one point for
        each recognised extension the lower-cased filename ends with.
        fileobj is accepted but not used.
        """
        lowered = filename.lower()
        points = 0
        if header.startswith("ID3"):
            points += 2
        for extension in (".mp3", ".mp2", ".mpg", ".mpeg"):
            if lowered.endswith(extension):
                points += 1
        return points
    score = staticmethod(score)


# Module-level alias for the MP3 class.
Open = MP3