# Copyright 2006 Joe Wreschnig
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# $Id: ogg.py 3975 2007-01-13 21:51:17Z piman $

"""Read and write Ogg bitstreams and pages.

This module reads and writes a subset of the Ogg bitstream format
version 0. It does *not* read or write Ogg Vorbis files! For that,
you should use mutagen.oggvorbis.

This implementation is based on the RFC 3533 standard found at
http://www.xiph.org/ogg/doc/rfc3533.txt.
"""

import struct
import sys
import zlib

from cStringIO import StringIO

from mutagen import FileType
from mutagen._util import cdata, insert_bytes, delete_bytes

class error(IOError):
    """Ogg stream parsing errors."""
    # Raised by OggPage when a header, lacing table or payload cannot be
    # read or is malformed; OggFileType re-raises it as the subclass's
    # own _Error type.
    pass

32 class OggPage(object):
33 """A single Ogg page (not necessarily a single encoded packet).
35 A page is a header of 26 bytes, followed by the length of the
36 data, followed by the data.
38 The constructor is givin a file-like object pointing to the start
39 of an Ogg page. After the constructor is finished it is pointing
40 to the start of the next page.
43 version -- stream structure version (currently always 0)
44 position -- absolute stream position (default -1)
45 serial -- logical stream serial number (default 0)
46 sequence -- page sequence number within logical stream (default 0)
47 offset -- offset this page was read from (default None)
48 complete -- if the last packet on this page is complete (default True)
49 packets -- list of raw packet data (default [])
51 Note that if 'complete' is false, the next page's 'continued'
52 property must be true (so set both when constructing pages).
54 If a file-like object is supplied to the constructor, the above
55 attributes will be filled in based on it.
66 def __init__(self
, fileobj
=None):
72 self
.offset
= fileobj
.tell()
74 header
= fileobj
.read(27)
79 (oggs
, self
.version
, self
.__type
_flags
, self
.position
,
80 self
.serial
, self
.sequence
, crc
, segments
) = struct
.unpack(
83 raise error("unable to read full header; got %r" % header
)
86 raise error("read %r, expected %r, at 0x%x" % (
87 oggs
, "OggS", fileobj
.tell() - 27))
90 raise error("version %r unsupported" % self
.version
)
94 lacing_bytes
= fileobj
.read(segments
)
95 if len(lacing_bytes
) != segments
:
96 raise error("unable to read %r lacing bytes" % segments
)
97 for c
in map(ord, lacing_bytes
):
100 lacings
.append(total
)
103 lacings
.append(total
)
104 self
.complete
= False
106 self
.packets
= map(fileobj
.read
, lacings
)
107 if map(len, self
.packets
) != lacings
:
108 raise error("unable to read full data")
110 def __eq__(self
, other
):
111 """Two Ogg pages are the same if they write the same data."""
113 return (self
.write() == other
.write())
114 except AttributeError:
117 __hash__
= object.__hash
__
120 attrs
= ['version', 'position', 'serial', 'sequence', 'offset',
121 'complete', 'continued', 'first', 'last']
122 values
= ["%s=%r" % (attr
, getattr(self
, attr
)) for attr
in attrs
]
123 return "<%s %s, %d bytes in %d packets>" % (
124 type(self
).__name
__, " ".join(values
), sum(map(len, self
.packets
)),
128 """Return a string encoding of the page header and data.
130 A ValueError is raised if the data is too big to fit in a
135 struct
.pack("<4sBBqIIi", "OggS", self
.version
, self
.__type
_flags
,
136 self
.position
, self
.serial
, self
.sequence
, 0)
140 for datum
in self
.packets
:
141 quot
, rem
= divmod(len(datum
), 255)
142 lacing_data
.append("\xff" * quot
+ chr(rem
))
143 lacing_data
= "".join(lacing_data
)
144 if not self
.complete
and lacing_data
.endswith("\x00"):
145 lacing_data
= lacing_data
[:-1]
146 data
.append(chr(len(lacing_data
)))
147 data
.append(lacing_data
)
148 data
.extend(self
.packets
)
151 # Python's CRC is swapped relative to Ogg's needs.
152 crc
= ~zlib
.crc32(data
.translate(cdata
.bitswap
), -1)
153 # Although we're using to_int_be, this actually makes the CRC
154 # a proper le integer, since Python's CRC is byteswapped.
155 crc
= cdata
.to_int_be(crc
).translate(cdata
.bitswap
)
156 data
= data
[:22] + crc
+ data
[26:]
160 size
= 27 # Initial header size
161 for datum
in self
.packets
:
162 quot
, rem
= divmod(len(datum
), 255)
164 if not self
.complete
and rem
== 0:
165 # Packet contains a multiple of 255 bytes and is not
166 # terminated, so we don't have a \x00 at the end.
168 size
+= sum(map(len, self
.packets
))
171 size
= property(__size
, doc
="Total frame size.")
173 def __set_flag(self
, bit
, val
):
175 if val
: self
.__type
_flags |
= mask
176 else: self
.__type
_flags
&= ~mask
178 continued
= property(
179 lambda self
: cdata
.test_bit(self
.__type
_flags
, 0),
180 lambda self
, v
: self
.__set
_flag
(0, v
),
181 doc
="The first packet is continued from the previous page.")
184 lambda self
: cdata
.test_bit(self
.__type
_flags
, 1),
185 lambda self
, v
: self
.__set
_flag
(1, v
),
186 doc
="This is the first page of a logical bitstream.")
189 lambda self
: cdata
.test_bit(self
.__type
_flags
, 2),
190 lambda self
, v
: self
.__set
_flag
(2, v
),
191 doc
="This is the last page of a logical bitstream.")
193 def renumber(klass
, fileobj
, serial
, start
):
194 """Renumber pages belonging to a specified logical stream.
196 fileobj must be opened with mode r+b or w+b.
198 Starting at page number 'start', renumber all pages belonging
199 to logical stream 'serial'. Other pages will be ignored.
201 fileobj must point to the start of a valid Ogg page; any
202 occuring after it and part of the specified logical stream
203 will be numbered. No adjustment will be made to the data in
204 the pages nor the granule position; only the page number, and
207 If an error occurs (e.g. non-Ogg data is found), fileobj will
208 be left pointing to the place in the stream the error occured,
209 but the invalid data will be left intact (since this function
210 does not change the total file size).
215 try: page
= OggPage(fileobj
)
219 if page
.serial
!= serial
:
220 # Wrong stream, skip this page.
222 # Changing the number can't change the page size,
223 # so seeking back based on the current size is safe.
224 fileobj
.seek(-page
.size
, 1)
225 page
.sequence
= number
226 fileobj
.write(page
.write())
227 fileobj
.seek(page
.offset
+ page
.size
, 0)
229 renumber
= classmethod(renumber
)
231 def to_packets(klass
, pages
, strict
=False):
232 """Construct a list of packet data from a list of Ogg pages.
234 If strict is true, the first page must start a new packet,
235 and the last page must end the last packet.
238 serial
= pages
[0].serial
239 sequence
= pages
[0].sequence
243 if pages
[0].continued
:
244 raise ValueError("first packet is continued")
245 if not pages
[-1].complete
:
246 raise ValueError("last packet does not complete")
247 elif pages
and pages
[0].continued
:
251 if serial
!= page
.serial
:
252 raise ValueError("invalid serial number in %r" % page
)
253 elif sequence
!= page
.sequence
:
254 raise ValueError("bad sequence number in %r" % page
)
257 if page
.continued
: packets
[-1] += page
.packets
[0]
258 else: packets
.append(page
.packets
[0])
259 packets
.extend(page
.packets
[1:])
262 to_packets
= classmethod(to_packets
)
264 def from_packets(klass
, packets
, sequence
=0,
265 default_size
=4096, wiggle_room
=2048):
266 """Construct a list of Ogg pages from a list of packet data.
268 The algorithm will generate pages of approximately
269 default_size in size (rounded down to the nearest multiple of
270 255). However, it will also allow pages to increase to
271 approximately default_size + wiggle_room if allowing the
272 wiggle room would finish a packet (only one packet will be
273 finished in this way per page; if the next packet would fit
274 into the wiggle room, it still starts on a new page).
276 This method reduces packet fragmentation when packet sizes are
277 slightly larger than the default page size, while still
278 ensuring most pages are of the average size.
280 Pages are numbered started at 'sequence'; other information is
284 chunk_size
= (default_size
// 255) * 255
289 page
.sequence
= sequence
291 for packet
in packets
:
292 page
.packets
.append("")
294 data
, packet
= packet
[:chunk_size
], packet
[chunk_size
:]
295 if page
.size
< default_size
and len(page
.packets
) < 255:
296 page
.packets
[-1] += data
298 # If we've put any packet data into this page yet,
299 # we need to mark it incomplete. However, we can
300 # also have just started this packet on an already
301 # full page, in which case, just start the new
302 # page with this packet.
304 page
.complete
= False
305 if len(page
.packets
) == 1:
311 page
.continued
= not pages
[-1].complete
312 page
.sequence
= pages
[-1].sequence
+ 1
313 page
.packets
.append(data
)
315 if len(packet
) < wiggle_room
:
316 page
.packets
[-1] += packet
323 from_packets
= classmethod(from_packets
)
325 def replace(klass
, fileobj
, old_pages
, new_pages
):
326 """Replace old_pages with new_pages within fileobj.
328 old_pages must have come from reading fileobj originally.
329 new_pages are assumed to have the 'same' data as old_pages,
330 and so the serial and sequence numbers will be copied, as will
331 the flags for the first and last pages.
333 fileobj will be resized and pages renumbered as necessary. As
334 such, it must be opened r+b or w+b.
337 # Number the new pages starting from the first old page.
338 first
= old_pages
[0].sequence
339 for page
, seq
in zip(new_pages
, range(first
, first
+ len(new_pages
))):
341 page
.serial
= old_pages
[0].serial
343 new_pages
[0].first
= old_pages
[0].first
344 new_pages
[0].last
= old_pages
[0].last
345 new_pages
[0].continued
= old_pages
[0].continued
347 new_pages
[-1].first
= old_pages
[-1].first
348 new_pages
[-1].last
= old_pages
[-1].last
349 new_pages
[-1].complete
= old_pages
[-1].complete
350 if not new_pages
[-1].complete
and len(new_pages
[-1].packets
) == 1:
351 new_pages
[-1].position
= -1L
353 new_data
= "".join(map(klass
.write
, new_pages
))
355 # Make room in the file for the new data.
356 delta
= len(new_data
)
357 fileobj
.seek(old_pages
[0].offset
, 0)
358 insert_bytes(fileobj
, delta
, old_pages
[0].offset
)
359 fileobj
.seek(old_pages
[0].offset
, 0)
360 fileobj
.write(new_data
)
361 new_data_end
= old_pages
[0].offset
+ delta
363 # Go through the old pages and delete them. Since we shifted
364 # the data down the file, we need to adjust their offsets. We
365 # also need to go backwards, so we don't adjust the deltas of
368 for old_page
in old_pages
:
369 adj_offset
= old_page
.offset
+ delta
370 delete_bytes(fileobj
, old_page
.size
, adj_offset
)
372 # Finally, if there's any discrepency in length, we need to
373 # renumber the pages for the logical stream.
374 if len(old_pages
) != len(new_pages
):
375 fileobj
.seek(new_data_end
, 0)
376 serial
= new_pages
[-1].serial
377 sequence
= new_pages
[-1].sequence
+ 1
378 klass
.renumber(fileobj
, serial
, sequence
)
379 replace
= classmethod(replace
)
381 def find_last(klass
, fileobj
, serial
):
382 """Find the last page of the stream 'serial'.
384 If the file is not multiplexed this function is fast. If it is,
385 it must read the whole the stream.
387 This finds the last page in the actual file object, or the last
388 page in the stream (with eos set), whichever comes first.
391 # For non-muxed streams, look at the last page.
392 try: fileobj
.seek(-256*256, 2)
394 # The file is less than 64k in length.
396 data
= fileobj
.read()
397 try: index
= data
.rindex("OggS")
399 raise error("unable to find final Ogg header")
400 stringobj
= StringIO(data
[index
:])
403 page
= OggPage(stringobj
)
407 if page
.serial
== serial
:
408 if page
.last
: return page
409 else: best_page
= page
410 else: best_page
= None
412 # The stream is muxed, so use the slow way.
415 page
= OggPage(fileobj
)
417 page
= OggPage(fileobj
)
418 while page
.serial
!= serial
:
419 page
= OggPage(fileobj
)
426 find_last
= classmethod(find_last
)
428 class OggFileType(FileType
):
429 """An generic Ogg file."""
434 _mimes
= ["application/ogg", "application/x-ogg"]
436 def load(self
, filename
):
437 """Load file information from a filename."""
439 self
.filename
= filename
440 fileobj
= open(filename
, "rb")
443 self
.info
= self
._Info
(fileobj
)
444 self
.tags
= self
._Tags
(fileobj
, self
.info
)
447 # The streaminfo gave us real length information,
448 # don't waste time scanning the Ogg.
451 last_page
= OggPage
.find_last(fileobj
, self
.info
.serial
)
452 samples
= last_page
.position
454 denom
= self
.info
.sample_rate
455 except AttributeError:
456 denom
= self
.info
.fps
457 self
.info
.length
= samples
/ float(denom
)
460 raise self
._Error
, e
, sys
.exc_info()[2]
462 raise self
._Error
, "no appropriate stream found"
466 def delete(self
, filename
=None):
467 """Remove tags from a file.
469 If no filename is given, the one most recently loaded is used.
472 filename
= self
.filename
475 fileobj
= open(filename
, "rb+")
477 try: self
.tags
._inject
(fileobj
)
479 raise self
._Error
, e
, sys
.exc_info()[2]
481 raise self
._Error
, "no appropriate stream found"
485 def save(self
, filename
=None):
486 """Save a tag to a file.
488 If no filename is given, the one most recently loaded is used.
491 filename
= self
.filename
492 fileobj
= open(filename
, "rb+")
494 try: self
.tags
._inject
(fileobj
)
496 raise self
._Error
, e
, sys
.exc_info()[2]
498 raise self
._Error
, "no appropriate stream found"