General formatting clean-up.
[xiph/unicode.git] / theora / include / theora / codec.h
blob5e565da37ee16228ddef2f300f3b5e035c5d5435
1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
7 * *
8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
9 * by the Xiph.Org Foundation http://www.xiph.org/ *
10 * *
11 ********************************************************************
13 function:
14 last mod: $Id: theora.h,v 1.8 2004/03/15 22:17:32 derf Exp $
16 ********************************************************************/
18 /**\mainpage
20 * \section intro Introduction
22 * This is the documentation for the <tt>libtheora</tt> C API.
23 * <tt>libtheora</tt> is the current reference
24 * implementation for <a href="http://www.theora.org/">Theora</a>, a free,
25 * patent-unencumbered video codec.
26 * Theora is derived from On2's VP3 codec with additional features and
27 * integration for Ogg multimedia formats by
28 * <a href="http://www.xiph.org/">the Xiph.Org Foundation</a>.
29 * Complete documentation of the format itself is available in
30 * <a href="http://www.theora.org/doc/Theora_I_spec.pdf">the Theora
31 * specification</a>.
33 * \subsection Organization
35 * The functions documented here are actually subdivided into two separate
36 * libraries:
37 * - <tt>libtheoradec</tt>, contains the decoder and shared routines.
38 * You must link to this if you use any of the functions listed in
39 * this API, i.e., those listed in \ref basefuncs and \ref decfuncs.*/
41 /**\file
42 * The shared <tt>libtheoradec</tt> and <tt>libtheoraenc</tt> C API.*/
44 #if !defined(_O_THEORA_CODEC_H_)
45 # define _O_THEORA_CODEC_H_ (1)
46 # include <ogg/ogg.h>
48 #if defined(__cplusplus)
49 extern "C" {
50 #endif
54 /**\name Return codes*/
55 /*@{*/
56 /**An invalid pointer was provided.*/
57 #define TH_EFAULT (-1)
58 /**An invalid argument was provided.*/
59 #define TH_EINVAL (-10)
60 /**The contents of the header were incomplete, invalid, or unexpected.*/
61 #define TH_EBADHEADER (-20)
62 /**The header does not belong to a Theora stream.*/
63 #define TH_ENOTFORMAT (-21)
64 /**The bitstream version is too high.*/
65 #define TH_EVERSION (-22)
66 /**The specified function is not implemented.*/
67 #define TH_EIMPL (-23)
68 /**There were errors in the video data packet.*/
69 #define TH_EBADPACKET (-24)
70 /**The decoded packet represented a dropped frame.
71 The player can continue to display the current frame, as the contents of the
72 decoded frame buffer have not changed.*/
73 #define TH_DUPFRAME (1)
74 /*@}*/
76 /**The currently defined color space tags.
77 * See <a href="http://www.theora.org/doc/Theora_I_spec.pdf">the Theora
78 * specification</a>, Chapter 4, for exact details on the meaning of each of
79 * these color spaces.*/
80 typedef enum{
81 /**The color space was not specified at the encoder.
82 It may be conveyed by an external means.*/
83 TH_CS_UNSPECIFIED,
84 /**A color space designed for NTSC content.*/
85 TH_CS_ITU_REC_470M,
86 /**A color space designed for PAL/SECAM content.*/
87 TH_CS_ITU_REC_470BG,
88 /**The total number of currently defined color spaces.*/
89 TH_CS_NSPACES
90 }th_colorspace;
92 /**The currently defined pixel format tags.
93 * See <a href="http://www.theora.org/doc/Theora_I_spec.pdf">the Theora
94 * specification</a>, Section 4.4, for details on the precise sample
95 * locations.*/
96 typedef enum{
97 /**Chroma decimation by 2 in both the X and Y directions (4:2:0).*/
98 TH_PF_420,
99 /**Currently reserved.*/
100 TH_PF_RSVD,
101 /**Chroma decimation by 2 in the X direction (4:2:2).*/
102 TH_PF_422,
103 /**No chroma decimation (4:4:4).*/
104 TH_PF_444,
105 /**The total number of currently defined pixel formats.*/
106 TH_PF_NFORMATS
107 }th_pixel_fmt;
111 /**A buffer for a single color plane in an uncompressed image.
112 * This contains the image data in a left-to-right, top-down format.
113 * Each row of pixels is stored contiguously in memory, but successive rows
114 * need not be.
115 * Use \a ystride to compute the offset of the next row.
116 * The encoder accepts both positive \a ystride values (top-down in memory) and
117 * negative (bottom-up in memory).
118 * The decoder currently always generates images with positive strides.*/
119 typedef struct{
120 /**The width of this plane.*/
121 int width;
122 /**The height of this plane.*/
123 int height;
124 /**The offset in bytes between successive rows.*/
125 int ystride;
126 /**A pointer to the beginning of the first row.*/
127 unsigned char *data;
128 }th_img_plane;
130 /**A complete image buffer for an uncompressed frame.
131 * The chroma planes may be decimated by a factor of two in either direction,
132 * as indicated by th_info#pixel_fmt.
133 * The width and height of the Y' plane must be multiples of 16.
134 * They may need to be cropped for display, using the rectangle specified by
135 * th_info#pic_x, th_info#pic_y, th_info#pic_width, and
136 * th_info#pic_height.
137 * All samples are 8 bits.
138 * \note The term YUV often used to describe a colorspace is ambiguous.
139 * The exact parameters of the RGB to YUV conversion process aside, in many
140 * contexts the U and V channels actually have opposite meanings.
141 * To avoid this confusion, we are explicit: the names of the color channels
142 * are Y'CbCr, and they appear in that order, always.
143 * The prime symbol denotes that the Y channel is non-linear.
144 * Cb and Cr stand for "Chroma blue" and "Chroma red", respectively.*/
145 typedef th_img_plane th_ycbcr_buffer[3];
147 /**Theora bitstream information.
148 * This contains the basic playback parameters for a stream, and corresponds to
149 * the initial 'info' header packet.
150 * To initialize an encoder, the application fills in this structure and
151 * passes it to th_encode_alloc().
152 * A default encoding mode is chosen based on the values of the #quality and
153 * #target_bitrate fields.
154 * On decode, it is filled in by th_decode_headerin(), and then passed to
155 * th_decode_alloc().
157 * Encoded Theora frames must be a multiple of 16 in size;
158 * this is what the #frame_width and #frame_height members represent.
159 * To handle arbitrary picture sizes, a crop rectangle is specified in the
160 * #pic_x, #pic_y, #pic_width and #pic_height members.
162 * All frame buffers contain pointers to the full, padded frame.
163 * However, the current encoder <em>will not</em> reference pixels outside of
164 * the cropped picture region, and the application does not need to fill them
165 * in.
166 * The decoder <em>will</em> allocate storage for a full frame, but the
167 * application <em>should not</em> rely on the padding containing sensible
168 * data.
170 * It is also generally recommended that the offsets and sizes should still be
171 * multiples of 2 to avoid chroma sampling shifts when chroma is sub-sampled.
172 * See <a href="http://www.theora.org/doc/Theora_I_spec.pdf">the Theora
173 * specification</a>, Section 4.4, for more details.
175 * Frame rate, in frames per second, is stored as a rational fraction, as is
176 * the pixel aspect ratio.
177 * Note that this refers to the aspect ratio of the individual pixels, not of
178 * the overall frame itself.
179 * The frame aspect ratio can be computed from pixel aspect ratio using the
180 * image dimensions.*/
181 typedef struct{
182 /**\name Theora version
183 * Bitstream version information.*/
184 /*@{*/
185 unsigned char version_major;
186 unsigned char version_minor;
187 unsigned char version_subminor;
188 /*@}*/
189 /**The encoded frame width.
190 * This must be a multiple of 16, and less than 1048576.*/
191 ogg_uint32_t frame_width;
192 /**The encoded frame height.
193 * This must be a multiple of 16, and less than 1048576.*/
194 ogg_uint32_t frame_height;
195 /**The displayed picture width.
196 * This must be no larger than #frame_width.*/
197 ogg_uint32_t pic_width;
198 /**The displayed picture height.
199 * This must be no larger than #frame_height.*/
200 ogg_uint32_t pic_height;
201 /**The X offset of the displayed picture.
202 * This must be no larger than #frame_width-#pic_width or 255, whichever is
203 * smaller.*/
204 ogg_uint32_t pic_x;
205 /**The Y offset of the displayed picture.
206 * This must be no larger than #frame_height-#pic_height, and
207 * #frame_height-#pic_height-#pic_y must be no larger than 255.
208 * This slightly funny restriction is due to the fact that the offset is
209 * specified from the top of the image for consistency with the standard
210 * graphics left-handed coordinate system used throughout this API, while it
211 * is stored in the encoded stream as an offset from the bottom.*/
212 ogg_uint32_t pic_y;
213 /**\name Frame rate
214 * The frame rate, as a fraction.
215 * If either is 0, the frame rate is undefined.*/
216 /*@{*/
217 ogg_uint32_t fps_numerator;
218 ogg_uint32_t fps_denominator;
219 /*@}*/
220 /**\name Aspect ratio
221 * The aspect ratio of the pixels.
222 * If either value is zero, the aspect ratio is undefined.
223 * If not specified by any external means, 1:1 should be assumed.
224 * The aspect ratio of the full picture can be computed as
225 * \code
226 * aspect_numerator*pic_width/(aspect_denominator*pic_height).
227 * \endcode */
228 /*@{*/
229 ogg_uint32_t aspect_numerator;
230 ogg_uint32_t aspect_denominator;
231 /*@}*/
232 /**The color space.*/
233 th_colorspace colorspace;
234 /**The pixel format.*/
235 th_pixel_fmt pixel_fmt;
236 /**The target bit-rate in bits per second.
237 If initializing an encoder with this struct, set this field to a non-zero
238 value to activate CBR encoding by default.*/
239 /*TODO: Current encoder does not support CBR mode, or anything like it.
240 We also don't really know what nominal rate each quality level
241 corresponds to yet.*/
242 int target_bitrate;
243 /**The target quality level.
244 Valid values range from 0 to 63, inclusive, with higher values giving
245 higher quality.
246 If initializing an encoder with this struct, and #target_bitrate is set
247 to zero, VBR encoding at this quality will be activated by default.*/
248 /*Currently this is set so that a qi of 0 corresponds to distortions of 24
249 times the JND, and each increase by 16 halves that value.
250 This gives us fine discrimination at low qualities, yet effective rate
251 control at high qualities.
252 The qi value 63 is special, however.
253 For this, the highest quality, we use one half of a JND for our threshold.
254 Due to the lower bounds placed on allowable quantizers in Theora, we will
255 not actually be able to achieve quality this good, but this should
256 provide as close to visually lossless quality as Theora is capable of.
257 We could lift the quantizer restrictions without breaking VP3.1
258 compatibility, but this would result in quantized coefficients that are
259 too large for the current bitstream to be able to store.
260 We'd have to redesign the token syntax to store these large coefficients,
261 which would make transcoding complex.*/
262 int quality;
263 /**The amount to shift to extract the last keyframe number from the granule
264 * position.
265 * This can be at most 31.
266 * th_info_init() will set this to a default value (currently <tt>6</tt>,
267 * which is good for streaming applications), but you can set it to 0 to
268 * make every frame a keyframe.
269 * The maximum distance between key frames is
270 * <tt>1<<#keyframe_granule_shift</tt>.
271 * The keyframe frequency can be more finely controlled with
272 * #TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE, which can also be adjusted
273 * during encoding (for example, to force the next frame to be a keyframe),
274 * but it cannot be set larger than the amount permitted by this field after
275 * the headers have been output.*/
276 int keyframe_granule_shift;
277 }th_info;
279 /**The comment information.
281 * This structure holds the in-stream metadata corresponding to
282 * the 'comment' header packet.
283 * The comment header is meant to be used much like someone jotting a quick
284 * note on the label of a video.
285 * It should be a short, to the point text note that can be more than a couple
286 * words, but not more than a short paragraph.
288 * The metadata is stored as a series of (tag, value) pairs, in
289 * length-encoded string vectors.
290 * The first occurrence of the '=' character delimits the tag and value.
291 * A particular tag may occur more than once, and order is significant.
292 * The character set encoding for the strings is always UTF-8, but the tag
293 * names are limited to ASCII, and treated as case-insensitive.
294 * See <a href="http://www.theora.org/doc/Theora_I_spec.pdf">the Theora
295 * specification</a>, Section 6.3.3 for details.
297 * In filling in this structure, th_decode_headerin() will null-terminate
298 * the user_comment strings for safety.
299 * However, the bitstream format itself treats them as 8-bit clean vectors,
300 * possibly containing null characters, and so the length array should be
300 * treated as their authoritative length.*/
303 typedef struct th_comment{
304 /**The array of comment string vectors.*/
305 char **user_comments;
306 /**An array of the corresponding length of each vector, in bytes.*/
307 int *comment_lengths;
308 /**The total number of comment strings.*/
309 int comments;
310 /**The null-terminated vendor string.
311 This identifies the software used to encode the stream.*/
312 char *vendor;
313 }th_comment;
317 /**A single base matrix.*/
318 typedef unsigned char th_quant_base[64];
320 /**A set of \a qi ranges.*/
321 typedef struct{
322 /**The number of ranges in the set.*/
323 int nranges;
324 /**The size of each of the #nranges ranges.
325 These must sum to 63.*/
326 const int *sizes;
327 /**#nranges <tt>+1</tt> base matrices.
328 Matrices \a i and <tt>i+1</tt> form the endpoints of range \a i.*/
329 const th_quant_base *base_matrices;
330 }th_quant_ranges;
332 /**A complete set of quantization parameters.
333 The quantizer for each coefficient is calculated as:
334 \code
335 Q=MIN(MAX(qmin[qti][ci!=0],scale[ci!=0][qi]*base[qti][pli][qi][ci]/100),
336 1024).
337 \endcode
339 \a qti is the quantization type index: 0 for intra, 1 for inter.
340 <tt>ci!=0</tt> is 0 for the DC coefficient and 1 for AC coefficients.
341 \a qi is the quality index, ranging between 0 (low quality) and 63 (high
342 quality).
343 \a pli is the color plane index: 0 for Y', 1 for Cb, 2 for Cr.
344 \a ci is the DCT coefficient index.
345 Coefficient indices correspond to the normal 2D DCT block
346 ordering--row-major with low frequencies first--\em not zig-zag order.
348 Minimum quantizers are constant, and are given by:
349 \code
350 qmin[2][2]={{4,2},{8,4}}.
351 \endcode
353 Parameters that can be stored in the bitstream are as follows:
354 - The two scale matrices ac_scale and dc_scale.
355 \code
356 scale[2][64]={dc_scale,ac_scale}.
357 \endcode
358 - The base matrices for each \a qi, \a qti and \a pli (up to 384 in all).
359 In order to avoid storing a full 384 base matrices, only a sparse set of
360 matrices are stored, and the rest are linearly interpolated.
361 This is done as follows.
362 For each \a qti and \a pli, a series of \a n \a qi ranges is defined.
363 The size of each \a qi range can vary arbitrarily, but they must sum to
364 63.
365 Then, <tt>n+1</tt> matrices are specified, one for each endpoint of the
366 ranges.
367 For interpolation purposes, each range's endpoints are the first \a qi
368 value it contains and one past the last \a qi value it contains.
369 Fractional values are rounded to the nearest integer, with ties rounded
370 away from zero.
372 Base matrices are stored by reference, so if the same matrices are used
373 multiple times, they will only appear once in the bitstream.
374 The bitstream is also capable of omitting an entire set of ranges and
375 its associated matrices if they are the same as either the previous
376 set (indexed in row-major order) or if the inter set is the same as the
377 intra set.
379 - Loop filter limit values.
380 The same limits are used for the loop filter in all color planes, despite
381 potentially differing levels of quantization in each.
383 For the current encoder, <tt>scale[ci!=0][qi]</tt> must be no greater
384 than <tt>scale[ci!=0][qi-1]</tt> and <tt>base[qti][pli][qi][ci]</tt> must
385 be no greater than <tt>base[qti][pli][qi-1][ci]</tt>.
386 These two conditions ensure that the actual quantizer for a given \a qti,
387 \a pli, and \a ci does not increase as \a qi increases.
388 This is not required by the decoder.*/
389 typedef struct{
390 /**The DC scaling factors, one for each \a qi value.*/
391 ogg_uint16_t dc_scale[64];
392 /**The AC scaling factors, one for each \a qi value.*/
393 ogg_uint16_t ac_scale[64];
394 /**The loop filter limit values.*/
395 unsigned char loop_filter_limits[64];
396 /**The \a qi ranges for each \a qti and \a pli, indexed as <tt>qi_ranges[qti][pli]</tt>.*/
397 th_quant_ranges qi_ranges[2][3];
398 }th_quant_info;
402 /**The number of Huffman tables used by Theora.*/
403 #define TH_NHUFFMAN_TABLES (80)
404 /**The number of DCT token values in each table.*/
405 #define TH_NDCT_TOKENS (32)
407 /**A Huffman code for a Theora DCT token.
408 * Each set of Huffman codes in a given table must form a complete, prefix-free
409 * code.
410 * There is no requirement that all the tokens in a table have a valid code,
411 * but the current encoder is not optimized to take advantage of this.
412 * If each of the five groups of 16 tables does not contain at least one table
413 * with a code for every token, then the encoder may fail to encode certain
414 * frames.
415 * The complete table in the first group of 16 does not have to be in the same
416 * place as the complete table in the other groups, but the complete tables in
417 * the remaining four groups must all be in the same place.*/
418 typedef struct{
419 /**The bit pattern for the code, with the LSbit of the pattern aligned in
420 * the LSbit of the word.*/
421 ogg_uint32_t pattern;
422 /**The number of bits in the code.
423 * This must be between 0 and 32, inclusive.*/
424 int nbits;
425 }th_huff_code;
429 /**\defgroup basefuncs Functions Shared by Encode and Decode*/
430 /*@{*/
431 /**\name Basic shared functions*/
432 /*@{*/
433 /**Retrieves a human-readable string to identify the library vendor and
434 * version.
435 * \return the version string.*/
436 extern const char *th_version_string(void);
437 /**Retrieves the library version number.
438 * This is the highest bitstream version that the encoder library will produce,
439 * or that the decoder library can decode.
440 * This number is composed of a 16-bit major version, an 8-bit minor version
441 * and an 8-bit sub-version, composed as follows:
442 * \code
443 * (VERSION_MAJOR<<16)+(VERSION_MINOR<<8)+(VERSION_SUBMINOR)
444 * \endcode
445 * \return the version number.*/
446 extern ogg_uint32_t th_version_number(void);
447 /**Converts a granule position to an absolute frame number.
448 * The granule position is interpreted in the context of a given
449 * #th_enc_ctx or #th_dec_ctx handle (either will suffice).
450 * \param _encdec A previously allocated #th_enc_ctx or #th_dec_ctx
451 * handle.
452 * \param _granpos The granule position to convert.
453 * \returns The absolute frame number corresponding to \a _granpos.
454 * \retval -1 The given granule position was invalid (i.e. negative).*/
455 extern ogg_int64_t th_granule_frame(void *_encdec,ogg_int64_t _granpos);
456 /**Converts a granule position to an absolute time in seconds.
457 * The granule position is interpreted in the context of a given
458 * #th_enc_ctx or #th_dec_ctx handle (either will suffice).
459 * \param _encdec A previously allocated #th_enc_ctx or #th_dec_ctx
460 * handle.
461 * \param _granpos The granule position to convert.
462 * \return The absolute time in seconds corresponding to \a _granpos.
463 * \retval -1 The given granule position was invalid (i.e. negative).*/
464 extern double th_granule_time(void *_encdec,ogg_int64_t _granpos);
465 /**Determines whether a Theora packet is a header or not.
466 * This function does no verification beyond checking the packet type bit, so
467 * it should not be used for bitstream identification; use
468 * th_decode_headerin() for that.
469 * As per the Theora specification, an empty (0-byte) packet is treated as a
470 * data packet (a delta frame with no coded blocks).
471 * \param _op An <tt>ogg_packet</tt> containing encoded Theora data.
472 * \retval 1 The packet is a header packet.
473 * \retval 0 The packet is a video data packet.*/
474 extern int th_packet_isheader(ogg_packet *_op);
475 /**Determines whether a Theora packet is a key frame or not.
476 * This function does no verification beyond checking the packet type and
477 * key frame bits, so it should not be used for bitstream identification; use
478 * th_decode_headerin() for that.
479 * As per the Theora specification, an empty (0-byte) packet is treated as a
480 * delta frame (with no coded blocks).
481 * \param _op An <tt>ogg_packet</tt> containing encoded Theora data.
482 * \retval 1 The packet contains a key frame.
483 * \retval 0 The packet contains a delta frame.
484 * \retval -1 The packet is not a video data packet.*/
485 extern int th_packet_iskeyframe(ogg_packet *_op);
486 /*@}*/
489 /**\name Functions for manipulating header data*/
490 /*@{*/
491 /**Initializes a th_info structure.
492 * This should be called on a freshly allocated #th_info structure before
493 * attempting to use it.
494 * \param _info The #th_info struct to initialize.*/
495 extern void th_info_init(th_info *_info);
496 /**Clears a #th_info structure.
497 * This should be called on a #th_info structure after it is no longer
498 * needed.
499 * \param _info The #th_info struct to clear.*/
500 extern void th_info_clear(th_info *_info);
502 /**Initialize a #th_comment structure.
503 * This should be called on a freshly allocated #th_comment structure
504 * before attempting to use it.
505 * \param _tc The #th_comment struct to initialize.*/
506 extern void th_comment_init(th_comment *_tc);
507 /**Add a comment to an initialized #th_comment structure.
508 * \note Neither th_comment_add() nor th_comment_add_tag() support
509 * comments containing null values, although the bitstream format does
510 * support them.
511 * To add such comments you will need to manipulate the #th_comment
512 * structure directly.
513 * \param _tc The #th_comment struct to add the comment to.
514 * \param _comment Must be a null-terminated UTF-8 string containing the
515 * comment in "TAG=the value" form.*/
516 extern void th_comment_add(th_comment *_tc, char *_comment);
517 /**Add a comment to an initialized #th_comment structure.
518 * \note Neither th_comment_add() nor th_comment_add_tag() support
519 * comments containing null values, although the bitstream format does
520 * support them.
521 * To add such comments you will need to manipulate the #th_comment
522 * structure directly.
523 * \param _tc The #th_comment struct to add the comment to.
524 * \param _tag A null-terminated string containing the tag associated with
525 * the comment.
526 * \param _val The corresponding value as a null-terminated string.*/
527 extern void th_comment_add_tag(th_comment *_tc,char *_tag,char *_val);
528 /**Look up a comment value by its tag.
529 * \param _tc An initialized #th_comment structure.
530 * \param _tag The tag to look up.
531 * \param _count The instance of the tag.
532 * The same tag can appear multiple times, each with a distinct
533 * value, so an index is required to retrieve them all.
534 * The order in which these values appear is significant and
535 * should be preserved.
536 * Use th_comment_query_count() to get the legal range for
537 * the \a _count parameter.
538 * \return A pointer to the queried tag's value.
539 * This points directly to data in the #th_comment structure.
540 * It should not be modified or freed by the application, and
541 * modifications to the structure may invalidate the pointer.
542 * \retval NULL If no matching tag is found.*/
543 extern char *th_comment_query(th_comment *_tc,char *_tag,int _count);
544 /**Look up the number of instances of a tag.
545 * Call this first when querying for a specific tag and then iterate over the
546 * number of instances with separate calls to th_comment_query() to
547 * retrieve all the values for that tag in order.
548 * \param _tc An initialized #th_comment structure.
549 * \param _tag The tag to look up.
550 * \return The number of instances of this particular tag.*/
551 extern int th_comment_query_count(th_comment *_tc,char *_tag);
552 /**Clears a #th_comment structure.
553 * This should be called on a #th_comment structure after it is no longer
554 * needed.
555 * It will free all memory used by the structure members.
556 * \param _tc The #th_comment struct to clear.*/
557 extern void th_comment_clear(th_comment *_tc);
558 /*@}*/
559 /*@}*/
563 #if defined(__cplusplus)
564 } /*Closes the extern "C" linkage block opened for C++ compilers.*/
565 #endif
567 #endif