Made the spec agree with the code for the non-normative forward DCT transform.
[xiph/unicode.git] / theora / doc / spec / spec.tex
blob4f725e34ee85b68137e8f003b1f89a06e4a77e79
1 \documentclass[9pt,letterpaper]{book}
3 \usepackage{latexsym}
4 \usepackage{amssymb}
5 \usepackage{amsmath}
6 \usepackage{bm}
7 \usepackage{textcomp}
8 \usepackage{graphicx}
9 \usepackage{booktabs}
10 \usepackage{tabularx}
11 \usepackage{longtable}
12 \usepackage{ltablex}
13 \usepackage{wrapfig}
14 \usepackage[pdfpagemode=None,pdfstartview=FitH,pdfview=FitH,colorlinks=true]%
15 {hyperref}
17 \newtheorem{theorem}{Theorem}[section]
18 \newcommand{\idx}[1]{{\ensuremath{\mathit{#1}}}}
19 \newcommand{\qti}{\idx{qti}}
20 \newcommand{\qtj}{\idx{qtj}}
21 \newcommand{\pli}{\idx{pli}}
22 \newcommand{\plj}{\idx{plj}}
23 \newcommand{\qi}{\idx{qi}}
24 \newcommand{\ci}{\idx{ci}}
25 \newcommand{\bmi}{\idx{bmi}}
26 \newcommand{\bmj}{\idx{bmj}}
27 \newcommand{\qri}{\idx{qri}}
28 \newcommand{\qrj}{\idx{qrj}}
29 \newcommand{\hti}{\idx{hti}}
30 \newcommand{\sbi}{\idx{sbi}}
31 \newcommand{\bi}{\idx{bi}}
32 \newcommand{\bj}{\idx{bj}}
33 \newcommand{\mbi}{\idx{mbi}}
34 \newcommand{\mbj}{\idx{mbj}}
35 \newcommand{\mi}{\idx{mi}}
36 \newcommand{\cbi}{\idx{cbi}}
37 \newcommand{\qii}{\idx{qii}}
38 \newcommand{\ti}{\idx{ti}}
39 \newcommand{\tj}{\idx{tj}}
40 \newcommand{\rfi}{\idx{rfi}}
41 \newcommand{\zzi}{\idx{zzi}}
42 \newcommand{\ri}{\idx{ri}}
43 %This somewhat odd construct ensures that \bitvar{\qi}, etc., will set the
44 % qi in bold face, even though it is in a \mathit font, yet \bitvar{VAR} will
45 % set VAR in a bold, roman font.
46 \newcommand{\bitvar}[1]{\ensuremath{\mathbf{\bm{#1}}}}
47 \newcommand{\locvar}[1]{\ensuremath{\mathrm{#1}}}
48 \newcommand{\term}[1]{{\em #1}}
49 \newcommand{\bin}[1]{\ensuremath{\mathtt{b#1}}}
50 \newcommand{\hex}[1]{\ensuremath{\mathtt{0x#1}}}
51 \newcommand{\ilog}{\ensuremath{\mathop{\mathrm{ilog}}\nolimits}}
52 \newcommand{\round}{\ensuremath{\mathop{\mathrm{round}}\nolimits}}
53 \newcommand{\sign}{\ensuremath{\mathop{\mathrm{sign}}\nolimits}}
54 \newcommand{\lflim}{\ensuremath{\mathop{\mathrm{lflim}}\nolimits}}
56 %Chapter-based table, figure, and equation numbering.
57 \numberwithin{equation}{chapter}
58 \numberwithin{figure}{chapter}
59 \numberwithin{table}{chapter}
61 \keepXColumns
63 \pagestyle{headings}
64 \bibliographystyle{alpha}
66 \title{Theora I Specification}
67 \author{Xiph.org Foundation}
68 \date{\today}
71 \begin{document}
73 \frontmatter
75 \begin{titlepage}
76 \maketitle
77 \end{titlepage}
78 \thispagestyle{empty}
79 \cleardoublepage
81 \pagenumbering{roman}
83 \thispagestyle{plain}
84 \tableofcontents
85 \cleardoublepage
87 \thispagestyle{plain}
88 \listoffigures
89 \cleardoublepage
91 \thispagestyle{plain}
92 \listoftables
93 \cleardoublepage
95 \thispagestyle{plain}
96 \markboth{{\sc Notation and Conventions}}{{\sc Notation and Conventions}}
97 \chapter*{Notation and Conventions}
99 All parameters either passed in or out of a decoding procedure are given in
100 \bitvar{bold\ face}.
102 The prefix \bin{} indicates that the following value is to be interpreted as a
103 binary number (base 2).
104 \begin{verse}
105 {\bf Example:} The value \bin{1110100} is equal to the decimal value 116.
106 \end{verse}
108 The prefix \hex{} indicates that the following value is to be interpreted as a
109 hexadecimal number (base 16).
110 \begin{verse}
111 {\bf Example:} The value \hex{74} is equal to the decimal value 116.
112 \end{verse}
114 All arithmetic defined by this specification is exact.
115 However, any real numbers that do arise will always be converted back to
116 integers again in short order.
117 The entire specification can be implemented using only normal integer
118 operations.
119 All operations are to be implemented with sufficiently large integers so that
120 overflow cannot occur.
121 Where the result of a computation is to be truncated to a fixed-sized binary
122 representation, this will be explicitly noted.
123 The size given for all variables is the maximum number of bits needed to store
124 any value in that variable.
125 Intermediate computations involving that variable may require more bits.
127 The following operators are defined:
129 \begin{description}
130 \item[$|a|$]
131 The absolute value of a number $a$.
132 \begin{align*}
133 |a| & = \left\{\begin{array}{ll}
134 -a, & a < 0 \\
135 a, & a \ge 0
136 \end{array}\right.
137 \end{align*}
139 \item[$a*b$]
140 Multiplication of a number $a$ by a number $b$.
141 \item[$\frac{a}{b}$]
142 Exact division of a number $a$ by a number $b$, producing a potentially
143 non-integer result.
145 \item[$\left\lfloor a\right\rfloor$]
146 The largest integer less than or equal to a real number $a$.
148 \item[$\left\lceil a\right\rceil$]
149 The smallest integer greater than or equal to a real number $a$.
151 \item[$a//b$]
152 Integer division of $a$ by $b$.
153 \begin{align*}
154 a//b & = \left\{\begin{array}{ll}
155 \left\lceil\frac{a}{b}\right\rceil, & a < 0 \\
156 \left\lfloor\frac{a}{b}\right\rfloor, & a \ge 0
157 \end{array}\right.
158 \end{align*}
160 \item[$a\%b$]
161 The remainder from the integer division of $a$ by $b$.
162 \begin{align*}
163 a\%b & = |a|-|b|*|a//b|
164 \end{align*}
165 Note that with this definition, the result is always non-negative and less than
166 $|b|$.
168 \item[$a<<b$]
169 The value obtained by left-shifting the two's complement integer $a$ by $b$
170 bits.
171 For purposes of this specification, overflow is ignored, and so this is
172 equivalent to integer multiplication of $a$ by $2^b$.
174 \item[$a>>b$]
175 The value obtained by right-shifting the two's complement integer $a$ by $b$
176 bits, filling in the leftmost bits of the new value with $0$ if $a$ is
177 non-negative and $1$ if $a$ is negative.
178 This is {\em not} equivalent to integer division of $a$ by $2^b$.
179 Instead,
180 \begin{align*}
181 a>>b & = \left\lfloor\frac{a}{2^b}\right\rfloor.
182 \end{align*}
184 \item[$\round(a)$]
185 Rounds a number $a$ to the nearest integer, with ties rounded away from $0$.
186 \begin{align*}
187 \round(a) = \left\{\begin{array}{ll}
188 \lceil a-\frac{1}{2}\rceil & a \le 0 \\
189 \lfloor a+\frac{1}{2}\rfloor & a > 0
190 \end{array}\right.
191 \end{align*}
193 \item[$\sign(a)$]
194 Returns the sign of a given number.
195 \begin{align*}
196 \sign(a) = \left\{\begin{array}{ll}
197 -1 & a < 0 \\
198 0 & a = 0 \\
199 1 & a > 0
200 \end{array}\right.
201 \end{align*}
203 \item[$\ilog(a)$]
204 The minimum number of bits required to store a positive integer $a$ in
205 two's complement notation, or $0$ for a non-positive integer $a$.
206 \begin{align*}
207 \ilog(a) = \left\{\begin{array}{ll}
208 0, & a \le 0 \\
209 \left\lfloor\log_2{a}\right\rfloor+1, & a > 0
210 \end{array}\right.
211 \end{align*}
213 \begin{verse}
214 {\bf Examples:}
215 \begin{itemize}
216 \item $\ilog(-1)=0$
217 \item $\ilog(0)=0$
218 \item $\ilog(1)=1$
219 \item $\ilog(2)=2$
220 \item $\ilog(3)=2$
221 \item $\ilog(4)=3$
222 \item $\ilog(7)=3$
223 \end{itemize}
224 \end{verse}
226 \item[$\min(a,b)$]
227 The minimum of two numbers $a$ and $b$.
229 \item[$\max(a,b)$]
230 The maximum of two numbers $a$ and $b$.
232 \end{description}
233 \cleardoublepage
236 \thispagestyle{plain}
237 \markboth{{\sc Key words}}{{\sc Key words}}
238 \chapter*{Key words}
240 %We can't rewrite this, because this is text required by RFC 2119, so we use
241 % some emergency stretching to get it typeset properly.
242 \setlength{\emergencystretch}{2em}
243 The key words ``MUST'', ``MUST NOT'', ``REQUIRED'', ``SHALL'', ``SHALL NOT'',
244 ``SHOULD'', ``SHOULD NOT'', ``RECOMMENDED'', ``MAY'', and ``OPTIONAL'' in this
245 document are to be interpreted as described in RFC 2119 \cite{rfc2119}.\par
246 \setlength{\emergencystretch}{0em}
248 Where such assertions are placed on the contents of a Theora bitstream itself,
249 implementations should be prepared to encounter bitstreams that do not follow
250 these requirements.
251 An application's behavior in the presence of such non-conforming bitstreams
252 is not defined by this specification, but any reasonable method of handling
253 them MAY be used.
254 By way of example, applications MAY discard the current frame, retain the
255 current output thus far, or attempt to continue on by assuming some default
256 values for the erroneous bits.
257 When such an error occurs in the bitstream headers, an application MAY refuse
258 to decode the entire stream.
259 An application SHOULD NOT allow such non-conformant bitstreams to overflow
260 buffers and potentially execute arbitrary code, as this represents a serious
261 security risk.
263 An application MUST, however, ensure any bits marked as reserved have the value
264 zero, and refuse to decode the stream if they do not.
265 These are used as place holders for future bitstream features with which the
266 current bitstream is forward-compatible.
267 Such features may not increment the bitstream version number, and can only be
268 recognized by checking the value of these reserved bits.
270 \cleardoublepage
274 \mainmatter
276 \pagenumbering{arabic}
277 \setcounter{page}{1}
279 \chapter{Introduction}
281 Theora is a general purpose, lossy video codec.
282 It is based on the VP3 video codec produced by On2 Technologies
283 (\url{http://www.on2.com/}).
284 On2 donated the VP3.1 source code to the Xiph.org Foundation and released it
285 under a BSD-like license.
286 On2 also made an irrevocable, royalty-free license grant for any patent claims
287 it might have over the software and any derivatives.
288 No formal specification exists for the VP3 format beyond this source code,
289 however Mike Melanson maintains a detailed description \cite{Mel04}.
290 Portions of this specification were adopted from that text with permission.
292 \section{VP3 and Theora}
294 Theora contains a superset of the features that were available in the original
295 VP3 codec.
296 Content encoded with VP3.1 can be losslessly transcoded into the Theora format.
297 Theora content cannot, in general, be losslessly transcoded into the VP3
298 format.
299 If a feature is not available in the original VP3 format, this is mentioned
300 when that feature is defined.
301 A complete list of these features appears in Appendix~\ref{app:vp3-compat}.
302 %TODO: VP3 - theora comparison in appendix
304 \section{Video Formats}
306 Theora I currently supports progressive video data of arbitrary dimensions at a
307 constant frame rate in one of several $Y'C_bC_r$ color spaces.
308 The precise definition of the supported color spaces appears in
309 Section~\ref{sec:colorspaces}.
310 Three different chroma subsampling formats are supported: 4:2:0, 4:2:2,
311 and 4:4:4.
312 The precise details of each of these formats and their sampling locations are
313 described in Section~\ref{sec:pixfmts}.
315 The Theora I format does not support interlaced material, variable frame rates,
316 bit-depths larger than 8 bits per component, nor alternate color spaces such
317 as RGB or arbitrary multi-channel spaces.
318 Black and white content can be efficiently encoded, however, because the
319 uniform chroma planes compress well.
320 Support for interlaced material is planned for a future version.
321 \begin{verse}
322 {\bf Note:} Infrequently changing frame rates---as when film and video
323 sequences are cut together---can be supported in the Ogg container format by
324 chaining several Theora streams together.
325 \end{verse}
326 Support for increased bit depths or additional color spaces is not planned.
328 \section{Classification}
330 Theora I is a block-based lossy transform codec that utilizes an
331 $8\times 8$ Type-II Discrete Cosine Transform and block-based motion
332 compensation.
333 This places it in the same class of codecs as MPEG-1, -2, -4, and H.263.
334 The details of how individual blocks are organized and how DCT coefficients are
335 stored in the bitstream differ substantially from these codecs, however.
336 Theora supports only intra frames (I frames in MPEG) and inter frames (P frames
337 in MPEG).
338 There is no equivalent to the bi-predictive frames (B frames) found in MPEG
339 codecs.
341 \section{Assumptions}
343 The Theora codec design assumes a complex, psychovisually-aware encoder and a
344 simple, low-complexity decoder.
345 %TODO: Talk more about implementation complexity.
347 Theora provides none of its own framing, synchronization, or protection against
348 transmission errors.
349 An encoder is solely a method of accepting input video frames and
350 compressing these frames into raw, unformatted `packets'.
351 The decoder then accepts these raw packets in sequence, decodes them, and
352 synthesizes a facsimile of the original video frames.
353 Theora is a free-form variable bit rate (VBR) codec, and packets have no
354 minimum size, maximum size, or fixed/expected size.
356 Theora packets are thus intended to be used with a transport mechanism that
357 provides free-form framing, synchronization, positioning, and error correction
358 in accordance with these design assumptions, such as Ogg (for file transport)
359 or RTP (for network multicast).
360 For the purposes of a few examples in this document, we will assume that Theora
361 is embedded in an Ogg stream specifically, although this is by no means a
362 requirement or fundamental assumption in the Theora design.
364 The specification for embedding Theora into an Ogg transport stream is given in
365 Appendix~\ref{app:oggencapsulation}.
367 \section{Codec Setup and Probability Model}
369 Theora's heritage is the proprietary commercial codec VP3, and it retains a
370 fair amount of inflexibility when compared to Vorbis \cite{vorbis}, the first
371 Xiph.org codec, which began as a research codec.
372 However, to provide additional scope for encoder improvement, Theora adopts
373 some of the configurable aspects of decoder setup that are present in Vorbis.
374 This configuration data is not available in VP3, which uses hardcoded values
375 instead.
377 Theora makes the same controversial design decision that Vorbis made to include
378 the entire probability model for the DCT coefficients and all the quantization
379 parameters in the bitstream headers.
380 This is often several hundred fields.
381 It is therefore impossible to decode any frame in the stream without
382 having previously fetched the codec info and codec setup headers.
384 \begin{verse}
385 {\bf Note:} Theora {\em can} initiate decode at an arbitrary intra-frame packet
386 within a bitstream so long as the codec has been initialized with the setup
387 headers.
388 \end{verse}
390 Thus, Theora headers are both required for decode to begin and relatively large
391 as bitstream headers go.
392 The header size is unbounded, although as a rule-of-thumb less than 16kB is
393 recommended, and Xiph.org's reference encoder follows this suggestion.
394 %TODO: Is 8kB enough? My setup header is 7.4kB, that doesn't leave much room
395 % for comments.
396 %RG: the lesson from vorbis is that as small as possible is really
397 % important in some applications. Practically, what's acceptable
398 % depends a great deal on the target bitrate. I'd leave 16 kB in the
399 % spec for now. fwiw more than 1k of comments is quite unusual.
401 Our own design work indicates that the primary liability of the required header
402 is in mindshare; it is an unusual design and thus causes some amount of
403 complaint among engineers as this runs against current design trends and
404 points out limitations in some existing software/interface designs.
405 However, we find that it does not fundamentally limit Theora's suitable
406 application space.
408 %silvia: renamed
409 %\subsection{Format Specification}
410 \section{Format Conformance}
412 The Theora format is well-defined by its decode specification; any encoder that
413 produces packets that are correctly decoded by an implementation following
414 this specification may be considered a proper Theora encoder.
415 A decoder must faithfully and completely implement the specification defined
416 herein %, except where noted,
417 to be considered a conformant Theora decoder.
418 A decoder need not be implemented strictly as described, but the
419 actual decoder process MUST be {\em entirely mathematically equivalent}
420 to the described process.
421 Where appropriate, a non-normative description of encoder processes is
422 included.
423 These sections will be marked as such, and a proper Theora encoder is not
424 bound to follow them.
426 %TODO: \subsection{Hardware Profile}
429 \chapter{Coded Video Structure}
431 Theora's encoding and decoding process is based on $8\times 8$ blocks of
432 pixels.
433 This section describes how a video frame is laid out, divided into
434 blocks, and how those blocks are organized.
436 \section{Frame Layout}
438 A video frame in Theora is a two-dimensional array of pixels.
439 Theora, like VP3, uses a right-handed coordinate system, with the origin in the
440 lower-left corner of the frame.
441 This is contrary to many video formats which use a left-handed coordinate
442 system with the origin in the upper-left corner of the frame.
443 %INT: This means that for interlaced material, the definition of `even fields'
444 %INT: and `odd fields' may be reversed between Theora and other video codecs.
445 %INT: This document will always refer to them as `top fields' and `bottom
446 %INT: fields'.
448 Theora divides the pixel array up into three separate \term{color planes}, one
449 for each of the $Y'$, $C_b$, and $C_r$ components of the pixel.
450 The $Y'$ plane is also called the \term{luma plane}, and the $C_b$ and $C_r$
451 planes are also called the \term{chroma planes}.
452 Each plane is assigned a numerical value, as shown in
453 Table~\ref{tab:color-planes}.
455 \begin{table}[htbp]
456 \begin{center}
457 \begin{tabular}{cl}\toprule
458 Index & Color Plane \\\midrule
459 $0$ & $Y'$ \\
460 $1$ & $C_b$ \\
461 $2$ & $C_r$ \\
462 \bottomrule\end{tabular}
463 \end{center}
464 \caption{Color Plane Indices}
465 \label{tab:color-planes}
466 \end{table}
468 In some pixel formats, the chroma planes are subsampled by a factor of two
469 in one or both directions.
470 This means that the width or height of the chroma planes may be half that of
471 the total frame width and height.
472 The luma plane is never subsampled.
474 \section{Picture Region}
476 An encoded video frame in Theora is required to have a width and height that
477 are multiples of sixteen, making an integral number of blocks even when the
478 chroma planes are subsampled.
479 However, inside a frame a smaller \term{picture region} may be defined
480 to present material whose dimensions are not a multiple of sixteen pixels, as
481 shown in Figure~\ref{fig:pic-frame}.
482 The picture region can be offset from the lower-left corner of the frame by up
483 to 255 pixels in each direction, and may have an arbitrary width and height,
484 provided that it is contained entirely within the coded frame.
485 It is this picture region that contains the actual video data.
486 The portions of the frame which lie outside the picture region may contain
487 arbitrary image data, so the frame must be cropped to the picture region
488 before display.
489 The picture region plays no other role in the decode process, which operates on
490 the entire video frame.
492 \begin{figure}[htbp]
493 \begin{center}
494 \includegraphics{pic-frame}
495 \end{center}
496 \caption{Location of frame and picture regions}
497 \label{fig:pic-frame}
498 \end{figure}
500 \section{Blocks and Super Blocks}
501 \label{sec:blocks-and-sbs}
503 Each color plane is subdivided into \term{blocks} of $8\times 8$ pixels.
504 Blocks are grouped into $4\times 4$ arrays called \term{super blocks} as
505 shown in Figure~\ref{fig:superblock}.
506 Each color plane has its own set of blocks and super blocks.
507 If the chroma planes are subsampled, they are still divided into $8\times 8$
508 blocks of pixels; there are just fewer blocks than in the luma plane.
509 The boundaries of blocks and super blocks in the luma plane do not necessarily
510 coincide with those of the chroma planes, if the chroma planes have been
511 subsampled.
513 \begin{figure}[htbp]
514 \begin{center}
515 \includegraphics{superblock}
516 \end{center}
517 \caption{Subdivision of a frame into blocks and super blocks}
518 \label{fig:superblock}
519 \end{figure}
521 Blocks are accessed in two different orders in the various decoder processes.
522 The first is \term{raster order}, illustrated in Figure~\ref{fig:raster-block}.
523 This accesses each block in row-major order, starting in the lower left of the
524 frame and continuing along the bottom row of the entire frame, followed by the
525 next row up, starting on the left edge of the frame, etc.
527 \begin{figure}[htbp]
528 \begin{center}
529 \includegraphics{raster-block}
530 \end{center}
531 \caption{Raster ordering of $n\times m$ blocks}
532 \label{fig:raster-block}
533 \end{figure}
535 The second is \term{coded order}.
536 In coded order, blocks are accessed by super block.
537 Within each frame, super blocks are traversed in raster order,
538 similar to raster order for blocks.
539 Within each super block, however, blocks are accessed in a Hilbert curve
540 pattern, illustrated in Figure~\ref{fig:hilbert-block}.
541 If a color plane does not contain a complete super block on the top or right
542 sides, the same ordering is still used, simply with any blocks outside the
543 frame boundary omitted.
545 \begin{figure}[htbp]
546 \begin{center}
547 \includegraphics{hilbert-block}
548 \end{center}
549 \caption{Hilbert curve ordering of blocks within a super block}
550 \label{fig:hilbert-block}
551 \end{figure}
553 To illustrate this ordering, consider a frame that is 240 pixels wide and
554 48 pixels high.
555 Each row of the luma plane has 30 blocks and 8 super blocks, and there are 6
556 rows of blocks and two rows of super blocks.
558 %When accessed in raster order, each block in the luma plane is assigned the
559 % following indices:
561 %\vspace{\baselineskip}
562 %\begin{center}
563 %\begin{tabular}{|ccccccc|}\hline
564 %150 & 151 & 152 & 153 & $\ldots$ & 178 & 179 \\
565 %120 & 121 & 122 & 123 & $\ldots$ & 148 & 149 \\\hline
566 % 90 & 91 & 92 & 93 & $\ldots$ & 118 & 119 \\
567 % 60 & 61 & 62 & 63 & $\ldots$ & 88 & 89 \\
568 % 30 & 31 & 32 & 33 & $\ldots$ & 58 & 59 \\
569 % 0 & 1 & 2 & 3 & $\ldots$ & 28 & 29 \\\hline
570 %\end{tabular}
571 %\end{center}
572 %\vspace{\baselineskip}
574 When accessed in coded order, each block in the luma plane is assigned the
575 following indices:
577 \vspace{\baselineskip}
578 \begin{center}
579 \begin{tabular}{|cccc|c|cc|}\hline
580 123 & 122 & 125 & 124 & $\ldots$ & 179 & 178 \\
581 120 & 121 & 126 & 127 & $\ldots$ & 176 & 177 \\\hline
582 5 & 6 & 9 & 10 & $\ldots$ & 117 & 118 \\
583 4 & 7 & 8 & 11 & $\ldots$ & 116 & 119 \\
584 3 & 2 & 13 & 12 & $\ldots$ & 115 & 114 \\
585 0 & 1 & 14 & 15 & $\ldots$ & 112 & 113 \\\hline
586 \end{tabular}
587 \end{center}
588 \vspace{\baselineskip}
590 Here the index values specify the order in which the blocks would be accessed.
591 The indices of the blocks are numbered continuously from one color plane to the
592 next.
593 They do not reset to zero at the start of each plane.
594 Instead, the numbering increases continuously from the $Y'$ plane to the $C_b$
595 plane to the $C_r$ plane.
596 The implication is that the blocks from all planes are treated as a unit during
597 the various processing steps.
599 Although blocks are sometimes accessed in raster order, in this document the
600 index associated with a block is {\em always} its index in coded order.
602 \section{Macro Blocks}
603 \label{sec:mbs}
605 A macro block contains a $2\times 2$ array of blocks in the luma plane
606 {\em and} the co-located blocks in the chroma planes, as shown in
607 Figure~\ref{fig:macroblock}.
608 Thus macro blocks can represent anywhere from six to twelve blocks, depending
609 on how the chroma planes are subsampled.
610 This is in contrast to super blocks, which only contain blocks from a single
611 color plane.
612 % the whole super vs. macro blocks thing is a little confusing, and it can be
613 % hard to remember which is what initially. A figure would/will help here,
614 % but I tried to add some text emphasizing the difference in terms of
615 % functionality.
616 %TBT: At this point we haven't described any functionality yet.
617 %TBT: As far as the reader knows, the only purpose of the blocks, macro blocks
618 %TBT: and super blocks is for data organization---and for blocks and super
619 %TBT: blocks, this is essentially true.
620 %TBT: So lets restrict the differences we emphasize to those of data
621 %TBT: organization, which the sentence I just added above does.
622 Macro blocks contain information about coding mode and motion vectors for the
623 corresponding blocks in all color planes.
625 \begin{figure}[htbp]
626 \begin{center}
627 \includegraphics{macroblock}
628 \end{center}
629 \caption{Subdivision of a frame into macro blocks}
630 \label{fig:macroblock}
631 \end{figure}
633 Macro blocks are also accessed in a \term{coded order}.
634 This coded order proceeds by examining each super block in the luma plane in
635 raster order, and traversing the four macro blocks inside using a smaller
636 Hilbert curve, as shown in Figure~\ref{fig:hilbert-mb}.
637 %r: I rearranged the wording to make a more formal idiom here
638 If the luma plane does not contain a complete super block on the top or right
639 sides, the same ordering is still used, with any macro blocks outside
640 the frame boundary simply omitted.
641 Because the frame size is constrained to be a multiple of 16, there are never
642 any partial macro blocks.
643 Unlike blocks, macro blocks need never be accessed in a pure raster order.
645 \begin{figure}[htbp]
646 \begin{center}
647 \includegraphics{hilbert-mb}
648 \end{center}
649 \caption{Hilbert curve ordering of macro blocks within a super block}
650 \label{fig:hilbert-mb}
651 \end{figure}
653 Using the same frame size as the example above, there are 15 macro blocks in
654 each row and 3 rows of macro blocks.
655 The macro blocks are assigned the following indices:
657 \vspace{\baselineskip}
658 \begin{center}
659 \begin{tabular}{|cc|cc|c|cc|c|}\hline
660 30 & 31 & 32 & 33 & $\cdots$ & 42 & 43 & 44 \\\hline
661 1 & 2 & 5 & 6 & $\cdots$ & 25 & 26 & 29 \\
662 0 & 3 & 4 & 7 & $\cdots$ & 24 & 27 & 28 \\\hline
663 \end{tabular}
664 \end{center}
665 \vspace{\baselineskip}
667 \section{Coding Modes and Prediction}
669 Each block is coded using one of a small, fixed set of \term{coding modes} that
670 define how the block is predicted from previous frames.
671 A block is predicted using one of two \term{reference frames}, selected
672 according to the coding mode.
673 A reference frame is the fully decoded version of a previous frame in the
674 stream.
675 The first available reference frame is the previous intra frame, called the
676 \term{golden frame}.
677 The second available reference frame is the previous frame, whether it was an
678 intra frame or an inter frame.
679 If the previous frame was an intra frame, then both reference frames are the
680 same.
681 See Figure~\ref{fig:reference-frames} for an illustration of the reference
682 frames used for an inter frame that does not follow an intra frame.
684 \begin{figure}[htbp]
685 \begin{center}
686 \includegraphics{reference-frames}
687 \end{center}
688 \caption{Example of reference frames for an inter frame}
689 \label{fig:reference-frames}
690 \end{figure}
692 Two coding modes in particular are worth mentioning here.
693 The INTRA mode is used for blocks that are not predicted from either reference
694 frame.
695 This is the only coding mode allowed in intra frames.
696 The INTER\_NOMV coding mode uses the co-located contents of the block in the
697 previous frame as the predictor.
698 This is the default coding mode.
700 \section{DCT Coefficients}
701 \label{sec:dct-coeffs}
703 A \term{residual} is added to the predicted contents of a block to form the
704 final reconstruction.
705 The residual is stored as a set of quantized coefficients from an integer
706 approximation of a two-dimensional Type II Discrete Cosine Transform.
707 The DCT takes an $8\times 8$ array of pixel values as input and returns an
708 $8\times 8$ array of coefficient values.
709 The \term{natural ordering} of these coefficients is defined to be row-major
710 order, from lowest to highest frequency.
711 They are also often indexed in \term{zig-zag order}, as shown in
712 Figure~\ref{tab:zig-zag}.
714 \begin{figure}[htbp]
715 \begin{center}
716 \begin{tabular}[c]{rr|c@{}c@{}c@{}c@{}c@{}c@{}c@{}c@{}c@{}c@{}c@{}c@{}c@{}c@{}c}
717 &\multicolumn{1}{r}{} & && &&&&&$c$&&& && && \\
718 &\multicolumn{1}{r}{} &0&&1&&2&&3&&4&&5&&6&&7 \\\cline{3-17}
719 &0 & 0 &$\rightarrow$& 1 && 5 &$\rightarrow$& 6 && 14 &$\rightarrow$& 15 && 27 &$\rightarrow$& 28 \\[-0.5\defaultaddspace]
720 & & &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$& \\
721 &1 & 2 & & 4 && 7 & & 13 && 16 & & 26 && 29 & & 42 \\[-0.5\defaultaddspace]
722 & &$\downarrow$&$\nearrow$&&$\swarrow$&&$\nearrow$&&$\swarrow$&&$\nearrow$&&$\swarrow$&&$\nearrow$&$\downarrow$ \\
723 &2 & 3 & & 8 && 12 & & 17 && 25 & & 30 && 41 & & 43 \\[-0.5\defaultaddspace]
724 & & &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$& \\
725 &3 & 9 & & 11 && 18 & & 24 && 31 & & 40 && 44 & & 53 \\[-0.5\defaultaddspace]
726 $r$&&$\downarrow$&$\nearrow$&&$\swarrow$&&$\nearrow$&&$\swarrow$&&$\nearrow$&&$\swarrow$&&$\nearrow$&$\downarrow$ \\
727 &4 & 10 & & 19 && 23 & & 32 && 39 & & 45 && 52 & & 54 \\[-0.5\defaultaddspace]
728 & & &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$& \\
729 &5 & 20 & & 22 && 33 & & 38 && 46 & & 51 && 55 & & 60 \\[-0.5\defaultaddspace]
730 & &$\downarrow$&$\nearrow$&&$\swarrow$&&$\nearrow$&&$\swarrow$&&$\nearrow$&&$\swarrow$&&$\nearrow$&$\downarrow$ \\
731 &6 & 21 & & 34 && 37 & & 47 && 50 & & 56 && 59 & & 61 \\[-0.5\defaultaddspace]
732 & & &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$& \\
733 &7 & 35 &$\rightarrow$& 36 && 48 &$\rightarrow$& 49 && 57 &$\rightarrow$& 58 && 62 &$\rightarrow$& 63
734 \end{tabular}
735 \end{center}
736 \caption{Zig-zag order}
737 \label{tab:zig-zag}
738 \end{figure}
740 \begin{verse}
741 {\bf Note:} the row and column indices refer to {\em frequency number} and not
742 pixel locations.
743 The frequency numbers are defined independently of the memory organization of
744 the pixels.
745 They have been written from top to bottom here to follow conventional notation,
746 despite the right-handed coordinate system Theora uses for pixel locations.
747 %RG: I'd rather we were internally consistent and put dc at the lower left.
748 Many implementations of the DCT operate `in-place'.
749 That is, they return DCT coefficients in the same memory buffer that the
750 initial pixel values were stored in.
751 Due to the right-handed coordinate system used for pixel locations in Theora,
752 one must note carefully how both pixel values and DCT coefficients are
753 organized in memory in such a system.
754 \end{verse}
756 DCT coefficient $(0,0)$ is called the \term{DC coefficient}.
757 All the other coefficients are called \term{AC coefficients}.
760 \chapter{Decoding Overview}
762 This section provides a high level description of the Theora codec's
763 construction.
764 A bit-by-bit specification appears beginning in Section~\ref{sec:bitpacking}.
765 The later sections assume a high-level understanding of the Theora decode
766 process, which is provided below.
768 \section{Decoder Configuration}
770 Decoder setup consists of configuration of the quantization matrices and the
771 Huffman codebooks for the DCT coefficients, and a table of limit values for
772 the deblocking filter.
773 The remainder of the decoding pipeline is not configurable.
775 \subsection{Global Configuration}
777 The global codec configuration consists of a few video related fields, such as
778 frame rate, frame size, picture size and offset, aspect ratio, color space,
779 pixel format, and a version number.
780 The version number is divided into a major version, a minor version, and a
781 minor revision number.
782 %r: afaik the released vp3 codec called itself 3.1 and is compatible w/ theora
783 %r: even though we received the in-progress 3.2 codebase
784 For the format defined in this specification, these are `3', `2', and
785 `0', respectively, in reference to Theora's origin as a successor to the VP3.1
786 format.
788 \subsection{Quantization Matrices}
790 Theora allows up to 384 different quantization matrices to be defined, one for
791 each \term{quantization type}, \term{color plane} ($Y'$, $C_b$, or $C_r$), and
792 \term{quantization index}, \qi, which ranges from zero to 63, inclusive.
793 There are currently two quantization types defined, which depend on the coding
794 mode of the block being dequantized, as shown in Table~\ref{tab:quant-types}.
796 \begin{table}[htbp]
797 \begin{center}
798 \begin{tabular}{cl}\toprule
799 Quantization Type & Usage \\\midrule
800 $0$ & INTRA-mode blocks \\
801 $1$ & Blocks in any other mode. \\
802 \bottomrule\end{tabular}
803 \end{center}
804 \caption{Quantization Type Indices}
805 \label{tab:quant-types}
806 \end{table}
808 %r: I think 'nominally' is more specific than 'generally' here
809 The quantization index, on the other hand, nominally represents a progressive
810 range of quality levels, from low quality near zero to high quality near 63.
811 However, the interpretation is arbitrary, and it is possible, for example, to
812 partition the scale into two completely separate ranges with 32 levels each
813 that are meant to represent different classes of source material, or any
814 other arrangement that suits the encoder's requirements.
816 Each quantization matrix is an $8\times 8$ matrix of 16-bit values, which is
817 used to quantize the output of the $8\times 8$ DCT\@.
818 Quantization matrices are specified using three components: a
819 \term{base matrix} and two \term{scale values}.
820 The first scale value is the \term{DC scale}, which is applied to the DC
821 component of the base matrix.
822 The second scale value is the \term{AC scale}, which is applied to all the
823 other components of the base matrix.
824 There are 64 DC scale values and 64 AC scale values, one for each \qi\ value.
826 There are 64 elements in each base matrix, one for each DCT coefficient.
827 They are stored in natural order (cf.\ Section~\ref{sec:dct-coeffs}).
828 There is a separate set of base matrices for each quantization type and each
829 color plane, with up to 64 possible base matrices in each set, one for each
830 \qi\ value.
831 %r: we will mention that the given matrices must bound the \qi range
832 %r: in the detailed section. it's not important at this level.
833 Typically the bitstream contains matrices for only a sparse subset of the
834 possible \qi\ values.
835 The base matrices for the remainder of the \qi\ values are computed using
836 linear interpolation.
837 This configuration allows the encoder to adjust the quantization matrices to
838 approximate the complex, non-linear response of the human visual system to
839 different quantization errors.
841 Finally, because the in-loop deblocking filter strength depends on the strength
842 of the quantization matrices defined in this header, a table of 64 \term{loop
843 filter limit values} is defined, one for each \qi\ value.
845 The precise specification of how all of this information is decoded appears in
846 Section~\ref{sub:loop-filter-limits} and Section~\ref{sub:quant-params}.
848 \subsection{Huffman Codebooks}
850 Theora uses 80 configurable binary Huffman codes to represent the 32 tokens
851 used to encode DCT coefficients.
852 Each of the 32 token values has a different semantic meaning and is used to
853 represent single coefficient values, zero runs, combinations of the two, and
854 \term{End-Of-Block markers}.
856 The 80 codes are divided up into five groups of 16, with each group
857 corresponding to a set of DCT coefficient indices.
858 The first group corresponds to the DC coefficient, while the remaining four
859 groups correspond to different subsets of the AC coefficients.
860 Within each frame, two pairs of 4-bit codebook indices are stored.
861 The first pair selects which codebooks to use from the DC coefficient group for
862 the $Y'$ coefficients and the $C_b$ and $C_r$ coefficients.
863 The second pair selects which codebooks to use from {\em all four} of the AC
864 coefficient groups for the $Y'$ coefficients and the $C_b$ and $C_r$
865 coefficients.
867 The precise specification of how the codebooks are decoded appears in
868 Section~\ref{sub:huffman-tables}.
870 \section{High-Level Decode Process}
872 \subsection{Decoder Setup}
874 Before decoding can begin, a decoder MUST be initialized using the bitstream
875 headers corresponding to the stream to be decoded.
876 Theora uses three header packets; all are required, in order, by this
877 specification.
878 Once set up, decode may begin at any intra-frame packet---or even inter-frame
879 packets, provided the appropriate decoded reference frames have already been
880 decoded and cached---belonging to the Theora stream.
881 In Theora I, all packets after the three initial headers are intra-frame or
882 inter-frame packets.
884 The header packets are, in order, the identification header, the comment
885 header, and the setup header.
887 \paragraph{Identification Header}
889 The identification header identifies the stream as Theora, provides a version
890 number, and defines the characteristics of the video stream such as frame
891 size.
892 A complete description of the identification header appears in
893 Section~\ref{sec:idheader}.
895 \paragraph{Comment Header}
897 The comment header includes user text comments (`tags') and a vendor string
898 for the application/library that produced the stream.
899 The format of the comment header is the same as that used in the Vorbis I and
900 Speex codecs, with slight modifications due to the use of a different bit
901 packing mechanism.
902 A complete description of how the comment header is coded appears in
903 Section~\ref{sec:commentheader}, along with a suggested set of tags.
905 \paragraph{Setup Header}
907 The setup header includes extensive codec setup information, including the
908 complete set of quantization matrices and Huffman codebooks needed to decode
909 the DCT coefficients.
910 A complete description of the setup header appears in
911 Section~\ref{sec:setupheader}.
913 \subsection{Decode Procedure}
915 The decoding and synthesis procedure for all video packets is fundamentally the
916 same, with some steps omitted for intra frames.
917 \begin{itemize}
918 \item
919 Decode packet type flag.
920 \item
921 Decode frame header.
922 \item
923 Decode coded block information (inter frames only).
924 \item
925 Decode macro block mode information (inter frames only).
926 \item
927 Decode motion vectors (inter frames only).
928 \item
929 Decode block-level \qi\ information.
930 \item
931 Decode DC coefficient for each coded block.
932 \item
933 Decode 1st AC coefficient for each coded block.
934 \item
935 Decode 2nd AC coefficient for each coded block.
936 \item
937 $\ldots$
938 \item
939 Decode 63rd AC coefficient for each coded block.
940 \item Perform DC coefficient prediction.
941 \item Reconstruct coded blocks.
942 \item Copy uncoded blocks.
943 \item Perform loop filtering.
944 \end{itemize}
946 \begin{verse}
947 {\bf Note:} clever rearrangement of the steps in this process is possible.
948 As an example, in a memory-constrained environment, one can make multiple
949 passes through the DCT coefficients to avoid buffering them all in memory.
950 On the first pass, the starting location of each coefficient is identified, and
951 then 64 separate get pointers are used to read in the 64 DCT coefficients
952 required to reconstruct each coded block in sequence.
953 This operation produces entirely equivalent output and is naturally perfectly
954 legal.
955 It may even be a benefit in non-memory-constrained environments due to a
956 reduced cache footprint.
957 \end{verse}
959 Theora makes equivalence easy to check by defining all decoding operations in
960 terms of exact integer operations.
961 No floating-point math is required, and in particular, the implementation of
962 the iDCT transform MUST be followed precisely.
963 This prevents the decoder mismatch problem commonly associated with codecs that
964 provide a less rigorous transform specification.
965 Such a mismatch problem would be devastating to Theora, since a single rounding
966 error in one frame could propagate throughout the entire succeeding frame due
967 to DC prediction.
969 \paragraph{Packet Type Decode}
971 Theora I uses four packet types.
972 The first three packet types mark each of the three Theora headers described
973 above.
974 The fourth packet type marks a video packet.
975 All other packet types are reserved; packets marked with a reserved type should
976 be ignored.
978 \paragraph{Frame Header Decode}
980 The frame header contains some global information about the current frame.
981 The first is the frame type field, which specifies if this is an intra frame or
982 an inter frame.
983 Inter frames predict their contents from previously decoded reference frames.
984 Intra frames can be independently decoded with no established reference frames.
986 The next piece of information in the frame header is the list of \qi\ values
987 allowed in the frame.
988 Theora allows from one to three different \qi\ values to be used in a single
989 frame, each of which selects a set of six quantization matrices, one for each
990 quantization type (inter or intra), and one for each color plane.
991 The first \qi\ value is {\em always} used when dequantizing DC coefficients.
992 The \qi\ value used when dequantizing AC coefficients, however, can vary from
993 block to block.
994 VP3, in contrast, only allows a single \qi\ value per frame for both the DC and
995 AC coefficients.
997 \paragraph{Coded Block Information}
999 This stage determines which blocks in the frame are coded and which are
1000 uncoded.
1001 A \term{coded block list} is constructed which lists all the coded blocks in
1002 coded order.
1003 For intra frames, every block is coded, and so no data needs to be read from
1004 the packet.
1006 \paragraph{Macro Block Mode Information}
1008 For intra frames, every block is coded in INTRA mode, and this stage is
1009 skipped.
1010 In inter frames a \term{coded macro block list} is constructed from the coded
1011 block list.
1012 Any macro block which has at least one of its luma blocks coded is considered
1013 coded; all other macro blocks are uncoded, even if they contain coded chroma
1014 blocks.
1015 A coding mode is decoded for each coded macro block, and assigned to all its
1016 constituent coded blocks.
1017 All coded chroma blocks in uncoded macro blocks are assigned the INTER\_NOMV
1018 coding mode.
1020 \paragraph{Motion Vectors}
1022 Intra frames are coded entirely in INTRA mode, and so this stage is skipped.
1023 Some inter coding modes, however, require one or more motion vectors to be
1024 specified for each macro block.
1025 These are decoded in this stage, and an appropriate motion vector is assigned
1026 to each coded block in the macro block.
1028 \paragraph{Block-Level \qi\ Information}
1030 If a frame allows multiple \qi\ values, the \qi\ value assigned to each block
1031 is decoded here.
1032 Frames that use only a single \qi\ value have nothing to decode.
1034 \paragraph{DCT Coefficients}
1036 Finally, the quantized DCT coefficients are decoded.
1037 A list of DCT coefficients in zig-zag order for a single block is represented
1038 by a list of tokens.
1039 A token can take on one of 32 different values, each with a different semantic
1040 meaning.
1041 A single token can represent a single DCT coefficient, a run of zero
1042 coefficients within a single block, a combination of a run of zero
1043 coefficients followed by a single non-zero coefficient, an
1044 \term{End-Of-Block marker}, or a run of EOB markers.
1045 EOB markers signify that the remainder of the block is one long zero run.
1046 Unlike JPEG and MPEG, there is no requirement for each block to end with
1047 a special marker.
1048 If non-EOB tokens yield values for all 64 of the coefficients in a block, then
1049 no EOB marker occurs.
1051 Each token is associated with a specific \term{token index} in a block.
1052 For single-coefficient tokens, this index is the zig-zag index of the coefficient in
1053 the block.
1054 For zero-run tokens, this index is the zig-zag index of the {\em first}
1055 coefficient in the run.
1056 For combination tokens, the index is again the zig-zag index of the first
1057 coefficient in the zero run.
1058 For EOB markers, which signify that the remainder of the block is one long zero
1059 run, the index is the zig-zag index of the first zero coefficient in that run.
1060 For EOB runs, the token index is that of the first EOB marker in the run.
1061 Due to zero runs and EOB markers, a block does not have to have a token for
1062 every zig-zag index.
1064 Tokens are grouped in the stream by token index, not by the block they
1065 originate from.
1066 This means that for each zig-zag index in turn, the tokens with that index from
1067 {\em all} the coded blocks are coded in coded block order.
1068 When decoding, a current token index is maintained for each coded block.
1069 This index is advanced by the number of coefficients that are added to the
1070 block as each token is decoded.
1071 After fully decoding all the tokens with token index \ti, the current token
1072 index of every coded block will be \ti\ or greater.
1074 If an EOB run of $n$ blocks is decoded at token index \ti, then it ends the
1075 next $n$ blocks in coded block order whose current token index is equal to
1076 \ti, but not greater.
1077 If there are fewer than $n$ blocks with a current token index of \ti, then the
1078 decoder goes through the coded block list again from the start, ending blocks
1079 with a current token index of $\ti+1$, and so on, until $n$ blocks have been
1080 ended.
1082 Tokens are read by parsing a Huffman code that depends on \ti\ and the color
1083 plane of the next coded block whose current token index is equal to \ti, but
1084 not greater.
1085 The Huffman codebooks are selected on a per-frame basis from the 80 codebooks
1086 defined in the setup header.
1087 Many tokens have a fixed number of \term{extra bits} associated with them.
1088 These bits are read from the packet immediately after the token is decoded.
1089 These are used to define things such as coefficient magnitude, sign, and the
1090 length of runs.
1092 \paragraph{DC Prediction}
1094 After the coefficients for each block are decoded, the quantized DC value of
1095 each block is adjusted based on the DC values of its neighbors.
1096 This adjustment is performed by scanning the blocks in raster order, not coded
1097 block order.
1099 \paragraph{Reconstruction}
1101 Finally, using the coding mode, motion vector (if applicable), quantized
1102 coefficient list, and \qi\ value defined for each block, all the coded blocks
1103 are reconstructed.
1104 The DCT coefficients are dequantized, an inverse DCT transform is applied, and
1105 the predictor is formed from the coding mode and motion vector and added to
1106 the result.
1108 \paragraph{Loop Filtering}
1110 To complete the reconstructed frame, an ``in-loop'' deblocking filter is
1111 applied to the edges of all coded blocks.
1114 \chapter{Video Formats}
1116 This section gives a precise description of the video formats that Theora is
1117 capable of storing.
1118 The Theora bitstream is capable of handling video at any arbitrary resolution
1119 up to $1048560\times 1048560$.
1120 Such video would require almost three terabytes of storage per frame for
1121 uncompressed data, so compliant decoders MAY refuse to decode images with
1122 sizes beyond their capabilities.
1123 %TODO: What MUST a "compliant" decoder accept?
1124 %TODO: What SHOULD a decoder use for an upper bound? (derive from total amount
1125 %TODO: of memory and memory bandwidth)
1126 %TODO: Any lower limits?
1127 %TODO: We really need hardware device profiles, but such things should be
1128 %TODO: developed with input from the hardware community.
1129 %TODO: And even then sometimes they're useless
1131 The remainder of this section talks about two specific aspects of the video
1132 format: the color space and the pixel format.
1133 The first describes how color is represented and how to transform that color
1134 representation into a device independent color space such as CIE $XYZ$ (1931).
1135 The second describes the various schemes for sampling the color values in time
1136 and space.
1138 \section{Color Space Conventions}
1140 There are a large number of different color standards used in digital video.
1141 Since Theora is a lossy codec, it restricts itself to only a few of them to
1142 simplify playback.
1143 Unlike the alternate method of describing all the parameters of the color
1144 model, this allows a few dedicated routines for color conversion to be written
1145 and heavily optimized in a decoder.
1146 More flexible conversion functions should instead be specified in an encoder,
1147 where additional computational complexity is more easily tolerated.
1148 The color spaces were selected to give a fair representation of color standards
1149 in use around the world today.
1150 Most of the standards that do not exactly match one of these can be converted
1151 to one fairly easily.
1153 All Theora color spaces are $Y'C_bC_r$ color spaces with one luma channel and
1154 two chroma channels.
1155 Each channel contains 8-bit discrete values in the range $0\ldots255$, which
1156 represent non-linear gamma pre-corrected signals.
1157 The Theora identification header contains an 8-bit value that describes the
1158 color space.
1159 This merely selects one of the color spaces available from an enumerated list.
1160 Currently, only two color spaces are defined, with a third possibility that
1161 indicates the color space is ``unknown''.
1163 \section{Color Space Conversions and Parameters}
1164 \label{sec:color-xforms}
1166 The parameters which describe the conversions between each color space are
1167 listed below.
1168 These are the parameters needed to map colors from the encoded $Y'C_bC_r$
1169 representation to the device-independent color space CIE $XYZ$ (1931).
1170 These parameters define abstract mathematical conversion functions which are
1171 infinitely precise.
1172 The accuracy and precision with which the conversions are performed in a real
1173 system is determined by the quality of output desired and the available
1174 processing power.
1175 Exact decoder output is defined by this specification only in the original
1176 $Y'C_bC_r$ space.
1178 \begin{description}
1179 \item[$Y'C_bC_r$ to $Y'P_bP_r$:]
1180 \vspace{\baselineskip}\hfill
1182 This conversion takes 8-bit discrete values in the range $[0\ldots255]$ and
1183 maps them to real values in the range $[0\ldots1]$ for $Y'$ and
1184 $[-\frac{1}{2}\ldots\frac{1}{2}]$ for $P_b$ and $P_r$.
1185 Because some values may fall outside the offset and excursion defined for each
1186 channel in the $Y'C_bC_r$ space, the results may fall outside these ranges in
1187 $Y'P_bP_r$ space.
1188 No clamping should be done at this stage.
1190 \begin{align}
1191 Y'_\mathrm{out} & =
1192 \frac{Y'_\mathrm{in}-\mathrm{Offset}_Y}{\mathrm{Excursion}_Y} \\
1193 P_b & =
1194 \frac{C_b-\mathrm{Offset}_{C_b}}{\mathrm{Excursion}_{C_b}} \\
1195 P_r & =
1196 \frac{C_r-\mathrm{Offset}_{C_r}}{\mathrm{Excursion}_{C_r}}
1197 \end{align}
1199 Parameters: $\mathrm{Offset}_{Y,C_b,C_r}$, $\mathrm{Excursion}_{Y,C_b,C_r}$.
1201 \item[$Y'P_bP_r$ to $R'G'B'$:]
1202 \vspace{\baselineskip}\hfill
1204 This conversion takes the one luma and two chroma channel representation and
1205 maps it to the non-linear $R'G'B'$ space used to drive actual output devices.
1206 Values should be clamped into the range $[0\ldots1]$ after this stage.
1208 \begin{align}
1209 R' & = Y'+2(1-K_r)P_r \\
1210 G' & = Y'-2\frac{(1-K_b)K_b}{1-K_b-K_r}P_b-2\frac{(1-K_r)K_r}{1-K_b-K_r}P_r\\
1211 B' & = Y'+2(1-K_b)P_b
1212 \end{align}
1214 Parameters: $K_b,K_r$.
1216 \item[$R'G'B'$ to $RGB$ (Output device gamma correction):]
1217 \vspace{\baselineskip}\hfill
1219 This conversion takes the non-linear $R'G'B'$ voltage levels and maps them to
1220 linear light levels produced by the actual output device.
1221 Note that this conversion is only that of the output device, and its inverse is
1222 {\em not} that used by the input device.
1223 Because a dim viewing environment is assumed in most television standards, the
1224 overall gamma between the input and output devices is usually around $1.1$ to
1225 $1.2$, and not a strict $1.0$.
1227 For calibration with actual output devices, the model
1228 \begin{align}
1229 L & =(E'+\Delta)^\gamma
1230 \end{align}
1231 should be used, with $\Delta$ the free parameter and $\gamma$ held fixed to
1232 the value specified in this document.
1233 The conversion function presented here is an idealized version with $\Delta=0$.
1235 \begin{align}
1236 R & = R'^\gamma \\
1237 G & = G'^\gamma \\
1238 B & = B'^\gamma
1239 \end{align}
1241 Parameters: $\gamma$.
1243 \item[$RGB$ to $R'G'B'$ (Input device gamma correction):]
1244 \vspace{\baselineskip}\hfill
1246 %TODO: Tag section as non-normative
1248 This conversion takes linear light levels and maps them to the non-linear
1249 voltage levels produced in the actual input device.
1250 This information is merely informative.
1251 It is not required for building a decoder or for converting between the various
1252 formats and the actual output capabilities of a particular device.
1254 A linear segment is introduced on the low end to reduce noise in dark areas of
1255 the image.
1256 The rest of the scale is adjusted so that the power segment of the curve
1257 intersects the linear segment with the proper slope, and so that it still maps
1258 0 to 0 and 1 to 1.
1260 \begin{align}
1261 R' & = \left\{
1262 \begin{array}{ll}
1263 \alpha R, & 0\le R<\delta \\
1264 (1+\epsilon)R^\beta-\epsilon, & \delta\le R\le1
1265 \end{array}\right. \\
1266 G' & = \left\{
1267 \begin{array}{ll}
1268 \alpha G, & 0\le G<\delta \\
1269 (1+\epsilon)G^\beta-\epsilon, & \delta\le G\le1
1270 \end{array}\right. \\
1271 B' & = \left\{
1272 \begin{array}{ll}
1273 \alpha B, & 0\le B<\delta \\
1274 (1+\epsilon)B^\beta-\epsilon, & \delta\le B\le1
1275 \end{array}\right.
1276 \end{align}
1278 Parameters: $\beta$, $\alpha$, $\delta$, $\epsilon$.
1280 \item[$RGB$ to CIE $XYZ$ (1931):]
1281 \vspace{\baselineskip}\hfill
1283 This conversion maps a device-dependent linear RGB space to the
1284 device-independent linear CIE $XYZ$ space.
1285 The parameters are the CIE chromaticity coordinates of the three
1286 primaries---red, green, and blue---as well as the chromaticity coordinates
1287 of the white point of the device.
1288 This is how hardware manufacturers and standards typically describe a
1289 particular $RGB$ space.
1290 The math required to convert these parameters into a useful transformation
1291 matrix is reproduced below.
1293 \begin{align}
1294 F & =
1295 \left[\begin{array}{ccc}
1296 \frac{x_r}{y_r} & \frac{x_g}{y_g} & \frac{x_b}{y_b} \\
1297 1 & 1 & 1 \\
1298 \frac{1-x_r-y_r}{y_r} & \frac{1-x_g-y_g}{y_g} & \frac{1-x_b-y_b}{y_b}
1299 \end{array}\right] \\
1300 \left[\begin{array}{c}
1301 s_r \\
1302 s_g \\
1303 s_b
1304 \end{array}\right] & =
1305 F^{-1}\left[\begin{array}{c}
1306 \frac{x_w}{y_w} \\
1307 1 \\
1308 \frac{1-x_w-y_w}{y_w}
1309 \end{array}\right] \\
1310 \left[\begin{array}{c}
1311 X \\
1312 Y \\
1313 Z
1314 \end{array}\right] & =
1315 F\left[\begin{array}{c}
1316 s_rR \\
1317 s_gG \\
1318 s_bB
1319 \end{array}\right]
1320 \end{align}
1321 Parameters: $x_r,x_g,x_b,x_w, y_r,y_g,y_b,y_w$.
1323 \end{description}
1325 \section{Available Color Spaces}
1326 \label{sec:colorspaces}
1328 These are the color spaces currently defined for use by Theora video.
1329 Each one has a short name, with which it is referred to in this document, and
1330 a more detailed specification of the standards from which its parameters are
1331 derived.
1332 Some standards do not specify all the parameters necessary.
1333 For these unspecified parameters, this document serves as the definition of
1334 what should be used when encoding or decoding Theora video.
1336 \subsection{Rec.~470M (Rec.~ITU-R~BT.470-6 System M/NTSC with
1337 Rec.~ITU-R~BT.601-5)}
1338 \label{sec:470m}
1340 This color space is used by broadcast television and DVDs in much of the
1341 Americas, Japan, Korea, and the Union of Myanmar \cite{rec470}.
1342 This color space may also be used for System M/PAL (Brazil), with an
1343 appropriate conversion supplied by the encoder to compensate for the
1344 different gamma value.
1345 See Section~\ref{sec:470bg} for an appropriate gamma value to assume for M/PAL
1346 input.
1348 In the US, studio monitors are adjusted to a D65 white point
1349 ($x_w,y_w=0.313,0.329$).
1350 In Japan, studio monitors are adjusted to a D white of 9300K
1351 ($x_w,y_w=0.285,0.293$).
1353 Rec.~470 does not specify a digital encoding of the color signals.
1354 For Theora, Rec.~ITU-R~BT.601-5 \cite{rec601} is used, starting from the
1355 $R'G'B'$ signals specified by Rec.~470.
1357 Rec.~470 does not specify an input gamma function.
1358 For Theora, the Rec.~709 \cite{rec709} input function is assumed.
1359 This is the same as that specified by SMPTE 170M \cite{smpte170m}, which claims
1360 to reflect modern practice in the creation of NTSC signals circa 1994.
1362 The parameters for all the color transformations defined in
1363 Section~\ref{sec:color-xforms} are given in Table~\ref{tab:470m}.
1365 \begin{table}[htb]
1366 \begin{align*}
1367 \mathrm{Offset}_{Y,C_b,C_r} & = (16, 128, 128) \\
1368 \mathrm{Excursion}_{Y,C_b,C_r} & = (219, 224, 224) \\
1369 K_r & = 0.299 \\
1370 K_b & = 0.114 \\
1371 \gamma & = 2.2 \\
1372 \beta & = 0.45 \\
1373 \alpha & = 4.5 \\
1374 \delta & = 0.018 \\
1375 \epsilon & = 0.099 \\
1376 x_r,y_r & = 0.67, 0.33 \\
1377 x_g,y_g & = 0.21, 0.71 \\
1378 x_b,y_b & = 0.14, 0.08 \\
1379 \text{(Illuminant C) } x_w,y_w & = 0.310, 0.316 \\
1380 \end{align*}
1381 \caption{Rec.~470M Parameters}
1382 \label{tab:470m}
1383 \end{table}
1385 \subsection{Rec.~470BG (Rec.~ITU-R~BT.470-6 Systems B and G with
1386 Rec.~ITU-R~BT.601-5)}
1387 \label{sec:470bg}
1389 This color space is used by the PAL and SECAM systems in much of the rest of
1390 the world \cite{rec470}.
1391 This can be used directly by systems (B, B1, D, D1, G, H, I, K, N)/PAL and (B,
1392 D, G, H, K, K1, L)/SECAM\@.
1394 \begin{verse}
1395 {\bf Note:} the Rec.~470BG chromaticity values are different from those
1396 specified in Rec.~470M\@.
1397 When PAL and SECAM systems were first designed, they were based upon the same
1398 primaries as NTSC\@.
1399 However, as methods of making color picture tubes have changed, the primaries
1400 used have changed as well.
1401 The U.S. recommends using correction circuitry to approximate the existing,
1402 standard NTSC primaries.
1403 Current PAL and SECAM systems have standardized on primaries in accord with
1404 more recent technology.
1405 \end{verse}
1407 Rec.~470 provisionally permits the use of the NTSC chromaticity values (given
1408 in Section~\ref{sec:470m}) with legacy PAL and SECAM equipment.
1409 In Theora, material must be decoded assuming the new PAL and SECAM primaries.
1410 Material intended for display on old legacy devices should be converted by the
1411 decoder.
1413 The official Rec.~470BG specifies a gamma value of $\gamma=2.8$.
1414 However, in practice this value is unrealistically high \cite{Poyn97}.
1415 Rec.~470BG states that the overall system gamma should be approximately
1416 $\gamma\beta=1.2$.
1417 Since most cameras pre-correct with a gamma value of $\beta=0.45$,
1418 this suggests an output device gamma of approximately $\gamma=2.67$.
1419 This is the value recommended for use with PAL systems in Theora.
1421 Rec.~470 does not specify a digital encoding of the color signals.
1422 For Theora, Rec.~ITU-R~BT.601-5 \cite{rec601} is used, starting from the
1423 $R'G'B'$ signals specified by Rec.~470.
1425 Rec.~470 does not specify an input gamma function.
1426 For Theora, the Rec.~709 \cite{rec709} input function is assumed.
1428 The parameters for all the color transformations defined in
1429 Section~\ref{sec:color-xforms} are given in Table~\ref{tab:470bg}.
1431 \begin{table}[htb]
1432 \begin{align*}
1433 \mathrm{Offset}_{Y,C_b,C_r} & = (16, 128, 128) \\
1434 \mathrm{Excursion}_{Y,C_b,C_r} & = (219, 224, 224) \\
1435 K_r & = 0.299 \\
1436 K_b & = 0.114 \\
1437 \gamma & = 2.67 \\
1438 \beta & = 0.45 \\
1439 \alpha & = 4.5 \\
1440 \delta & = 0.018 \\
1441 \epsilon & = 0.099 \\
1442 x_r,y_r & = 0.64, 0.33 \\
1443 x_g,y_g & = 0.29, 0.60 \\
1444 x_b,y_b & = 0.15, 0.06 \\
1445 \text{(D65) } x_w,y_w & = 0.313, 0.329 \\
1446 \end{align*}
1447 \caption{Rec.~470BG Parameters}
1448 \label{tab:470bg}
1449 \end{table}
1451 \section{Pixel Formats}
1452 \label{sec:pixfmts}
1454 Theora supports several different pixel formats, each of which uses different
1455 subsampling for the chroma planes relative to the luma plane.
1457 \subsection{4:4:4 Subsampling}
1458 \label{sec:444}
1460 All three color planes are stored at full resolution---each pixel has a $Y'$,
1461 a $C_b$ and a $C_r$ value (see Figure~\ref{fig:pixel444}).
1462 The samples in the different planes are all at co-located sites.
1464 \begin{figure}[htbp]
1465 \begin{center}
1466 \includegraphics{pixel444}
1467 \end{center}
1468 \caption{Pixels encoded 4:4:4}
1469 \label{fig:pixel444}
1470 \end{figure}
1472 % Figure.
1473 %YRB YRB
1477 %YRB YRB
1483 \subsection{4:2:2 Subsampling}
1484 \label{sec:422}
1486 The $C_b$ and $C_r$ planes are stored with half the horizontal resolution of
1487 the $Y'$ plane.
1488 Thus, each of these planes has half the number of horizontal blocks as the luma
1489 plane (see Figure~\ref{fig:pixel422}).
1490 Similarly, they have half the number of horizontal super blocks, rounded up.
1491 Macro blocks are defined across color planes, and so their number does not
1492 change, but each macro block contains half as many chroma blocks.
1494 The chroma samples are vertically aligned with the luma samples, but
1495 horizontally centered between two luma samples.
1496 Thus, each luma sample has a unique closest chroma sample.
1497 A horizontal phase shift may be required to produce signals which use different
1498 horizontal chroma sampling locations for compatibility with different systems.
1500 \begin{figure}[htbp]
1501 \begin{center}
1502 \includegraphics{pixel422}
1503 \end{center}
1504 \caption{Pixels encoded 4:2:2}
1505 \label{fig:pixel422}
1506 \end{figure}
1508 % Figure.
1509 %Y RB Y Y RB Y
1513 %Y RB Y Y RB Y
1518 \subsection{4:2:0 Subsampling}
1519 \label{sec:420}
1521 The $C_b$ and $C_r$ planes are stored with half the horizontal and half the
1522 vertical resolution of the $Y'$ plane.
1523 Thus, each of these planes has half the number of horizontal blocks and half
1524 the number of vertical blocks as the luma plane, for a total of one quarter
1525 the number of blocks (see Figure~\ref{fig:pixel420}).
1526 Similarly, they have half the number of horizontal super blocks and half the
1527 number of vertical super blocks, rounded up.
1528 Macro blocks are defined across color planes, and so their number does not
1529 change, but each macro block contains within it one quarter as many
1530 chroma blocks.
1532 The chroma samples are vertically and horizontally centered between four luma
1533 samples.
1534 Thus, each luma sample has a unique closest chroma sample.
1535 This is the same sub-sampling pattern used with JPEG, MJPEG, and MPEG-1, and
1536 was inherited from VP3.
1537 A horizontal or vertical phase shift may be required to produce signals which
1538 use different chroma sampling locations for compatibility with different
1539 systems.
1541 \begin{figure}[htbp]
1542 \begin{center}
1543 \includegraphics{pixel420}
1544 \end{center}
1545 \caption{Pixels encoded 4:2:0}
1546 \label{fig:pixel420}
1547 \end{figure}
1549 % Figure.
1550 %Y Y Y Y
1552 % RB RB
1554 %Y Y Y Y
1558 %Y Y Y Y
1560 % RB RB
1562 %Y Y Y Y
1567 \subsection{Subsampling and the Picture Region}
1569 Although the frame size must be an integral number of macro blocks, and thus
1570 both the number of pixels and the number of blocks in each direction must be
1571 even, no such requirement is made of the picture region.
1572 Thus, when using subsampled pixel formats, careful attention must be paid to
1573 which chroma samples correspond to which luma samples.
1575 As mentioned above, for each pixel format, there is a unique chroma sample that
1576 is the closest to each luma sample.
1577 When cropping the chroma planes to the picture region, all the chroma samples
1578 corresponding to a luma sample in the cropped picture region must be included.
1579 Thus, when dividing the width or height of the picture region by two to obtain
1580 the size of the subsampled chroma planes, they must be rounded up.
1582 Furthermore, the sampling locations are defined relative to the frame,
1583 {\em not} the picture region.
1584 When using the 4:2:2 and 4:2:0 formats, the locations of chroma samples
1585 relative to the luma samples depend on whether or not the X offset of the
1586 picture region is odd.
1587 If the offset is even, each column of chroma samples corresponds to two columns
1588 of luma samples (see Figure~\ref{fig:pic_even} for an example).
1589 The only exception is if the width is odd, in which case the last column
1590 corresponds to only one column of luma samples (see Figure~\ref{fig:pic_even_odd}).
1591 If the offset is odd, then the first column of chroma samples corresponds to
1592 only one column of luma samples, while the remaining columns each correspond
1593 to two (see Figure~\ref{fig:pic_odd}).
1594 In this case, if the width is even, the last column again corresponds to only
1595 one column of luma samples (see Figure~\ref{fig:pic_odd_even}).
1597 A similar process is followed with the rows of a picture region of odd height
1598 encoded in the 4:2:0 format.
1599 If the Y offset is even, each row of chroma samples corresponds to two rows of
1600 luma samples (see Figure~\ref{fig:pic_even}), except with an odd height, where
1601 the last row corresponds to one row of luma samples only (see
1602 Figure~\ref{fig:pic_even_odd}).
1603 If the offset is odd, then it is the first row of chroma samples which
1604 corresponds to only one row of luma samples, while the remaining rows each
1605 correspond to two (Figure~\ref{fig:pic_odd}), except with an even height,
1606 where the last row also corresponds to one (Figure~\ref{fig:pic_odd_even}).
1608 Encoders should be aware of these differences in the subsampling when using an
1609 even or odd offset.
1610 In the typical case, with an even width and height, where one expects two rows
1611 or columns of luma samples for every row or column of chroma samples, the
1612 encoder must take care to ensure that the offsets used are both even.
1614 \begin{figure}[htbp]
1615 \begin{center}
1616 \includegraphics[width=\textwidth]{pic_even}
1617 \end{center}
1618 \caption{Pixel correspondence between color planes with even picture
1619 offset and even picture size}
1620 \label{fig:pic_even}
1621 \end{figure}
1623 \begin{figure}[htbp]
1624 \begin{center}
1625 \includegraphics[width=\textwidth]{pic_even_odd}
1626 \end{center}
1627 \caption{Pixel correspondence with even picture offset and
1628 odd picture size}
1629 \label{fig:pic_even_odd}
1630 \end{figure}
1632 \begin{figure}[htbp]
1633 \begin{center}
1634 \includegraphics[width=\textwidth]{pic_odd}
1635 \end{center}
1636 \caption{Pixel correspondence with odd picture offset and
1637 odd picture size}
1638 \label{fig:pic_odd}
1639 \end{figure}
1641 \begin{figure}[htbp]
1642 \begin{center}
1643 \includegraphics[width=\textwidth]{pic_odd_even}
1644 \end{center}
1645 \caption{Pixel correspondence with odd picture offset and
1646 even picture size}
1647 \label{fig:pic_odd_even}
1648 \end{figure}
1651 \chapter{Bitpacking Convention}
1652 \label{sec:bitpacking}
1654 \section{Overview}
1656 The Theora codec uses relatively unstructured raw packets containing
1657 binary integer fields of arbitrary width.
1658 Logically, each packet is a bitstream in which bits are written one-by-one by
1659 the encoder and then read one-by-one in the same order by the decoder.
1660 Most current binary storage arrangements group bits into a native storage unit
1661 of eight bits (octets), sixteen bits, thirty-two bits, or less commonly other
1662 fixed sizes.
1663 The Theora bitpacking convention specifies the correct mapping of the logical
1664 packet bitstream into an actual representation in fixed-width units.
1666 \subsection{Octets and Bytes}
1668 In most contemporary architectures, a `byte' is synonymous with an `octet',
1669 that is, eight bits.
1670 For purposes of the bitpacking convention, a byte implies the smallest native
1671 integer storage representation offered by a platform.
1672 Modern file systems invariably offer bytes as the fundamental atom of storage.
1674 The most ubiquitous architectures today consider a `byte' to be an octet.
1675 Note, however, that the Theora bitpacking convention is still well defined for
1676 any native byte size; an implementation can use the native bit-width of a
1677 given storage system.
1678 This document assumes that a byte is one octet for purposes of example only.
1680 \subsection{Words and Byte Order}
1682 A `word' is an integer size that is a grouped multiple of the byte size.
1683 Most architectures consider a word to be a group of two, four, or eight bytes.
1684 Each byte in the word can be ranked by order of `significance', e.g.\ the
1685 significance of the bits in each byte when storing a binary integer in the
1686 word.
1687 Several byte orderings are possible in a word.
1688 The common ones are
1689 \begin{itemize}
1690 \item{Big-endian:}
1691 in which the most significant byte comes first, e.g.\ 3-2-1-0,
1692 \item{Little-endian:}
1693 in which the least significant byte comes first, e.g.\ 0-1-2-3, and
1694 \item{Mixed-endian:}
1695 one of the less-common orderings that cannot be put into the above two
1696 categories, e.g.\ 3-1-2-0 or 0-2-1-3.
1697 \end{itemize}
1699 The Theora bitpacking convention specifies storage and bitstream manipulation
1700 at the byte, not word, level.
1701 Thus host word ordering is of concern only during optimization, when writing
1702 code that operates on a word of storage at a time rather than a byte.
1703 Logically, bytes are always encoded and decoded in order from byte zero through
1704 byte $n$.
1706 \subsection{Bit Order}
1708 A byte has a well-defined `least significant' bit (LSb), which is the only bit
1709 set when the byte is storing the two's complement integer value $+1$.
1710 A byte's `most significant' bit (MSb) is at the opposite end.
1711 Bits in a byte are numbered from zero at the LSb to $n$ for the MSb, where
1712 $n=7$ in an octet.
1714 \section{Coding Bits into Bytes}
1716 The Theora codec needs to encode arbitrary bit-width integers from zero to 32
1717 bits wide into packets.
1718 These integer fields are not aligned to the boundaries of the byte
1719 representation; the next field is read at the bit position immediately
1720 after the end of the previous field.
1722 The decoder logically unpacks integers by first reading the MSb of a binary
1723 integer from the logical bitstream, followed by the next most significant
1724 bit, etc., until the required number of bits have been read.
1725 When unpacking the bytes into bits, the decoder begins by reading the MSb of
1726 the integer to be read from the most significant unread bit position of the
1727 source byte, followed by the next-most significant bit position of the
1728 destination integer, and so on up to the requested number of bits.
1729 Note that this differs from the Vorbis I codec, which
1730 begins decoding with the LSb of the source integer, reading it from the
1731 LSb of the source byte.
1732 When all the bits of the current source byte are read, decoding continues with
1733 the MSb of the next byte.
1734 Any unfilled bits in the last byte of the packet MUST be cleared to zero by the
1735 encoder.
1737 \subsection{Signedness}
1739 The binary integers decoded by the above process may be either signed or
1740 unsigned.
1741 This varies from integer to integer, and this specification
1742 indicates how each value should be interpreted as it is read.
1743 That is, depending on context, the three bit binary pattern \bin{111} can be
1744 taken to represent either `$7$' as an unsigned integer or `$-1$' as a signed,
1745 two's complement integer.
1747 \subsection{Encoding Example}
1749 The following example shows the state of an (8-bit) byte stream after several
1750 binary integers are encoded, including the location of the put pointer for the
1751 next bit to write to and the total length of the stream in bytes.
1753 Encode the 4 bit unsigned integer value `12' (\bin{1100}) into an empty byte
1754 stream.
1756 \begin{tabular}{r|ccccccccl}
1757 \multicolumn{1}{r}{}& &&&&$\downarrow$&&&& \\
1758 & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & \\\cline{1-9}
1759 byte 0 & \textbf{1} & \textbf{1} & \textbf{0} & \textbf{0} &
1760 0 & 0 & 0 & 0 & $\leftarrow$ \\
1761 byte 1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\
1762 byte 2 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\
1763 byte 3 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\
1764 \multicolumn{1}{c|}{$\vdots$}&\multicolumn{8}{c}{$\vdots$}& \\
1765 byte $n$ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 &
1766 byte stream length: 1 byte
1767 \end{tabular}
1768 \vspace{\baselineskip}
1770 Continue by encoding the 3 bit signed integer value `$-1$' (\bin{111}).
1772 \begin{tabular}{r|ccccccccl}
1773 \multicolumn{1}{r}{} &&&&&&&&$\downarrow$& \\
1774 & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & \\\cline{1-9}
1775 byte 0 & \textbf{1} & \textbf{1} & \textbf{0} & \textbf{0} &
1776 \textbf{1} & \textbf{1} & \textbf{1} & 0 & $\leftarrow$ \\
1777 byte 1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\
1778 byte 2 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\
1779 byte 3 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\
1780 \multicolumn{1}{c|}{$\vdots$}&\multicolumn{8}{c}{$\vdots$}& \\
1781 byte $n$ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 &
1782 byte stream length: 1 byte
1783 \end{tabular}
1784 \vspace{\baselineskip}
1786 Continue by encoding the 7 bit integer value `17' (\bin{0010001}).
1788 \begin{tabular}{r|ccccccccl}
1789 \multicolumn{1}{r}{} &&&&&&&$\downarrow$&& \\
1790 & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & \\\cline{1-9}
1791 byte 0 & \textbf{1} & \textbf{1} & \textbf{0} & \textbf{0} &
1792 \textbf{1} & \textbf{1} & \textbf{1} & \textbf{0} & \\
1793 byte 1 & \textbf{0} & \textbf{1} & \textbf{0} & \textbf{0} &
1794 \textbf{0} & \textbf{1} & 0 & 0 & $\leftarrow$ \\
1795 byte 2 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\
1796 byte 3 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \\
1797 \multicolumn{1}{c|}{$\vdots$}&\multicolumn{8}{c}{$\vdots$}& \\
1798 byte $n$ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 &
1799 byte stream length: 2 bytes
1800 \end{tabular}
1801 \vspace{\baselineskip}
1803 Continue by encoding the 13 bit integer value `6969' (\bin{11011\ 00111001}).
1805 \begin{tabular}{r|ccccccccl}
1806 \multicolumn{1}{r}{} &&&&$\downarrow$&&&&& \\
1807 & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & \\\cline{1-9}
1808 byte 0 & \textbf{1} & \textbf{1} & \textbf{0} & \textbf{0} &
1809 \textbf{1} & \textbf{1} & \textbf{1} & \textbf{0} & \\
1810 byte 1 & \textbf{0} & \textbf{1} & \textbf{0} & \textbf{0} &
1811 \textbf{0} & \textbf{1} & \textbf{1} & \textbf{1} & \\
1812 byte 2 & \textbf{0} & \textbf{1} & \textbf{1} & \textbf{0} &
1813 \textbf{0} & \textbf{1} & \textbf{1} & \textbf{1} & \\
1814 byte 3 & \textbf{0} & \textbf{0} & \textbf{1} &
1815 0 & 0 & 0 & 0 & 0 & $\leftarrow$ \\
1816 \multicolumn{1}{c|}{$\vdots$}&\multicolumn{8}{c}{$\vdots$}& \\
1817 byte $n$ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 &
1818 byte stream length: 4 bytes
1819 \end{tabular}
1820 \vspace{\baselineskip}
1822 \subsection{Decoding Example}
1824 The following example shows the state of the (8-bit) byte stream encoded in the
1825 previous example after several binary integers are decoded, including the
1826 location of the get pointer for the next bit to read.
1828 Read a two bit unsigned integer from the example encoded above.
1830 \begin{tabular}{r|ccccccccl}
1831 \multicolumn{1}{r}{} &&&$\downarrow$&&&&&& \\
1832 & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & \\\cline{1-9}
1833 byte 0 & \textbf{1} & \textbf{1} & 0 & 0 & 1 & 1 & 1 & 0 & $\leftarrow$ \\
1834 byte 1 & 0 & 1 & 0 & 0 & 0 & 1 & 1 & 1 & \\
1835 byte 2 & 0 & 1 & 1 & 0 & 0 & 1 & 1 & 1 & \\
1836 byte 3 & 0 & 0 & 1 & 0 & 0 & 0 & 0 & 0 &
1837 byte stream length: 4 bytes
1838 \end{tabular}
1839 \vspace{\baselineskip}
1841 Value read: 3 (\bin{11}).
1843 Read another two bit unsigned integer from the example encoded above.
1845 \begin{tabular}{r|ccccccccl}
1846 \multicolumn{1}{r}{} &&&&&$\downarrow$&&&& \\
1847 & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & \\\cline{1-9}
1848 byte 0 & \textbf{1} & \textbf{1} & \textbf{0} & \textbf{0} &
1849 1 & 1 & 1 & 0 & $\leftarrow$ \\
1850 byte 1 & 0 & 1 & 0 & 0 & 0 & 1 & 1 & 1 & \\
1851 byte 2 & 0 & 1 & 1 & 0 & 0 & 1 & 1 & 1 & \\
1852 byte 3 & 0 & 0 & 1 & 0 & 0 & 0 & 0 & 0 &
1853 byte stream length: 4 bytes
1854 \end{tabular}
1855 \vspace{\baselineskip}
1857 Value read: 0 (\bin{00}).
1859 Two things are worth noting here.
1860 \begin{itemize}
1861 \item
1862 Although these four bits were originally written as a single four-bit integer,
1863 reading some other combination of bit-widths from the bitstream is well
1864 defined.
1865 No artificial alignment boundaries are maintained in the bitstream.
1866 \item
1867 The first value is the integer `$3$' only because the context stated we were
1868 reading an unsigned integer.
1869 Had the context stated we were reading a signed integer, the returned value
1870 would have been the integer `$-1$'.
1871 \end{itemize}
1873 \subsection{End-of-Packet Alignment}
1875 The typical use of bitpacking is to produce many independent byte-aligned
1876 packets which are embedded into a larger byte-aligned container structure,
1877 such as an Ogg transport bitstream.
1878 Externally, each bitstream encoded as a byte stream MUST begin and end on a
1879 byte boundary.
1880 Often, the encoded packet bitstream is not an integer number of bytes, and so
1881 there is unused space in the last byte of a packet.
1883 %r: I think the generality here is necessary to be consistent with our assertions
1884 %r: elsewhere about being independent of transport and byte width
1885 When a Theora encoder produces packets for embedding in a byte-aligned
1886 container, unused space in the last byte of a packet is always zeroed during
1887 the encoding process.
1888 Thus, should this unused space be read, it will return binary zeroes.
1889 There is no marker pattern or stuffing bits that will allow the decoder to
1890 obtain the exact size, in bits, of the original bitstream.
1891 This knowledge is not required for decoding.
1893 Attempting to read past the end of an encoded packet results in an
1894 `end-of-packet' condition.
1895 Any further read operations after an `end-of-packet' condition shall also
1896 return `end-of-packet'.
1897 Unlike Vorbis, Theora does not use truncated packets as a normal mode of
1898 operation.
1899 Therefore if a decoder encounters the `end-of-packet' condition during normal
1900 decoding, it may attempt to use the bits that were read to recover as much of
1901 the encoded data as possible, signal a warning or error, or both.
1903 \subsection{Reading Zero Bit Integers}
1905 Reading a zero bit integer returns the value `$0$' and does not increment
1906 the stream pointer.
1907 Reading to the end of the packet, but not past the end, so that an
1908 `end-of-packet' condition is not triggered, and then reading a zero bit
1909 integer shall succeed, returning `$0$', and not trigger an `end-of-packet'
1910 condition.
1911 Reading a zero bit integer after a previous read sets the `end-of-packet'
1912 condition shall fail, also returning `end-of-packet'.
1914 \chapter{Bitstream Headers}
1915 \label{sec:headers}
1917 A Theora bitstream begins with three header packets.
1918 The header packets are, in order, the identification header, the comment
1919 header, and the setup header.
1920 All are required for decode compliance.
1921 An end-of-packet condition encountered while decoding the identification or
1922 setup header packets renders the stream undecodable.
1923 An end-of-packet condition encountered while decoding the comment header is a
1924 non-fatal error condition, and MAY be ignored by a decoder.
1926 \paragraph{VP3 Compatibility}
1928 VP3 relies on the headers provided by its container, usually either AVI or
1929 Quicktime.
1930 As such, several parameters available in these headers are not available to VP3
1931 streams.
1932 These are indicated as they appear in the sections below.
1934 \section{Common Header Decode}
1935 \label{sub:common-header}
1937 \paragraph{Input parameters:} None.
1939 \paragraph{Output parameters:}\hfill\\*
1940 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
1941 \multicolumn{1}{c}{Name} &
1942 \multicolumn{1}{c}{Type} &
1943 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
1944 \multicolumn{1}{c}{Signed?} &
1945 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
1946 \bitvar{HEADERTYPE} & Integer & 8 & No & The type of the header being
1947 decoded. \\
1948 \bottomrule\end{tabularx}
1950 \paragraph{Variables used:} None.
1951 \medskip
1953 Each header packet begins with the same header fields, which are decoded as
1954 follows:
1956 \begin{enumerate}
1957 \item
1958 Read an 8-bit unsigned integer as \bitvar{HEADERTYPE}.
1959 If the most significant bit of this integer is not set, then stop.
1960 This is not a header packet.
1961 \item
1962 Read 6 8-bit unsigned integers.
1963 If these do not have the values \hex{74}, \hex{68}, \hex{65}, \hex{6F},
1964 \hex{72}, and \hex{61}, respectively, then stop.
1965 This stream is not decodable by this specification.
1966 These values correspond to the ASCII values of the characters `t', `h', `e',
1967 `o', `r', and `a'.
1968 \end{enumerate}
1970 Decode continues according to \bitvar{HEADERTYPE}.
1971 The identification header is type \hex{80}, the comment header is type
1972 \hex{81}, and the setup header is type \hex{82}.
1973 These packets must occur in the order: identification, comment, setup.
1974 %r: I clarified the initial-bit scheme here
1975 %TBT: Dashes let the reader know they'll have to pick up the rest of the
1976 %TBT: sentence after the explanatory phrase.
1977 %TBT: Otherwise it just sounds like the bit must exist.
1978 All header packets have the most significant bit of the type
1979 field---which is the initial bit in the packet---set.
1980 This distinguishes them from video data packets in which the first bit
1981 is unset.
1982 % extra header packets are a feature Dan argued for way back when for
1983 % backward-compatible extensions (and icc colourspace for example)
1984 % I think it's reasonable
1985 %TBT: You can always just stick more stuff in the setup header.
1986 Packets with other header types (\hex{83}--\hex{FF}) are reserved and MUST be
1987 ignored.
1989 \section{Identification Header Decode}
1990 \label{sec:idheader}
1992 \paragraph{Input parameters:} None.
1994 \paragraph{Output parameters:}\hfill\\*
1995 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
1996 \multicolumn{1}{c}{Name} &
1997 \multicolumn{1}{c}{Type} &
1998 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
1999 \multicolumn{1}{c}{Signed?} &
2000 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2001 \bitvar{VMAJ} & Integer & 8 & No & The major version number. \\
2002 \bitvar{VMIN} & Integer & 8 & No & The minor version number. \\
2003 \bitvar{VREV} & Integer & 8 & No & The version revision number. \\
2004 \bitvar{FMBW} & Integer & 16 & No & The width of the frame in macro
2005 blocks. \\
2006 \bitvar{FMBH} & Integer & 16 & No & The height of the frame in macro
2007 blocks. \\
2008 \bitvar{NSBS} & Integer & 32 & No & The total number of super blocks in a
2009 frame. \\
2010 \bitvar{NBS} & Integer & 36 & No & The total number of blocks in a
2011 frame. \\
2012 \bitvar{NMBS} & Integer & 32 & No & The total number of macro blocks in a
2013 frame. \\
2014 \bitvar{PICW} & Integer & 20 & No & The width of the picture region in
2015 pixels. \\
2016 \bitvar{PICH} & Integer & 20 & No & The height of the picture region in
2017 pixels. \\
2018 \bitvar{PICX} & Integer & 8 & No & The X offset of the picture region in
2019 pixels. \\
2020 \bitvar{PICY} & Integer & 8 & No & The Y offset of the picture region in
2021 pixels. \\
2022 \bitvar{FRN} & Integer & 32 & No & The frame-rate numerator. \\
2023 \bitvar{FRD} & Integer & 32 & No & The frame-rate denominator. \\
2024 \bitvar{PARN} & Integer & 24 & No & The pixel aspect-ratio numerator. \\
2025 \bitvar{PARD} & Integer & 24 & No & The pixel aspect-ratio denominator. \\
2026 \bitvar{CS} & Integer & 8 & No & The color space. \\
2027 \bitvar{PF} & Integer & 2 & No & The pixel format. \\
2028 \bitvar{NOMBR} & Integer & 24 & No & The nominal bitrate of the stream, in
2029 bits per second. \\
2030 \bitvar{QUAL} & Integer & 6 & No & The quality hint. \\
2031 \bitvar{KFGSHIFT} & Integer & 5 & No & The amount to shift the key frame
2032 number by in the granule position. \\
2033 \bottomrule\end{tabularx}
2035 \paragraph{Variables used:} None.
2036 \medskip
2038 The identification header is a short header with only a few fields used to
2039 declare the stream definitively as Theora and provide detailed information
2040 about the format of the fully decoded video data.
2041 The identification header is decoded as follows:
2043 \begin{enumerate}
2044 \item
2045 Decode the common header fields according to the procedure described in
2046 Section~\ref{sub:common-header}.
2047 If \bitvar{HEADERTYPE} returned by this procedure is not \hex{80}, then stop.
2048 This packet is not the identification header.
2049 \item
2050 Read an 8-bit unsigned integer as \bitvar{VMAJ}.
2051 If \bitvar{VMAJ} is not $3$, then stop.
2052 This stream is not decodable according to this specification.
2053 \item
2054 Read an 8-bit unsigned integer as \bitvar{VMIN}.
2055 If \bitvar{VMIN} is not $2$, then stop.
2056 This stream is not decodable according to this specification.
2057 \item
2058 Read an 8-bit unsigned integer as \bitvar{VREV}.
2059 If \bitvar{VREV} is not $0$, then stop.
2060 This stream is not decodable according to this specification.
2061 \item
2062 Read a 16-bit unsigned integer as \bitvar{FMBW}.
2063 This MUST be greater than zero.
2064 This specifies the width of the coded frame in macro blocks.
2065 The actual width of the frame in pixels is $\bitvar{FMBW}*16$.
2066 \item
2067 Read a 16-bit unsigned integer as \bitvar{FMBH}.
2068 This MUST be greater than zero.
2069 This specifies the height of the coded frame in macro blocks.
2070 The actual height of the frame in pixels is $\bitvar{FMBH}*16$.
2071 \item
2072 Read a 24-bit unsigned integer as \bitvar{PICW}.
2073 This MUST be no greater than $(\bitvar{FMBW}*16)$.
2074 Note that 24 bits are read, even though only 20 bits are sufficient to specify
2075 any value of the picture width.
2076 This is done to preserve octet alignment in this header, to allow for a
2077 simplified parser implementation.
2078 \item
2079 Read a 24-bit unsigned integer as \bitvar{PICH}.
2080 This MUST be no greater than $(\bitvar{FMBH}*16)$.
2081 Together with \bitvar{PICW}, this specifies the size of the displayable picture
2082 region within the coded frame.
2083 See Figure~\ref{fig:pic-frame}.
2084 Again, 24 bits are read instead of 20.
2085 \item
2086 Read an 8-bit unsigned integer as \bitvar{PICX}.
2087 This MUST be no greater than $(\bitvar{FMBW}*16-\bitvar{PICW})$.
2088 \item
2089 Read an 8-bit unsigned integer as \bitvar{PICY}.
2090 This MUST be no greater than $(\bitvar{FMBH}*16-\bitvar{PICH})$.
2091 Together with \bitvar{PICX}, this specifies the location of the lower-left
2092 corner of the displayable picture region.
2093 See Figure~\ref{fig:pic-frame}.
2094 \item
2095 Read a 32-bit unsigned integer as \bitvar{FRN}.
2096 This MUST be greater than zero.
2097 \item
2098 Read a 32-bit unsigned integer as \bitvar{FRD}.
2099 This MUST be greater than zero.
2100 Theora is a fixed-frame rate video codec.
2101 Frames are sampled at the constant rate of $\frac{\bitvar{FRN}}{\bitvar{FRD}}$
2102 frames per second.
2103 The presentation time of the first frame is at zero seconds.
2104 No mechanism is provided to specify a non-zero offset for the initial
2105 frame.
2106 \item
2107 Read a 24-bit unsigned integer as \bitvar{PARN}.
2108 \item
2109 Read a 24-bit unsigned integer as \bitvar{PARD}.
2110 Together with \bitvar{PARN}, this specifies the aspect ratio of the pixels
2111 within a frame, defined as the ratio of the physical width of a pixel to its
2112 physical height.
2113 This is given by the ratio $\bitvar{PARN}:\bitvar{PARD}$.
2114 Either of these fields MAY be zero, in which case the pixel aspect ratio
2115 defaults to $1:1$.
2116 \item
2117 Read an 8-bit unsigned integer as \bitvar{CS}.
2118 This is a value from an enumerated list of the available color spaces, given in
2119 Table~\ref{tab:colorspaces}.
2120 The `Undefined' value indicates that color space information was not available
2121 to the encoder.
2122 It MAY be specified by the application via an external means.
2123 If a reserved value is given, a decoder MAY refuse to decode the stream.
2124 \begin{table}[htbp]
2125 \begin{center}
2126 \begin{tabular*}{215pt}{cl@{\extracolsep{\fill}}c}\toprule
2127 Value & Color Space \\\midrule
2128 $0$ & Undefined. \\
2129 $1$ & Rec.~470M (see Section~\ref{sec:470m}). \\
2130 $2$ & Rec.~470BG (see Section~\ref{sec:470bg}). \\
2131 $3$ & Reserved. \\
2132 $\vdots$ & \\
2133 $255$ & \\
2134 \bottomrule\end{tabular*}
2135 \end{center}
2136 \caption{Enumerated List of Color Spaces}
2137 \label{tab:colorspaces}
2138 \end{table}
2139 \item
2140 Read a 24-bit unsigned integer as \bitvar{NOMBR}.
2141 The \bitvar{NOMBR} field is used only as a hint.
2142 For pure VBR streams, this value may be considerably off.
2143 The field MAY be set to zero to indicate that the encoder did not care to
2144 speculate.
2145 %TODO: units?
2146 \item
2147 Read a 6-bit unsigned integer as \bitvar{QUAL}.
2148 This value is used to provide a hint as to the relative quality of the stream
2149 when compared to others produced by the same encoder.
2150 Larger values indicate higher quality.
2151 This can be used, for example, to select among several streams containing the
2152 same material encoded with different settings.
2153 \item
2154 Read a 5-bit unsigned integer as \bitvar{KFGSHIFT}.
2155 The \bitvar{KFGSHIFT} is used to partition the granule position associated with
2156 each packet into two different parts.
2157 The frame number of the last key frame, starting from zero, is stored in the
2158 upper $64-\bitvar{KFGSHIFT}$ bits, while the lower \bitvar{KFGSHIFT} bits
2159 contain the number of frames since the last keyframe.
2160 Complete details on the granule position mapping are specified in Section~REF.
2161 \item
2162 Read a 2-bit unsigned integer as \bitvar{PF}.
2163 The \bitvar{PF} field contains a value from an enumerated list of the available
2164 pixel formats, given in Table~\ref{tab:pixel-formats}.
2165 If the reserved value $1$ is given, stop.
2166 This stream is not decodable according to this specification.
2168 \begin{table}[htbp]
2169 \begin{center}
2170 \begin{tabular*}{215pt}{cl@{\extracolsep{\fill}}c}\toprule
2171 Value & Pixel Format \\\midrule
2172 $0$ & 4:2:0 (see Section~\ref{sec:420}). \\
2173 $1$ & Reserved. \\
2174 $2$ & 4:2:2 (see Section~\ref{sec:422}). \\
2175 $3$ & 4:4:4 (see Section~\ref{sec:444}). \\
2176 \bottomrule\end{tabular*}
2177 \end{center}
2178 \caption{Enumerated List of Pixel Formats}
2179 \label{tab:pixel-formats}
2180 \end{table}
2182 \item
2183 Read a 3-bit unsigned integer.
2184 These bits are reserved.
2185 If this value is not zero, then stop.
2186 This stream is not decodable according to this specification.
2187 \item
2188 Assign \bitvar{NSBS} a value according to \bitvar{PF}, as given by
2189 Table~\ref{tab:nsbs-for-pf}.
2191 \begin{table}[bt]
2192 \begin{center}
2193 \begin{tabular}{cc}\toprule
2194 \bitvar{PF} & \bitvar{NSBS} \\\midrule
2195 $0$ & $\begin{aligned}
2196 &((\bitvar{FMBW}+1)//2)*((\bitvar{FMBH}+1)//2)\\
2197 & +2*((\bitvar{FMBW}+3)//4)*((\bitvar{FMBH}+3)//4)
2198 \end{aligned}$ \\\midrule
2199 $2$ & $\begin{aligned}
2200 &((\bitvar{FMBW}+1)//2)*((\bitvar{FMBH}+1)//2)\\
2201 & +2*((\bitvar{FMBW}+3)//4)*((\bitvar{FMBH}+1)//2)
2202 \end{aligned}$ \\\midrule
2203 $3$ & $3*((\bitvar{FMBW}+1)//2)*((\bitvar{FMBH}+1)//2)$ \\
2204 \bottomrule\end{tabular}
2205 \end{center}
2206 \caption{Number of Super Blocks for each Pixel Format}
2207 \label{tab:nsbs-for-pf}
2208 \end{table}
2210 \item
2211 Assign \bitvar{NBS} a value according to \bitvar{PF}, as given by
2212 Table~\ref{tab:nbs-for-pf}.
2214 \begin{table}[tb]
2215 \begin{center}
2216 \begin{tabular}{cc}\toprule
2217 \bitvar{PF} & \bitvar{NBS} \\\midrule
2218 $0$ & $6*\bitvar{FMBW}*\bitvar{FMBH}$ \\\midrule
2219 $2$ & $8*\bitvar{FMBW}*\bitvar{FMBH}$ \\\midrule
2220 $3$ & $12*\bitvar{FMBW}*\bitvar{FMBH}$ \\
2221 \bottomrule\end{tabular}
2222 \end{center}
2223 \caption{Number of Blocks for each Pixel Format}
2224 \label{tab:nbs-for-pf}
2225 \end{table}
2227 \item
2228 Assign \bitvar{NMBS} the value $(\bitvar{FMBW}*\bitvar{FMBH})$.
2230 \end{enumerate}
2232 \paragraph{VP3 Compatibility}
2234 VP3 does not correctly handle frame sizes that are not a multiple of 16.
2235 Thus, \bitvar{PICW} and \bitvar{PICH} should be set to the frame width and
2236 height in pixels, respectively, and \bitvar{PICX} and \bitvar{PICY} should be
2237 set to zero.
2238 VP3 headers do not specify a color space.
2239 VP3 only supports the 4:2:0 pixel format.
2241 \section{Comment Header}
2242 \label{sec:commentheader}
2244 The Theora comment header is the second of three header packets that begin a
2245 Theora stream.
2246 It is meant for short text comments, not arbitrary metadata; arbitrary metadata
2247 belongs in a separate logical stream that provides greater structure and
2248 machine parseability.
2250 %r: I tried to morph this a little more in the direction of our application space
2251 The comment field is meant to be used much like someone jotting a quick note on
2252 the label of a video.
2253 It should be a little information to remember the disc or tape by and explain it to
2254 others; a short, to-the-point text note that can be more than a couple words,
2255 but isn't going to be more than a short paragraph.
2256 The essentials, in other words, whatever they turn out to be, e.g.:
2258 %TODO: Example
2260 The comment header is stored as a logical list of eight-bit clean vectors; the
2261 number of vectors is bounded at $2^{32}-1$ and the length of each vector is
2262 limited to $2^{32}-1$ bytes.
2263 The vector length is encoded; the vector contents themselves are not null
2264 terminated.
2265 In addition to the vector list, there is a single vector for a vendor name,
2266 also eight-bit clean with a length encoded in 32 bits.
2267 %TODO: The 1.0 release of libtheora sets the vendor string to ...
2269 \subsection{Comment Length Decode}
2270 \label{sub:comment-len}
2272 \paragraph{Input parameters:} None.
2274 \paragraph{Output parameters:}\hfill\\*
2275 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2276 \multicolumn{1}{c}{Name} &
2277 \multicolumn{1}{c}{Type} &
2278 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2279 \multicolumn{1}{c}{Signed?} &
2280 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2281 \bitvar{LEN} & Integer & 32 & No & A single 32-bit length value. \\
2282 \bottomrule\end{tabularx}
2284 \paragraph{Variables used:}\hfill\\*
2285 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2286 \multicolumn{1}{c}{Name} &
2287 \multicolumn{1}{c}{Type} &
2288 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2289 \multicolumn{1}{c}{Signed?} &
2290 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2291 \locvar{LEN0} & Integer & 8 & No & The first octet of the string length. \\
2292 \locvar{LEN1} & Integer & 8 & No & The second octet of the string length. \\
2293 \locvar{LEN2} & Integer & 8 & No & The third octet of the string length. \\
2294 \locvar{LEN3} & Integer & 8 & No & The fourth octet of the string
2295 length. \\
2296 \bottomrule\end{tabularx}
2297 \medskip
2299 The length of a single comment vector is decoded as follows:
2301 \begin{enumerate}
2302 \item
2303 Read an 8-bit unsigned integer as \locvar{LEN0}.
2304 \item
2305 Read an 8-bit unsigned integer as \locvar{LEN1}.
2306 \item
2307 Read an 8-bit unsigned integer as \locvar{LEN2}.
2308 \item
2309 Read an 8-bit unsigned integer as \locvar{LEN3}.
2310 \item
2311 Assign \bitvar{LEN} the value $(\locvar{LEN0}+(\locvar{LEN1}<<8)+
2312 (\locvar{LEN2}<<16)+(\locvar{LEN3}<<24))$.
2313 This construction is used so that on platforms with 8-bit bytes, the memory
2314 organization of the comment header is identical with that of Vorbis I,
2315 allowing for common parsing code despite the different bit packing
2316 conventions.
2317 \end{enumerate}
2319 \subsection{Comment Header Decode}
2321 \paragraph{Input parameters:} None.
2323 \paragraph{Output parameters:}\hfill\\*
2324 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2325 \multicolumn{1}{c}{Name} &
2326 \multicolumn{1}{c}{Type} &
2327 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2328 \multicolumn{1}{c}{Signed?} &
2329 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2330 \bitvar{VENDOR} & \multicolumn{3}{l}{String} & The vendor string. \\
2331 \bitvar{NCOMMENTS} & Integer & 32 & No & The number of user
2332 comments. \\
2333 \bitvar{COMMENTS} & \multicolumn{3}{l}{String Array} & A list of
2334 \bitvar{NCOMMENTS} user comment values. \\
2335 \bottomrule\end{tabularx}
2337 \paragraph{Variables used:}\hfill\\*
2338 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2339 \multicolumn{1}{c}{Name} &
2340 \multicolumn{1}{c}{Type} &
2341 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2342 \multicolumn{1}{c}{Signed?} &
2343 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2344 \locvar{\ci} & Integer & 32 & No & The index of the current user
2345 comment. \\
2346 \bottomrule\end{tabularx}
2347 \medskip
2349 The complete comment header is decoded as follows:
2351 \begin{enumerate}
2352 \item
2353 Decode the common header fields according to the procedure described in
2354 Section~\ref{sub:common-header}.
2355 If \bitvar{HEADERTYPE} returned by this procedure is not \hex{81}, then stop.
2356 This packet is not the comment header.
2357 \item
2358 Decode the length of the vendor string using the procedure given in
2359 Section~\ref{sub:comment-len} into \bitvar{LEN}.
2360 \item
2361 Read \bitvar{LEN} 8-bit unsigned integers.
2362 \item
2363 Set the string \bitvar{VENDOR} to the contents of these octets.
2364 \item
2365 Decode the number of user comments using the procedure given in
2366 Section~\ref{sub:comment-len} into \bitvar{LEN}.
2367 \item
2368 Assign \bitvar{NCOMMENTS} the value stored in \bitvar{LEN}.
2369 \item
2370 For each consecutive value of \locvar{\ci} from $0$ to
2371 $(\bitvar{NCOMMENTS}-1)$, inclusive:
2372 \begin{enumerate}
2373 \item
2374 Decode the length of the current user comment using the procedure given in
2375 Section~\ref{sub:comment-len} into \bitvar{LEN}.
2376 \item
2377 Read \bitvar{LEN} 8-bit unsigned integers.
2378 \item
2379 Set the string $\bitvar{COMMENTS}[\locvar{\ci}]$ to the contents of these
2380 octets.
2381 \end{enumerate}
2382 \end{enumerate}
2384 The comment header comprises the entirety of the second header packet.
2385 Unlike the first header packet, it is not generally the only packet on the
2386 second page and may span multiple pages.
2387 The length of the comment header packet is (practically) unbounded.
2388 The comment header packet is not optional; it must be present in the stream
2389 even if it is logically empty.
2391 %TODO: \paragraph{VP3 Compatibility}
2393 \subsection{User Comment Format}
2395 The user comment vectors are structured similarly to a UNIX environment
2396 variable.
2397 That is, comment fields consist of a field name and a corresponding value and
2398 look like:
2399 \begin{center}
2400 \begin{tabular}{rcl}
2401 $\bitvar{COMMENTS}[0]$ & = & ``TITLE=the look of Theora'' \\
2402 $\bitvar{COMMENTS}[1]$ & = & ``DIRECTOR=me''
2403 \end{tabular}
2404 \end{center}
2406 The field name is case-insensitive and MUST consist of ASCII characters
2407 \hex{20} through \hex{7D}, \hex{3D} (`=') excluded.
2408 ASCII \hex{41} through \hex{5A} inclusive (characters `A'--`Z') are to be
2409 considered equivalent to ASCII \hex{61} through \hex{7A} inclusive
2410 (characters `a'--`z').
2411 An entirely empty field name---one that is zero characters long---is not
2412 disallowed.
2414 The field name is immediately followed by ASCII \hex{3D} (`='); this equals
2415 sign is used to terminate the field name.
2417 The data immediately after \hex{3D} until the end of the vector is the eight-bit
2418 clean value of the field contents encoded as a UTF-8 string~\cite{rfc2044}.
2420 Field names MUST NOT be `internationalized'; this is a concession to
2421 simplicity, not an attempt to exclude the majority of the world that doesn't
2422 speak English.
2423 Applications MAY wish to present internationalized versions of the standard
2424 field names listed below to the user, but they are not to be stored in the
2425 bitstream.
2426 Field {\em contents}, however, use the UTF-8 character encoding to allow easy
2427 representation of any language.
2429 Individual `vendors' MAY use non-standard field names within reason.
2430 The proper use of comment fields as human-readable notes has already been
2431 explained.
2432 Abuse will be discouraged.
2434 There is no vendor-specific prefix to `non-standard' field names.
2435 Vendors SHOULD make some effort to avoid arbitrarily polluting the common
2436 namespace.
2437 %"and other bodies"?
2438 %If you're going to be that vague, you might as well not say anything at all.
2439 Xiph.org and other bodies will generally collect and rationalize the more
2440 useful tags to help with standardization.
2442 Field names are not restricted to occur only once within a comment header.
2443 %TODO: Example
2445 \paragraph{Field Names}
2447 Below is a proposed, minimal list of standard field names with a description of
2448 their intended use.
2449 No field names are mandatory; a comment header may contain one or more, all, or
2450 none of the names in this list.
2452 \begin{description}
2453 \item[TITLE:] Video name.
2454 %TODO: Complete list
2455 \end{description}
2457 \section{Setup Header}
2458 \label{sec:setupheader}
2460 The Theora setup header contains the limit values used to drive the loop
2461 filter, the base matrices and scale values used to build the dequantization
2462 tables, and the Huffman tables used to unpack the DCT tokens.
2463 Because the contents of this header are specific to Theora, no concessions have
2464 been made to keep the fields octet-aligned for easy parsing.
2466 \subsection{Loop Filter Limit Table Decode}
2467 \label{sub:loop-filter-limits}
2469 \paragraph{Input parameters:} None.
2471 \paragraph{Output parameters:}\hfill\\*
2472 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2473 \multicolumn{1}{c}{Name} &
2474 \multicolumn{1}{c}{Type} &
2475 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2476 \multicolumn{1}{c}{Signed?} &
2477 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2478 \bitvar{LFLIMS} & \multicolumn{1}{p{40pt}}{Integer array} &
2479 7 & No & A 64-element array of loop filter limit
2480 values. \\
2481 \bottomrule\end{tabularx}
2483 \paragraph{Variables used:}\hfill\\*
2484 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2485 \multicolumn{1}{c}{Name} &
2486 \multicolumn{1}{c}{Type} &
2487 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2488 \multicolumn{1}{c}{Signed?} &
2489 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2490 \locvar{\qi} & Integer & 6 & No & The quantization index. \\
2491 \locvar{NBITS} & Integer & 3 & No & The size of values being read in the
2492 current table. \\
2493 \bottomrule\end{tabularx}
2494 \medskip
2496 This procedure decodes the table of loop filter limit values used to drive the
2497 loop filter, which is described in Section~\ref{sub:loop-filter-limits}.
2498 It is decoded as follows:
2500 \begin{enumerate}
2501 \item
2502 Read a 3-bit unsigned integer as \locvar{NBITS}.
2503 \item
2504 For each consecutive value of \locvar{\qi} from $0$ to $63$, inclusive:
2505 \begin{enumerate}
2506 \item
2507 Read an \locvar{NBITS}-bit unsigned integer as $\bitvar{LFLIMS}[\locvar{\qi}]$.
2508 \end{enumerate}
2509 \end{enumerate}
2511 \paragraph{VP3 Compatibility}
2513 The loop filter limit values are hardcoded in VP3.
2514 The values used are given in Appendix~\ref{app:vp3-loop-filter-limits}.
2516 \subsection{Quantization Parameters Decode}
2517 \label{sub:quant-params}
2519 \paragraph{Input parameters:} None.
2521 \paragraph{Output parameters:}\hfill\\*
2522 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2523 \multicolumn{1}{c}{Name} &
2524 \multicolumn{1}{c}{Type} &
2525 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2526 \multicolumn{1}{c}{Signed?} &
2527 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2528 \bitvar{ACSCALE} & \multicolumn{1}{p{40pt}}{Integer array} &
2529 16 & No & A 64-element array of scale values for
2530 AC coefficients for each \qi\ value. \\
2531 \bitvar{DCSCALE} & \multicolumn{1}{p{40pt}}{Integer array} &
2532 16 & No & A 64-element array of scale values for
2533 the DC coefficient for each \qi\ value. \\
2534 \bitvar{NBMS} & Integer & 10 & No & The number of base matrices. \\
2535 \bitvar{BMS} & \multicolumn{1}{p{50pt}}{2D Integer array} &
2536 8 & No & A $\bitvar{NBMS}\times 64$ array
2537 containing the base matrices. \\
2538 \bitvar{NQRS} & \multicolumn{1}{p{50pt}}{2D Integer array} &
2539 6 & No & A $2\times 3$ array containing the
2540 number of quant ranges for a given \qti\ and \pli, respectively.
2541 This is at most $63$. \\
2542 \bitvar{QRSIZES} & \multicolumn{1}{p{50pt}}{3D Integer array} &
2543 6 & No & A $2\times 3\times 63$ array of the
2544 sizes of each quant range for a given \qti\ and \pli, respectively.
2545 Only the first $\bitvar{NQRS}[\qti][\pli]$ values are used. \\
2546 \bitvar{QRBMIS} & \multicolumn{1}{p{50pt}}{3D Integer array} &
2547 9 & No & A $2\times 3\times 64$ array of the
2548 \bmi's used for each quant range for a given \qti\ and \pli, respectively.
2549 Only the first $(\bitvar{NQRS}[\qti][\pli]+1)$ values are used. \\
2550 \bottomrule\end{tabularx}
2552 \paragraph{Variables used:}\hfill\\*
2553 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2554 \multicolumn{1}{c}{Name} &
2555 \multicolumn{1}{c}{Type} &
2556 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2557 \multicolumn{1}{c}{Signed?} &
2558 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2559 \locvar{\qti} & Integer & 1 & No & A quantization type index.
2560 See Table~\ref{tab:quant-types}.\\
2561 \locvar{\qtj} & Integer & 1 & No & A quantization type index. \\
2562 \locvar{\pli} & Integer & 2 & No & A color plane index.
2563 See Table~\ref{tab:color-planes}.\\
2564 \locvar{\plj} & Integer & 2 & No & A color plane index. \\
2565 \locvar{\qi} & Integer & 6 & No & The quantization index. \\
2566 \locvar{\ci} & Integer & 6 & No & The DCT coefficient index. \\
2567 \locvar{\bmi} & Integer & 9 & No & The base matrix index. \\
2568 \locvar{\qri} & Integer & 6 & No & The quant range index. \\
2569 \locvar{NBITS} & Integer & 5 & No & The size of fields to read. \\
2570 \locvar{NEWQR} & Integer & 1 & No & Flag that indicates a new set of quant
2571 ranges will be defined. \\
2572 \locvar{RPQR} & Integer & 1 & No & Flag that indicates the quant ranges to
2573 copy will come from the same color plane. \\
2574 \bottomrule\end{tabularx}
2575 \medskip
2577 The AC scale and DC scale values are defined in two simple tables with 64
2578 values each, one for each \qi\ value.
2579 The same scale values are used for every quantization type and color plane.
2581 The base matrices for all quantization types and color planes are stored in a
2582 single table.
2583 These are then referenced by index in several sets of \term{quant ranges}.
2584 The purpose of the quant ranges is to specify which base matrices are used for
2585 which \qi\ values.
2587 A set of quant ranges is defined for each quantization type and color plane.
2588 To save space in the header, bit flags allow a set of quant ranges to be copied
2589 from a previously defined set instead of being specified explicitly.
2590 Every set except the first one can be copied from the immediately preceding
2591 set.
2592 Similarly, if the quantization type is not $0$, the set can be copied from the
2593 set defined for the same color plane for the preceding quantization type.
2594 This formulation allows compact representation of, for example, the same
2595 set of quant ranges in both chroma channels, as is done in the original VP3,
2596 or the same set of quant ranges in INTRA and INTER modes.
2598 Each quant range is defined by a size and two base matrix indices, one for each
2599 end of the range.
2600 The base matrix for the end of one range is used as the start of the next
2601 range, so that for $n$ ranges, $n+1$ base matrices are specified.
2602 The base matrices for the \qi\ values between the two endpoints of the range
2603 are generated by linear interpolation.
2605 %TODO: figure
2607 The location of the endpoints of each range is encoded by their size.
2608 The \qi\ value for the left end-point is the sum of the sizes of all preceding
2609 ranges, and the \qi\ value for the right end-point adds the size of the
2610 current range.
2611 Thus the sum of the sizes of all the ranges MUST be 63, so that the last range
2612 falls on the last possible \qi\ value.
2614 The complete set of quantization parameters is decoded as follows:
2616 \begin{enumerate}
2617 \item
2618 Read a 4-bit unsigned integer.
2619 Assign \locvar{NBITS} the value read, plus one.
2620 \item
2621 For each consecutive value of \locvar{\qi} from $0$ to $63$, inclusive:
2622 \begin{enumerate}
2623 \item
2624 Read an \locvar{NBITS}-bit unsigned integer as
2625 $\bitvar{ACSCALE}[\locvar{\qi}]$.
2626 \end{enumerate}
2627 \item
2628 Read a 4-bit unsigned integer.
2629 Assign \locvar{NBITS} the value read, plus one.
2630 \item
2631 For each consecutive value of \locvar{\qi} from $0$ to $63$, inclusive:
2632 \begin{enumerate}
2633 \item
2634 Read an \locvar{NBITS}-bit unsigned integer as
2635 $\bitvar{DCSCALE}[\locvar{\qi}]$.
2636 \end{enumerate}
2637 \item
2638 Read a 9-bit unsigned integer.
2639 Assign \bitvar{NBMS} the value decoded, plus one.
2640 \bitvar{NBMS} MUST be no greater than 384.
2641 \item
2642 For each consecutive value of \locvar{\bmi} from $0$ to $(\bitvar{NBMS}-1)$,
2643 inclusive:
2644 \begin{enumerate}
2645 \item
2646 For each consecutive value of \locvar{\ci} from $0$ to $63$, inclusive:
2647 \begin{enumerate}
2648 \item
2649 Read an 8-bit unsigned integer as $\bitvar{BMS}[\locvar{\bmi}][\locvar{\ci}]$.
2650 \end{enumerate}
2651 \end{enumerate}
2652 \item
2653 For each consecutive value of \locvar{\qti} from $0$ to $1$, inclusive:
2654 \begin{enumerate}
2655 \item
2656 For each consecutive value of \locvar{\pli} from $0$ to $2$, inclusive:
2657 \begin{enumerate}
2658 \item
2659 If $\locvar{\qti}>0$ or $\locvar{\pli}>0$, read a 1-bit unsigned integer as
2660 \locvar{NEWQR}.
2661 \item
2662 Else, assign \locvar{NEWQR} the value one.
2663 \item
2664 If \locvar{NEWQR} is zero, then we are copying a previously defined set of
2665 quant ranges.
2666 In that case:
2667 \begin{enumerate}
2668 \item
2669 If $\locvar{\qti}>0$, read a 1-bit unsigned integer as \locvar{RPQR}.
2670 \item
2671 Else, assign \locvar{RPQR} the value zero.
2672 \item
2673 If \locvar{RPQR} is one, assign \locvar{\qtj} the value $(\locvar{\qti}-1)$
2674 and assign \locvar{\plj} the value \locvar{\pli}.
2675 This selects the set of quant ranges defined for the same color plane as this
2676 one, but for the previous quantization type.
2677 \item
2678 Else assign \locvar{\qtj} the value $(3*\locvar{\qti}+\locvar{\pli}-1)//3$ and
2679 assign \locvar{\plj} the value $(\locvar{\pli}+2)\%3$.
2680 This selects the most recent set of quant ranges defined.
2681 \item
2682 Assign $\bitvar{NQRS}[\locvar{\qti}][\locvar{\pli}]$ the value
2683 $\bitvar{NQRS}[\locvar{\qtj}][\locvar{\plj}]$.
2684 \item
2685 Assign $\bitvar{QRSIZES}[\locvar{\qti}][\locvar{\pli}]$ the values in
2686 $\bitvar{QRSIZES}[\locvar{\qtj}][\locvar{\plj}]$.
2687 \item
2688 Assign $\bitvar{QRBMIS}[\locvar{\qti}][\locvar{\pli}]$ the values in
2689 $\bitvar{QRBMIS}[\locvar{\qtj}][\locvar{\plj}]$.
2690 \end{enumerate}
2691 \item
2692 Else, \locvar{NEWQR} is one, which indicates that we are defining a new set of
2693 quant ranges.
2694 In that case:
2695 \begin{enumerate}
2696 \item
2697 Assign $\locvar{\qri}$ the value zero.
2698 \item
2699 Assign $\locvar{\qi}$ the value zero.
2700 \item
2701 Read an $\ilog(\bitvar{NBMS}-1)$-bit unsigned integer as\\
2702 $\bitvar{QRBMIS}[\locvar{\qti}][\locvar{\pli}][\locvar{\qri}]$.
2703 If this is greater than or equal to \bitvar{NBMS}, stop.
2704 The stream is undecodable.
2705 \item
2706 \label{step:qr-loop}
2707 Read an $\ilog(62-\locvar{\qi})$-bit unsigned integer.
2708 Assign\\ $\bitvar{QRSIZES}[\locvar{\qti}][\locvar{\pli}][\locvar{\qri}]$ the value
2709 read, plus one.
2710 \item
2711 Assign \locvar{\qi} the value $\locvar{\qi}+
2712 \bitvar{QRSIZES}[\locvar{\qti}][\locvar{\pli}][\locvar{\qri}]$.
2713 \item
2714 Assign \locvar{\qri} the value $\locvar{\qri}+1$.
2715 \item
2716 Read an $\ilog(\bitvar{NBMS}-1)$-bit unsigned integer as\\
2717 $\bitvar{QRBMIS}[\locvar{\qti}][\locvar{\pli}][\locvar{\qri}]$. If this is greater than or equal to \bitvar{NBMS}, stop. The stream is undecodable.
2718 \item
2719 If \locvar{\qi} is less than 63, go back to step~\ref{step:qr-loop}.
2720 \item
2721 If \locvar{\qi} is greater than 63, stop.
2722 The stream is undecodable.
2723 \item
2724 Assign $\bitvar{NQRS}[\locvar{\qti}][\locvar{\pli}]$ the value \locvar{\qri}.
2725 \end{enumerate}
2726 \end{enumerate}
2727 \end{enumerate}
2728 \end{enumerate}
2730 \paragraph{VP3 Compatibility}
2732 The quantization parameters are hardcoded in VP3.
2733 The values used are given in Appendix~\ref{app:vp3-quant-params}.
2735 \subsection{Computing a Quantization Matrix}
2736 \label{sub:quant-mat}
2738 \paragraph{Input parameters:}\hfill\\*
2739 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2740 \multicolumn{1}{c}{Name} &
2741 \multicolumn{1}{c}{Type} &
2742 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2743 \multicolumn{1}{c}{Signed?} &
2744 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2745 \bitvar{ACSCALE} & \multicolumn{1}{p{40pt}}{Integer array} &
2746 16 & No & A 64-element array of scale values for
2747 AC coefficients for each \qi\ value. \\
2748 \bitvar{DCSCALE} & \multicolumn{1}{p{40pt}}{Integer array} &
2749 16 & No & A 64-element array of scale values for
2750 the DC coefficient for each \qi\ value. \\
2751 \bitvar{BMS} & \multicolumn{1}{p{50pt}}{2D Integer array} &
2752 8 & No & A $\bitvar{NBMS}\times 64$ array
2753 containing the base matrices. \\
2754 \bitvar{NQRS} & \multicolumn{1}{p{50pt}}{2D Integer array} &
2755 6 & No & A $2\times 3$ array containing the
2756 number of quant ranges for a given \qti\ and \pli, respectively.
2757 This is at most $63$. \\
2758 \bitvar{QRSIZES} & \multicolumn{1}{p{50pt}}{3D Integer array} &
2759 6 & No & A $2\times 3\times 63$ array of the
2760 sizes of each quant range for a given \qti\ and \pli, respectively.
2761 Only the first $\bitvar{NQRS}[\qti][\pli]$ values are used. \\
2762 \bitvar{QRBMIS} & \multicolumn{1}{p{50pt}}{3D Integer array} &
2763 9 & No & A $2\times 3\times 64$ array of the
2764 \bmi's used for each quant range for a given \qti\ and \pli, respectively.
2765 Only the first $(\bitvar{NQRS}[\qti][\pli]+1)$ values are used. \\
2766 \bitvar{\qti} & Integer & 1 & No & A quantization type index.
2767 See Table~\ref{tab:quant-types}.\\
2768 \bitvar{\pli} & Integer & 2 & No & A color plane index.
2769 See Table~\ref{tab:color-planes}.\\
2770 \bitvar{\qi} & Integer & 6 & No & The quantization index. \\
2771 \bottomrule\end{tabularx}
2773 \paragraph{Output parameters:}\hfill\\*
2774 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2775 \multicolumn{1}{c}{Name} &
2776 \multicolumn{1}{c}{Type} &
2777 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2778 \multicolumn{1}{c}{Signed?} &
2779 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2780 \bitvar{QMAT} & \multicolumn{1}{p{40pt}}{Integer array} &
2781 16 & No & A 64-element array of quantization
2782 values for each DCT coefficient in natural order. \\
2783 \bottomrule\end{tabularx}
2785 \paragraph{Variables used:}\hfill\\*
2786 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2787 \multicolumn{1}{c}{Name} &
2788 \multicolumn{1}{c}{Type} &
2789 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2790 \multicolumn{1}{c}{Signed?} &
2791 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2792 \locvar{\ci} & Integer & 6 & No & The DCT coefficient index. \\
2793 \locvar{\bmi} & Integer & 9 & No & The base matrix index. \\
2794 \locvar{\bmj} & Integer & 9 & No & The base matrix index. \\
2795 \locvar{\qri} & Integer & 6 & No & The quant range index. \\
2796 \locvar{QISTART} & Integer & 6 & No & The left end-point of the \qi\ range. \\
2797 \locvar{QIEND } & Integer & 6 & No & The right end-point of the \qi\ range. \\
2798 \locvar{BM} & \multicolumn{1}{p{40pt}}{Integer array} &
2799 8 & No & A 64-element array containing the
2800 interpolated base matrix. \\
2801 \locvar{QMIN} & Integer & 16 & No & The minimum quantization value allowed
2802 for the current coefficient. \\
2803 \locvar{QSCALE} & Integer & 16 & No & The current scale value. \\
2804 \bottomrule\end{tabularx}
2805 \medskip
2807 The following procedure can be used to generate a single quantization matrix
2808 for a given quantization type, color plane, and \qi\ value, given the
2809 quantization parameters decoded in Section~\ref{sub:quant-params}.
2811 Note that the product of the scale value and the base matrix value is in units
2812 of $100$ths of a pixel value, and thus is divided by $100$ to return it to
2813 units of a single pixel value.
2814 This value is then scaled by four, to match the scaling of the DCT output,
2815 which is also a factor of four larger than the orthonormal version of the
2816 transform.
2818 \begin{enumerate}
2819 \item
2820 Assign \locvar{\qri} the index of a quant range such that
2821 \begin{displaymath}
2822 \bitvar{\qi} \ge \sum_{\qrj=0}^{\locvar{\qri}-1}
2823 \bitvar{QRSIZES}[\bitvar{\qti}][\bitvar{\pli}][\qrj],
2824 \end{displaymath}
2826 \begin{displaymath}
2827 \bitvar{\qi} \le \sum_{\qrj=0}^{\locvar{\qri}}
2828 \bitvar{QRSIZES}[\bitvar{\qti}][\bitvar{\pli}][\qrj],
2829 \end{displaymath}
2830 where summation from $0$ to $-1$ is defined to be zero.
2831 If there is more than one such value of $\locvar{\qri}$, i.e., if \bitvar{\qi}
2832 lies on the boundary between two quant ranges, then the output will be the
2833 same regardless of which one is chosen.
2834 \item
2835 Assign \locvar{QISTART} the value
2836 \begin{displaymath}
2837 \sum_{\qrj=0}^{\qri-1} \bitvar{QRSIZES}[\bitvar{\qti}][\bitvar{\pli}][\qrj].
2838 \end{displaymath}
2839 \item
2840 Assign \locvar{QIEND} the value
2841 \begin{displaymath}
2842 \sum_{\qrj=0}^{\qri} \bitvar{QRSIZES}[\bitvar{\qti}][\bitvar{\pli}][\qrj].
2843 \end{displaymath}
2844 \item
2845 Assign \locvar{\bmi} the value
2846 $\bitvar{QRBMIS}[\bitvar{\qti}][\bitvar{\pli}][\qri]$.
2847 \item
2848 Assign \locvar{\bmj} the value
2849 $\bitvar{QRBMIS}[\bitvar{\qti}][\bitvar{\pli}][\qri+1]$.
2850 \item
2851 For each consecutive value of \locvar{\ci} from $0$ to $63$, inclusive:
2852 \begin{enumerate}
2853 \item
2854 Assign $\locvar{BM}[\locvar{\ci}]$ the value
2855 \begin{displaymath}
2856 \begin{split}
2857 (&2*(\locvar{QIEND}-\bitvar{\qi})*\bitvar{BMS}[\locvar{\bmi}][\locvar{\ci}]\\
2858 &+2*(\bitvar{\qi}-
2859 \locvar{QISTART})*\bitvar{BMS}[\locvar{\bmj}][\locvar{\ci}]\\
2860 &+\bitvar{QRSIZES}[\bitvar{\qti}][\bitvar{\pli}][\locvar{\qri}])//
2861 (2*\bitvar{QRSIZES}[\bitvar{\qti}][\bitvar{\pli}][\locvar{\qri}])
2862 \end{split}
2863 \end{displaymath}
2864 \item
2865 Assign \locvar{QMIN} the value given by Table~\ref{tab:qmin} according to
2866 \bitvar{\qti} and \locvar{\ci}.
2868 \begin{table}[htbp]
2869 \begin{center}
2870 \begin{tabular}{clr}\toprule
2871 Coefficient & \multicolumn{1}{c}{\bitvar{\qti}}
2872 & \locvar{QMIN} \\\midrule
2873 $\locvar{\ci}=0$ & $0$ (Intra) & $16$ \\
2874 $\locvar{\ci}>0$ & $0$ (Intra) & $8$ \\
2875 $\locvar{\ci}=0$ & $1$ (Inter) & $32$ \\
2876 $\locvar{\ci}>0$ & $1$ (Inter) & $16$ \\
2877 \bottomrule\end{tabular}
2878 \end{center}
2879 \caption{Minimum Quantization Values}
2880 \label{tab:qmin}
2881 \end{table}
2883 \item
2884 If \locvar{\ci} equals zero, assign $\locvar{QSCALE}$ the value
2885 $\bitvar{DCSCALE}[\bitvar{\qi}]$.
2886 \item
2887 Else, assign $\locvar{QSCALE}$ the value
2888 $\bitvar{ACSCALE}[\bitvar{\qi}]$.
2889 \item
2890 Assign $\bitvar{QMAT}[\locvar{\ci}]$ the value
2891 \begin{displaymath}
2892 \max(\locvar{QMIN},
2893 \min((\locvar{QSCALE}*\locvar{BM}[\locvar{\ci}]//100)*4,4096)).
2894 \end{displaymath}
2895 \end{enumerate}
2896 \end{enumerate}
2898 \subsection{DCT Token Huffman Tables}
2899 \label{sub:huffman-tables}
2901 \paragraph{Input parameters:} None.
2903 \paragraph{Output parameters:}\hfill\\*
2904 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2905 \multicolumn{1}{c}{Name} &
2906 \multicolumn{1}{c}{Type} &
2907 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2908 \multicolumn{1}{c}{Signed?} &
2909 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2910 \bitvar{HTS} & \multicolumn{3}{l}{Huffman table array}
2911 & An 80-element array of Huffman tables
2912 with up to 32 entries each. \\
2913 \bottomrule\end{tabularx}
2915 \paragraph{Variables used:}\hfill\\*
2916 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2917 \multicolumn{1}{c}{Name} &
2918 \multicolumn{1}{c}{Type} &
2919 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2920 \multicolumn{1}{c}{Signed?} &
2921 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2922 \locvar{HBITS} & Bit string & 32 & No & A string of up to 32 bits. \\
2923 \locvar{TOKEN} & Integer & 5 & No & A single DCT token value. \\
2924 \locvar{ISLEAF} & Integer & 1 & No & Flag that indicates if the current
2925 node of the tree being decoded is a leaf node. \\
2926 \bottomrule\end{tabularx}
2927 \medskip
2929 The Huffman tables used to decode DCT tokens are stored in the setup header in
2930 the form of a binary tree.
2931 This enforces the requirements that the code be full---so that any sequence of
2932 bits will produce a valid sequence of tokens---and that the code be
2933 prefix-free so that there is no ambiguity when decoding.
2935 One more restriction is placed on the tables that is not explicitly enforced by
2936 the bitstream syntax, but nevertheless must be obeyed by compliant encoders.
2937 There must be no more than 32 entries in a single table.
2938 Note that this restriction along with the fullness requirement limits the
2939 maximum size of a single Huffman code to 32 bits.
2940 It is probably a good idea to enforce this latter consequence explicitly when
2941 implementing the decoding procedure as a recursive algorithm, so as to prevent
2942 a possible stack overflow given an invalid bitstream.
2944 Although there are 32 different DCT tokens, and thus a normal table will have
2945 exactly 32 entries, this is not explicitly required.
2946 It is allowable to use a Huffman code that omits some---but not all---of the
2947 possible token values.
2948 It is also allowable, if not particularly useful, to specify multiple codes for
2949 the same token value in a single table.
2950 Note also that token values may appear in the tree in any order.
2951 In particular, it is not safe to assume that token value zero (which ends a
2952 single block) has a Huffman code of all zeros.
2954 The tree is decoded as follows:
2956 \begin{enumerate}
2957 \item
2958 For each consecutive value of \locvar{\hti} from $0$ to $79$, inclusive:
2959 \begin{enumerate}
2960 \item
2961 Set \locvar{HBITS} to the empty string.
2962 \item
2963 \label{step:huff-tree-loop}
2964 If \locvar{HBITS} is longer than 32 bits in length, stop.
2965 The stream is undecodable.
2966 \item
2967 Read a 1-bit unsigned integer as \locvar{ISLEAF}.
2968 \item
2969 If \locvar{ISLEAF} is one:
2970 \begin{enumerate}
2971 \item
2972 If the number of entries in table $\bitvar{HTS}[\locvar{\hti}]$ is already 32,
2973 stop.
2974 The stream is undecodable.
2975 \item
2976 Read a 5-bit unsigned integer as \locvar{TOKEN}.
2977 \item
2978 Add the pair $(\locvar{HBITS},\locvar{TOKEN})$ to Huffman table
2979 $\bitvar{HTS}[\locvar{\hti}]$.
2980 \end{enumerate}
2981 \item
2982 Otherwise:
2983 \begin{enumerate}
2984 \item
2985 Add a `0' to the end of \locvar{HBITS}.
2986 \item
2987 Decode the `0' sub-tree using this procedure, starting from
2988 step~\ref{step:huff-tree-loop}.
2989 \item
2990 Remove the `0' from the end of \locvar{HBITS} and add a `1' to the end of
2991 \locvar{HBITS}.
2992 \item
2993 Decode the `1' sub-tree using this procedure, starting from
2994 step~\ref{step:huff-tree-loop}.
2995 \item
2996 Remove the `1' from the end of \locvar{HBITS}.
2997 \end{enumerate}
2998 \end{enumerate}
2999 \end{enumerate}
3001 \paragraph{VP3 Compatibility}
3003 The DCT token Huffman tables are hardcoded in VP3.
3004 The values used are given in Appendix~\ref{app:vp3-huffman-tables}.
3006 \subsection{Setup Header Decode}
3008 \paragraph{Input parameters:} None.
3010 \paragraph{Output parameters:}\hfill\\*
3011 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3012 \multicolumn{1}{c}{Name} &
3013 \multicolumn{1}{c}{Type} &
3014 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3015 \multicolumn{1}{c}{Signed?} &
3016 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3017 \bitvar{LFLIMS} & \multicolumn{1}{p{40pt}}{Integer array} &
3018 7 & No & A 64-element array of loop filter limit
3019 values. \\
3020 \bitvar{ACSCALE} & \multicolumn{1}{p{40pt}}{Integer array} &
3021 16 & No & A 64-element array of scale values for
3022 AC coefficients for each \qi\ value. \\
3023 \bitvar{DCSCALE} & \multicolumn{1}{p{40pt}}{Integer array} &
3024 16 & No & A 64-element array of scale values for
3025 the DC coefficient for each \qi\ value. \\
3026 \bitvar{NBMS} & Integer & 10 & No & The number of base matrices. \\
3027 \bitvar{BMS} & \multicolumn{1}{p{50pt}}{2D Integer array} &
3028 8 & No & A $\bitvar{NBMS}\times 64$ array
3029 containing the base matrices. \\
3030 \bitvar{NQRS} & \multicolumn{1}{p{50pt}}{2D Integer array} &
3031 6 & No & A $2\times 3$ array containing the
3032 number of quant ranges for a given \qti\ and \pli, respectively.
3033 This is at most $63$. \\
3034 \bitvar{QRSIZES} & \multicolumn{1}{p{50pt}}{3D Integer array} &
3035 6 & No & A $2\times 3\times 63$ array of the
3036 sizes of each quant range for a given \qti\ and \pli, respectively.
3037 Only the first $\bitvar{NQRS}[\qti][\pli]$ values will be used. \\
3038 \bitvar{QRBMIS} & \multicolumn{1}{p{50pt}}{3D Integer array} &
3039 9 & No & A $2\times 3\times 64$ array of the
3040 \bmi's used for each quant range for a given \qti\ and \pli, respectively.
3041 Only the first $(\bitvar{NQRS}[\qti][\pli]+1)$ values will be used. \\
3042 \bitvar{HTS} & \multicolumn{3}{l}{Huffman table array}
3043 & An 80-element array of Huffman tables
3044 with up to 32 entries each. \\
3045 \bottomrule\end{tabularx}
3047 \paragraph{Variables used:} None.
3048 \medskip
3050 The complete setup header is decoded as follows:
3052 \begin{enumerate}
3053 \item
3054 Decode the common header fields according to the procedure described in
3055 Section~\ref{sub:common-header}.
3056 If \bitvar{HEADERTYPE} returned by this procedure is not \hex{82}, then stop.
3057 This packet is not the setup header.
3058 \item
3059 Decode the loop filter limit value table using the procedure given in
3060 Section~\ref{sub:loop-filter-limits} into \bitvar{LFLIMS}.
3061 \item
3062 Decode the quantization parameters using the procedure given in
3063 Section~\ref{sub:quant-params}.
3064 The results are stored in \bitvar{ACSCALE}, \bitvar{DCSCALE}, \bitvar{NBMS},
3065 \bitvar{BMS}, \bitvar{NQRS}, \bitvar{QRSIZES}, and \bitvar{QRBMIS}.
3066 \item
3067 Decode the DCT token Huffman tables using the procedure given in
3068 Section~\ref{sub:huffman-tables} into \bitvar{HTS}.
3069 \end{enumerate}
3071 \chapter{Frame Decode}
3073 This section describes the complete procedure necessary to decode a single
3074 frame.
3075 This begins with the frame header, followed by coded block flags, macro block
3076 modes, motion vectors, block-level \qi\ values, and finally the DCT residual
3077 tokens, which are used to reconstruct the frame.
3079 \section{Frame Header Decode}
3080 \label{sub:frame-header}
3082 \paragraph{Input parameters:} None.
3084 \paragraph{Output parameters:}\hfill\\*
3085 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3086 \multicolumn{1}{c}{Name} &
3087 \multicolumn{1}{c}{Type} &
3088 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3089 \multicolumn{1}{c}{Signed?} &
3090 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3091 \bitvar{FTYPE} & Integer & 1 & No & The frame type. \\
3092 \bitvar{NQIS} & Integer & 2 & No & The number of \qi\ values. \\
3093 \bitvar{QIS} & \multicolumn{1}{p{40pt}}{Integer array} &
3094 6 & No & An \bitvar{NQIS}-element array of
3095 \qi\ values. \\
3096 \bottomrule\end{tabularx}
3098 \paragraph{Variables used:}\hfill\\*
3099 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3100 \multicolumn{1}{c}{Name} &
3101 \multicolumn{1}{c}{Type} &
3102 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3103 \multicolumn{1}{c}{Signed?} &
3104 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3105 \locvar{MOREQIS} & Integer & 1 & No & A flag indicating there are more
3106 \qi\ values to be decoded. \\
3107 \bottomrule\end{tabularx}
3108 \medskip
3110 The frame header selects which type of frame is being decoded, intra or inter,
3111 and contains the list of \qi\ values that will be used in this frame.
3112 The first \qi\ value will be used for {\em all} DC coefficients in all blocks.
3113 This is done to ensure that DC prediction, which is done in the quantized
3114 domain, works as expected.
3115 The AC coefficients, however, can be dequantized using any \qi\ value on the
3116 list, selected on a block-by-block basis.
3118 \begin{enumerate}
3119 \item
3120 Read a 1-bit unsigned integer.
3121 If the value read is not zero, stop.
3122 This is not a data packet.
3123 \item
3124 Read a 1-bit unsigned integer as \bitvar{FTYPE}.
3125 This is the type of frame being decoded, as given in
3126 Table~\ref{tab:frame-type}.
3127 If this is the first frame being decoded, this MUST be zero.
3129 \begin{table}[htbp]
3130 \begin{center}
3131 \begin{tabular}{cl}\toprule
3132 \bitvar{FTYPE} & Frame Type \\\midrule
3133 $0$ & Intra frame \\
3134 $1$ & Inter frame \\
3135 \bottomrule\end{tabular}
3136 \end{center}
3137 \caption{Frame Type Values}
3138 \label{tab:frame-type}
3139 \end{table}
3141 \item
3142 Read in a 6-bit unsigned integer as $\bitvar{QIS}[0]$.
3143 \item
3144 Read a 1-bit unsigned integer as \locvar{MOREQIS}.
3145 \item
3146 If \locvar{MOREQIS} is zero, set \bitvar{NQIS} to 1.
3147 \item
3148 Otherwise:
3149 \begin{enumerate}
3150 \item
3151 Read in a 6-bit unsigned integer as $\bitvar{QIS}[1]$.
3152 \item
3153 Read a 1-bit unsigned integer as \locvar{MOREQIS}.
3154 \item
3155 If \locvar{MOREQIS} is zero, set \bitvar{NQIS} to 2.
3156 \item
3157 Otherwise:
3158 \begin{enumerate}
3159 \item
3160 Read in a 6-bit unsigned integer as $\bitvar{QIS}[2]$.
3161 \item
3162 Set \bitvar{NQIS} to 3.
3163 \end{enumerate}
3164 \end{enumerate}
3165 \item
3166 If \bitvar{FTYPE} is 0, read a 3-bit unsigned integer.
3167 These bits are reserved.
3168 If this value is not zero, stop.
3169 This frame is not decodable according to this specification.
3170 \end{enumerate}
3172 \paragraph{VP3 Compatibility}
3174 The precise format of the frame header is substantially different in Theora
3175 than in VP3.
3176 The original VP3 format includes a larger number of unused, reserved bits that
3177 are required to be zero.
3178 The original VP3 frame header also can contain only a single \qi\ value,
3179 because VP3 does not support block-level \qi\ values and uses the same
3180 \qi\ value for all the coefficients in a frame.
3182 \section{Run-Length Encoded Bit Strings}
3184 Two variations of run-length encoding are used to store sequences of bits for
3185 the block coded flags and the block-level \qi\ values.
3186 The procedures to decode these bit sequences are specified in the following two
3187 sections.
3189 \subsection{Long-Run Bit String Decode}
3190 \label{sub:long-run}
3192 \paragraph{Input parameters:}\hfill\\*
3193 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3194 \multicolumn{1}{c}{Name} &
3195 \multicolumn{1}{c}{Type} &
3196 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3197 \multicolumn{1}{c}{Signed?} &
3198 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3199 \bitvar{NBITS} & Integer & 36 & No & The number of bits to decode. \\
3200 \bottomrule\end{tabularx}
3202 \paragraph{Output parameters:}\hfill\\*
3203 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3204 \multicolumn{1}{c}{Name} &
3205 \multicolumn{1}{c}{Type} &
3206 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3207 \multicolumn{1}{c}{Signed?} &
3208 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3209 \bitvar{BITS} & Bit string & & & The decoded bits. \\
3210 \bottomrule\end{tabularx}
3212 \paragraph{Variables used:}\hfill\\*
3213 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3214 \multicolumn{1}{c}{Name} &
3215 \multicolumn{1}{c}{Type} &
3216 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3217 \multicolumn{1}{c}{Signed?} &
3218 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3219 \locvar{LEN} & Integer & 36 & No & The number of bits decoded so far. \\
3220 \locvar{BIT} & Integer & 1 & No & The value associated with the current
3221 run. \\
3222 \locvar{RLEN} & Integer & 13 & No & The length of the current run. \\
3223 \locvar{RBITS} & Integer & 4 & No & The number of extra bits needed to
3224 decode the run length. \\
3225 \locvar{RSTART} & Integer & 6 & No & The start of the possible run-length
3226 values for a given Huffman code. \\
3227 \locvar{ROFFS} & Integer & 12 & No & The offset from \locvar{RSTART} of the
3228 run-length. \\
3229 \bottomrule\end{tabularx}
3230 \medskip
3232 There is no practical limit to the number of consecutive 0's and 1's that can
3233 be decoded with this procedure.
3234 In reality, the run length is limited by the number of blocks in a single
3235 frame, because more will never be requested.
3236 A separate procedure described in Section~\ref{sub:short-run} is used when
3237 there is a known limit on the maximum size of the runs.
3239 For the first run, a single bit value is read, and then a Huffman-coded
3240 representation of a run length is decoded, and that many copies of the bit
3241 value are appended to the bit string.
3242 For each consecutive run, the value of the bit is toggled instead of being read
3243 from the bitstream.
3245 The only exception is if the length of the previous run was 4129, the maximum
3246 possible length encodable by the Huffman-coded representation.
3247 In this case another bit value is read from the stream, to allow for
3248 consecutive runs of 0's or 1's longer than this maximum.
3250 Note that in both cases---for the first run and after a run of length 4129---if
3251 no more bits are needed, then no bit value is read.
3253 The complete decoding procedure is as follows:
3255 \begin{enumerate}
3256 \item
3257 Assign \locvar{LEN} the value 0.
3258 \item
3259 Assign \bitvar{BITS} the empty string.
3260 \item
3261 If \locvar{LEN} equals \bitvar{NBITS}, return the completely decoded string
3262 \bitvar{BITS}.
3263 \item
3264 Read a 1-bit unsigned integer as \locvar{BIT}.
3265 \item
3266 \label{step:long-run-loop}
3267 Read a bit at a time until one of the Huffman codes given in
3268 Table~\ref{tab:long-run} is recognized.
3270 \begin{table}[htbp]
3271 \begin{center}
3272 \begin{tabular}{lrrl}\toprule
3273 Huffman Code & \locvar{RSTART} & \locvar{RBITS} & Run Lengths \\\midrule
3274 \bin{0} & $1$ & $0$ & $1$ \\
3275 \bin{10} & $2$ & $1$ & $2\ldots 3$ \\
3276 \bin{110} & $4$ & $1$ & $4\ldots 5$ \\
3277 \bin{1110} & $6$ & $2$ & $6\ldots 9$ \\
3278 \bin{11110} & $10$ & $3$ & $10\ldots 17$ \\
3279 \bin{111110} & $18$ & $4$ & $18\ldots 33$ \\
3280 \bin{111111} & $34$ & $12$ & $34\ldots 4129$ \\
3281 \bottomrule\end{tabular}
3282 \end{center}
3283 \caption{Huffman Codes for Long Run Lengths}
3284 \label{tab:long-run}
3285 \end{table}
3287 \item
3288 Assign \locvar{RSTART} and \locvar{RBITS} the values given in
3289 Table~\ref{tab:long-run} according to the Huffman code read.
3290 \item
3291 Read an \locvar{RBITS}-bit unsigned integer as \locvar{ROFFS}.
3292 \item
3293 Assign \locvar{RLEN} the value $(\locvar{RSTART}+\locvar{ROFFS})$.
3294 \item
3295 Append \locvar{RLEN} copies of \locvar{BIT} to \bitvar{BITS}.
3296 \item
3297 Add \locvar{RLEN} to the value \locvar{LEN}.
3298 \locvar{LEN} MUST be less than or equal to \bitvar{NBITS}.
3299 \item
3300 If \locvar{LEN} equals \bitvar{NBITS}, return the completely decoded string
3301 \bitvar{BITS}.
3302 \item
3303 If \locvar{RLEN} equals 4129, read a 1-bit unsigned integer as \locvar{BIT}.
3304 \item
3305 Otherwise, assign \locvar{BIT} the value $(1-\locvar{BIT})$.
3306 \item
3307 Continue decoding runs from step~\ref{step:long-run-loop}.
3308 \end{enumerate}
3310 \paragraph{VP3 Compatibility}
3312 VP3 does not read a new bit value after decoding a run length of 4129.
3313 This limits the maximum number of consecutive 0's or 1's to 4129 in
3314 VP3-compatible streams.
3315 For reasonable video sizes of $1920\times 1080$ or less in 4:2:0 format---the
3316 only pixel format VP3 supports---this does not pose any problems because runs
3317 longer than 4129 are not needed.
3319 \subsection{Short-Run Bit String Decode}
3320 \label{sub:short-run}
3322 \paragraph{Input parameters:}\hfill\\*
3323 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3324 \multicolumn{1}{c}{Name} &
3325 \multicolumn{1}{c}{Type} &
3326 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3327 \multicolumn{1}{c}{Signed?} &
3328 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3329 \bitvar{NBITS} & Integer & 36 & No & The number of bits to decode. \\
3330 \bottomrule\end{tabularx}
3332 \paragraph{Output parameters:}\hfill\\*
3333 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3334 \multicolumn{1}{c}{Name} &
3335 \multicolumn{1}{c}{Type} &
3336 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3337 \multicolumn{1}{c}{Signed?} &
3338 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3339 \bitvar{BITS} & Bit string & & & The decoded bits. \\
3340 \bottomrule\end{tabularx}
3342 \paragraph{Variables used:}\hfill\\*
3343 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3344 \multicolumn{1}{c}{Name} &
3345 \multicolumn{1}{c}{Type} &
3346 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3347 \multicolumn{1}{c}{Signed?} &
3348 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3349 \locvar{LEN} & Integer & 36 & No & The number of bits decoded so far. \\
3350 \locvar{BIT} & Integer & 1 & No & The value associated with the current
3351 run. \\
3352 \locvar{RLEN} & Integer & 5 & No & The length of the current run. \\
3353 \locvar{RBITS} & Integer & 3 & No & The number of extra bits needed to
3354 decode the run length. \\
3355 \locvar{RSTART} & Integer & 4 & No & The start of the possible run-length
3356 values for a given Huffman code. \\
3357 \locvar{ROFFS} & Integer & 4 & No & The offset from \locvar{RSTART} of the
3358 run-length. \\
3359 \bottomrule\end{tabularx}
3360 \medskip
3362 This procedure is similar to the procedure outlined in
3363 Section~\ref{sub:long-run}, except that the maximum number of consecutive 0's
3364 or 1's is limited to 30.
3365 This is the maximum run length needed when encoding a bit for each of the 16
3366 blocks in a super block when it is known that not all the bits in a super
3367 block are the same.
3369 The complete decoding procedure is as follows:
3371 \begin{enumerate}
3372 \item
3373 Assign \locvar{LEN} the value 0.
3374 \item
3375 Assign \bitvar{BITS} the empty string.
3376 \item
3377 If \locvar{LEN} equals \bitvar{NBITS}, return the completely decoded string
3378 \bitvar{BITS}.
3379 \item
3380 Read a 1-bit unsigned integer as \locvar{BIT}.
3381 \item
3382 \label{step:short-run-loop}
3383 Read a bit at a time until one of the Huffman codes given in
3384 Table~\ref{tab:short-run} is recognized.
3386 \begin{table}[htbp]
3387 \begin{center}
3388 \begin{tabular}{lrrl}\toprule
3389 Huffman Code & \locvar{RSTART} & \locvar{RBITS} & Run Lengths \\\midrule
3390 \bin{0} & $1$ & $1$ & $1\ldots 2$ \\
3391 \bin{10} & $3$ & $1$ & $3\ldots 4$ \\
3392 \bin{110} & $5$ & $1$ & $5\ldots 6$ \\
3393 \bin{1110} & $7$ & $2$ & $7\ldots 10$ \\
3394 \bin{11110} & $11$ & $2$ & $11\ldots 14$ \\
3395 \bin{11111} & $15$ & $4$ & $15\ldots 30$ \\
3396 \bottomrule\end{tabular}
3397 \end{center}
3398 \caption{Huffman Codes for Short Run Lengths}
3399 \label{tab:short-run}
3400 \end{table}
3402 \item
3403 Assign \locvar{RSTART} and \locvar{RBITS} the values given in
3404 Table~\ref{tab:short-run} according to the Huffman code read.
3405 \item
3406 Read an \locvar{RBITS}-bit unsigned integer as \locvar{ROFFS}.
3407 \item
3408 Assign \locvar{RLEN} the value $(\locvar{RSTART}+\locvar{ROFFS})$.
3409 \item
3410 Append \locvar{RLEN} copies of \locvar{BIT} to \bitvar{BITS}.
3411 \item
3412 Add \locvar{RLEN} to the value \locvar{LEN}.
3413 \locvar{LEN} MUST be less than or equal to \bitvar{NBITS}.
3414 \item
3415 If \locvar{LEN} equals \bitvar{NBITS}, return the completely decoded string
3416 \bitvar{BITS}.
3417 \item
3418 Assign \locvar{BIT} the value $(1-\locvar{BIT})$.
3419 \item
3420 Continue decoding runs from step~\ref{step:short-run-loop}.
3421 \end{enumerate}
3423 \section{Coded Block Flags Decode}
3424 \label{sub:coded-blocks}
3426 \paragraph{Input parameters:}\hfill\\*
3427 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3428 \multicolumn{1}{c}{Name} &
3429 \multicolumn{1}{c}{Type} &
3430 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3431 \multicolumn{1}{c}{Signed?} &
3432 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3433 \bitvar{FTYPE} & Integer & 1 & No & The frame type. \\
3434 \bitvar{NSBS} & Integer & 32 & No & The total number of super blocks in a
3435 frame. \\
3436 \bitvar{NBS} & Integer & 36 & No & The total number of blocks in a
3437 frame. \\
3438 \bottomrule\end{tabularx}
3440 \paragraph{Output parameters:}\hfill\\*
3441 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3442 \multicolumn{1}{c}{Name} &
3443 \multicolumn{1}{c}{Type} &
3444 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3445 \multicolumn{1}{c}{Signed?} &
3446 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3447 \bitvar{BCODED} & \multicolumn{1}{p{40pt}}{Integer Array} &
3448 1 & No & An \bitvar{NBS}-element array of flags
3449 indicating which blocks are coded. \\
3450 \bottomrule\end{tabularx}
3452 \paragraph{Variables used:}\hfill\\*
3453 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3454 \multicolumn{1}{c}{Name} &
3455 \multicolumn{1}{c}{Type} &
3456 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3457 \multicolumn{1}{c}{Signed?} &
3458 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3459 \locvar{NBITS} & Integer & 36 & No & The length of a bit string to decode. \\
3460 \locvar{BITS} & Bit string & & & A decoded set of flags. \\
3461 \locvar{SBPCODED} & \multicolumn{1}{p{40pt}}{Integer Array} &
3462 1 & No & An \bitvar{NSBS}-element array of flags
3463 indicating whether or not each super block is partially coded. \\
3464 \locvar{SBFCODED} & \multicolumn{1}{p{40pt}}{Integer Array} &
3465 1 & No & An \bitvar{NSBS}-element array of flags
3466 indicating whether or not each non-partially coded super block is fully
3467 coded. \\
3468 \locvar{\sbi} & Integer & 32 & No & The index of the current super
3469 block. \\
3470 \locvar{\bi} & Integer & 36 & No & The index of the current block in coded
3471 order. \\
3472 \bottomrule\end{tabularx}
3473 \medskip
3475 This procedure determines which blocks are coded in a given frame.
3476 In an intra frame, it marks all blocks coded.
3477 In an inter frame, however, any or all of the blocks may remain uncoded.
3478 The output is a list of bit flags, one for each block, marking it coded or not
3479 coded.
3481 It is important to note that flags are still decoded for any blocks which lie
3482 entirely outside the picture region, even though they are not displayed.
3483 Encoders MAY choose to code such blocks.
3484 Decoders MUST faithfully reconstruct such blocks, because their contents can be
3485 used for predictors in future frames.
3486 Flags are \textit{not} decoded for portions of a super block which lie outside
3487 the full frame, as there are no blocks in those regions.
3489 The complete procedure is as follows:
3491 \begin{enumerate}
3492 \item
3493 If \bitvar{FTYPE} is zero (intra frame):
3494 \begin{enumerate}
3495 \item
3496 For each consecutive value of \locvar{\bi} from 0 to $(\bitvar{NBS}-1)$, assign
3497 $\bitvar{BCODED}[\locvar{\bi}]$ the value one.
3498 \end{enumerate}
3499 \item
3500 Otherwise (inter frame):
3501 \begin{enumerate}
3502 \item
3503 Assign \locvar{NBITS} the value \bitvar{NSBS}.
3504 \item
3505 Read an \locvar{NBITS}-bit bit string into \locvar{BITS}, using the procedure
3506 described in Section~\ref{sub:long-run}.
3507 This represents the list of partially coded super blocks.
3508 \item
3509 For each consecutive value of \locvar{\sbi} from 0 to $(\bitvar{NSBS}-1)$,
3510 remove the bit at the head of the string \locvar{BITS} and assign it to
3511 $\locvar{SBPCODED}[\locvar{\sbi}]$.
3512 \item
3513 Assign \locvar{NBITS} the total number of super blocks such that \\
3514 $\locvar{SBPCODED}[\locvar{\sbi}]$ equals zero.
3515 \item
3516 Read an \locvar{NBITS}-bit bit string into \locvar{BITS}, using the procedure
3517 described in Section~\ref{sub:long-run}.
3518 This represents the list of fully coded super blocks.
3519 \item
3520 For each consecutive value of \locvar{\sbi} from 0 to $(\bitvar{NSBS}-1)$ such
3521 that $\locvar{SBPCODED}[\locvar{\sbi}]$ equals zero, remove the bit at the
3522 head of the string \locvar{BITS} and assign it to
3523 $\locvar{SBFCODED}[\locvar{\sbi}]$.
3524 \item
3525 Assign \locvar{NBITS} the number of blocks contained in super blocks where
3526 $\locvar{SBPCODED}[\locvar{\sbi}]$ equals one.
3527 Note that this might {\em not} be equal to 16 times the number of partially
3528 coded super blocks, since super blocks which overlap the edge of the frame
3529 will have fewer than 16 blocks in them.
3530 \item
3531 Read an \locvar{NBITS}-bit bit string into \locvar{BITS}, using the procedure
3532 described in Section~\ref{sub:short-run}.
3533 \item
3534 For each block in coded order---indexed by \locvar{\bi}:
3535 \begin{enumerate}
3536 \item
3537 Assign \locvar{\sbi} the index of the super block containing block
3538 \locvar{\bi}.
3539 \item
3540 If $\locvar{SBPCODED}[\locvar{\sbi}]$ is zero, assign
3541 $\bitvar{BCODED}[\locvar{\bi}]$ the value $\locvar{SBFCODED}[\locvar{\sbi}]$.
3542 \item
3543 Otherwise, remove the bit at the head of the string \locvar{BITS} and assign it
3544 to $\bitvar{BCODED}[\locvar{\bi}]$.
3545 \end{enumerate}
3546 \end{enumerate}
3547 \end{enumerate}
3549 \section{Macro Block Coding Modes}
3550 \label{sub:mb-modes}
3552 \paragraph{Input parameters:}\hfill\\*
3553 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3554 \multicolumn{1}{c}{Name} &
3555 \multicolumn{1}{c}{Type} &
3556 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3557 \multicolumn{1}{c}{Signed?} &
3558 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3559 \bitvar{FTYPE} & Integer & 1 & No & The frame type. \\
3560 \bitvar{NMBS} & Integer & 32 & No & The total number of macro blocks in a
3561 frame. \\
3562 \bitvar{NBS} & Integer & 36 & No & The total number of blocks in a
3563 frame. \\
3564 \bitvar{BCODED} & \multicolumn{1}{p{40pt}}{Integer Array} &
3565 1 & No & An \bitvar{NBS}-element array of flags
3566 indicating which blocks are coded. \\
3567 \bottomrule\end{tabularx}
3569 \paragraph{Output parameters:}\hfill\\*
3570 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3571 \multicolumn{1}{c}{Name} &
3572 \multicolumn{1}{c}{Type} &
3573 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3574 \multicolumn{1}{c}{Signed?} &
3575 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3576 \bitvar{MBMODES} & \multicolumn{1}{p{40pt}}{Integer Array} &
3577 3 & No & An \bitvar{NMBS}-element array of coding
3578 modes for each macro block. \\
3579 \bottomrule\end{tabularx}
3581 \paragraph{Variables used:}\hfill\\*
3582 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3583 \multicolumn{1}{c}{Name} &
3584 \multicolumn{1}{c}{Type} &
3585 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3586 \multicolumn{1}{c}{Signed?} &
3587 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3588 \locvar{MSCHEME} & Integer & 3 & No & The mode coding scheme. \\
3589 \locvar{MALPHABET} & \multicolumn{1}{p{40pt}}{Integer array}
3590 & 3 & No & The list of modes corresponding to each
3591 Huffman code. \\
3592 \locvar{\mbi} & Integer & 32 & No & The index of the current macro
3593 block. \\
3594 \locvar{\bi} & Integer & 36 & No & The index of the current block in
3595 coded order. \\
3596 \locvar{\mi} & Integer & 32 & No & The index of a Huffman code from
3597 Table~\ref{tab:mode-codes}, starting from $0$. \\
3598 \bottomrule\end{tabularx}
3599 \medskip
3601 In an intra frame, every macro block is marked as coded in INTRA mode.
3602 In an inter frame, however, a macro block can be coded in one of eight coding
3603 modes, given in Table~\ref{tab:coding-modes}.
3604 All of the blocks in all color planes contained in a macro block will be
3605 assigned the coding mode of that macro block.
3607 \begin{table}[htbp]
3608 \begin{center}
3609 \begin{tabular}{cl}\toprule
3610 Index & Coding Mode \\\midrule
3611 $0$ & INTER\_NOMV \\
3612 $1$ & INTRA \\
3613 $2$ & INTER\_MV \\
3614 $3$ & INTER\_MV\_LAST \\
3615 $4$ & INTER\_MV\_LAST2 \\
3616 $5$ & INTER\_GOLDEN\_NOMV \\
3617 $6$ & INTER\_GOLDEN\_MV \\
3618 $7$ & INTER\_MV\_FOUR \\
3619 \bottomrule\end{tabular}
3620 \end{center}
3621 \caption{Coding Modes}
3622 \label{tab:coding-modes}
3623 \end{table}
3625 An important thing to note is that a coding mode is only stored in the
3626 bitstream for a macro block if it has at least one {\em luma} block coded.
3627 A macro block that contains coded blocks in the chroma planes, but not in the
3628 luma plane, MUST be coded in INTER\_NOMV mode.
3629 Thus, no coding mode needs to be decoded for such a macro block.
3631 Coding modes are encoded using one of eight different schemes.
3632 Schemes 0 through 6 use the same simple Huffman code to represent the mode
3633 numbers, as given in Table~\ref{tab:mode-codes}.
3634 The difference in the schemes is the mode number assigned to each code.
3635 Scheme 0 uses an assignment specified in the bitstream, while schemes 1--6 use
3636 a fixed assignment, also given in Table~\ref{tab:mode-codes}.
3637 Scheme 7 simply codes each mode directly in the bitstream using three bits.
3639 \begin{table}[htbp]
3640 \begin{center}
3641 \begin{tabular}{lcccccc}\toprule
3642 Scheme & $1$ & $2$ & $3$ & $4$ & $5$ & $6$ \\\cmidrule{2-7}
3643 Huffman Code & \multicolumn{6}{c}{Coding Mode} \\\midrule
3644 \bin{0} & $3$ & $3$ & $3$ & $3$ & $0$ & $0$ \\
3645 \bin{10} & $4$ & $4$ & $2$ & $2$ & $3$ & $5$ \\
3646 \bin{110} & $2$ & $0$ & $4$ & $0$ & $4$ & $3$ \\
3647 \bin{1110} & $0$ & $2$ & $0$ & $4$ & $2$ & $4$ \\
3648 \bin{11110} & $1$ & $1$ & $1$ & $1$ & $1$ & $2$ \\
3649 \bin{111110} & $5$ & $5$ & $5$ & $5$ & $5$ & $1$ \\
3650 \bin{1111110} & $6$ & $6$ & $6$ & $6$ & $6$ & $6$ \\
3651 \bin{1111111} & $7$ & $7$ & $7$ & $7$ & $7$ & $7$ \\
3652 \bottomrule\end{tabular}
3653 \end{center}
3654 \caption{Macro Block Mode Schemes}
3655 \label{tab:mode-codes}
3656 \end{table}
3658 \begin{enumerate}
3659 \item
3660 If \bitvar{FTYPE} is 0 (intra frame):
3661 \begin{enumerate}
3662 \item
3663 For each consecutive value of \locvar{\mbi} from 0 to $(\bitvar{NMBS}-1)$,
3664 inclusive, assign $\bitvar{MBMODES}[\mbi]$ the value 1 (INTRA).
3665 \end{enumerate}
3666 \item
3667 Otherwise (inter frame):
3668 \begin{enumerate}
3669 \item
3670 Read a 3-bit unsigned integer as \locvar{MSCHEME}.
3671 \item
3672 If \locvar{MSCHEME} is 0:
3673 \begin{enumerate}
3674 \item
3675 For each consecutive value of \locvar{MODE} from 0 to 7, inclusive:
3676 \begin{enumerate}
3677 \item
3678 Read a 3-bit unsigned integer as \locvar{\mi}.
3679 \item
3680 Assign $\locvar{MALPHABET}[\mi]$ the value \locvar{MODE}.
3681 \end{enumerate}
3682 \end{enumerate}
3683 \item
3684 Otherwise, if \locvar{MSCHEME} is not 7, assign the entries of
3685 \locvar{MALPHABET} the values in the corresponding column of
3686 Table~\ref{tab:mode-codes}.
3687 \item
3688 For each consecutive macro block in coded order (cf.
3689 Section~\ref{sec:mbs})---indexed by \locvar{\mbi}:
3690 \begin{enumerate}
3691 \item
3692 If a block \locvar{\bi} in the luma plane of macro block \locvar{\mbi} exists
3693 such that $\bitvar{BCODED}[\locvar{\bi}]$ is 1:
3694 \begin{enumerate}
3695 \item
3696 If \locvar{MSCHEME} is not 7, read one bit at a time until one of the Huffman
3697 codes in Table~\ref{tab:mode-codes} is recognized, and assign
3698 $\bitvar{MBMODES}[\locvar{\mbi}]$ the value
3699 $\locvar{MALPHABET}[\locvar{\mi}]$, where \locvar{\mi} is the index of the
3700 Huffman code decoded.
3701 \item
3702 Otherwise (\locvar{MSCHEME} is 7), read a 3-bit
3703 unsigned integer as $\bitvar{MBMODES}[\locvar{\mbi}]$.
3704 \end{enumerate}
3705 \item
3706 Otherwise, assign $\bitvar{MBMODES}[\locvar{\mbi}]$ the value 0 (INTER\_NOMV).
3707 \end{enumerate}
3708 \end{enumerate}
3709 \end{enumerate}
3711 \section{Motion Vectors}
3713 In an intra frame, no motion vectors are used, and so motion vector decoding is
3714 skipped.
3715 In an inter frame, however, many of the inter coding modes require a motion
3716 vector in order to specify an offset into the reference frame from which to
3717 predict a block.
3718 These procedures assign such a motion vector to every block.
3720 \subsection{Motion Vector Decode}
3721 \label{sub:mv-decode}
3723 \paragraph{Input parameters:}\hfill\\*
3724 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3725 \multicolumn{1}{c}{Name} &
3726 \multicolumn{1}{c}{Type} &
3727 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3728 \multicolumn{1}{c}{Signed?} &
3729 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3730 \bitvar{MVMODE} & Integer & 1 & No & The motion vector decoding method. \\
3731 \bottomrule\end{tabularx}
3733 \paragraph{Output parameters:}\hfill\\*
3734 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3735 \multicolumn{1}{c}{Name} &
3736 \multicolumn{1}{c}{Type} &
3737 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3738 \multicolumn{1}{c}{Signed?} &
3739 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3740 \bitvar{MVX} & Integer & 6 & Yes & The X component of the motion
3741 vector. \\
3742 \bitvar{MVY} & Integer & 6 & Yes & The Y component of the motion
3743 vector. \\
3744 \bottomrule\end{tabularx}
3746 \paragraph{Variables used:}\hfill\\*
3747 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3748 \multicolumn{1}{c}{Name} &
3749 \multicolumn{1}{c}{Type} &
3750 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3751 \multicolumn{1}{c}{Signed?} &
3752 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3753 \locvar{MVSIGN} & Integer & 1 & No & The sign of the motion vector component
3754 just decoded. \\
3755 \bottomrule\end{tabularx}
3756 \medskip
3758 The individual components of a motion vector can be coded using one of two
3759 methods.
3760 The first uses a variable length Huffman code, given in
3761 Table~\ref{tab:mv-huff-codes}.
3762 The second encodes the magnitude of the component directly in 5 bits, and the
3763 sign in one bit.
3764 Note that in this case there are two representations for the value zero.
3765 For compatibility with VP3, a sign bit is read even if the magnitude read is
3766 zero.
3767 One scheme is chosen and used for the entire frame.
3769 Each component can take on integer values from $-31\ldots 31$, inclusive, at
3770 half-pixel resolution, i.e. $-15.5\ldots 15.5$ pixels in the luma plane.
3771 For each subsampled axis in the chroma planes, the corresponding motion vector
3772 component is interpreted as being at quarter-pixel resolution, i.e.
3773 $-7.75\ldots 7.75$ pixels.
3774 The precise details of how these vectors are used to compute predictors for
3775 each block are described in Section~\ref{sec:predictors}.
3777 \begin{table}[ht]
3778 \begin{center}
3779 \begin{tabular}{lrlr}\toprule
3780 Huffman Code & Value & Huffman Code & Value \\\midrule
3781 \bin{000} & $0$ \\
3782 \bin{001} & $1$ & \bin{010} & $-1$ \\
3783 \bin{0110} & $2$ & \bin{0111} & $-2$ \\
3784 \bin{1000} & $3$ & \bin{1001} & $-3$ \\
3785 \bin{101000} & $4$ & \bin{101001} & $-4$ \\
3786 \bin{101010} & $5$ & \bin{101011} & $-5$ \\
3787 \bin{101100} & $6$ & \bin{101101} & $-6$ \\
3788 \bin{101110} & $7$ & \bin{101111} & $-7$ \\
3789 \bin{1100000} & $8$ & \bin{1100001} & $-8$ \\
3790 \bin{1100010} & $9$ & \bin{1100011} & $-9$ \\
3791 \bin{1100100} & $10$ & \bin{1100101} & $-10$ \\
3792 \bin{1100110} & $11$ & \bin{1100111} & $-11$ \\
3793 \bin{1101000} & $12$ & \bin{1101001} & $-12$ \\
3794 \bin{1101010} & $13$ & \bin{1101011} & $-13$ \\
3795 \bin{1101100} & $14$ & \bin{1101101} & $-14$ \\
3796 \bin{1101110} & $15$ & \bin{1101111} & $-15$ \\
3797 \bin{11100000} & $16$ & \bin{11100001} & $-16$ \\
3798 \bin{11100010} & $17$ & \bin{11100011} & $-17$ \\
3799 \bin{11100100} & $18$ & \bin{11100101} & $-18$ \\
3800 \bin{11100110} & $19$ & \bin{11100111} & $-19$ \\
3801 \bin{11101000} & $20$ & \bin{11101001} & $-20$ \\
3802 \bin{11101010} & $21$ & \bin{11101011} & $-21$ \\
3803 \bin{11101100} & $22$ & \bin{11101101} & $-22$ \\
3804 \bin{11101110} & $23$ & \bin{11101111} & $-23$ \\
3805 \bin{11110000} & $24$ & \bin{11110001} & $-24$ \\
3806 \bin{11110010} & $25$ & \bin{11110011} & $-25$ \\
3807 \bin{11110100} & $26$ & \bin{11110101} & $-26$ \\
3808 \bin{11110110} & $27$ & \bin{11110111} & $-27$ \\
3809 \bin{11111000} & $28$ & \bin{11111001} & $-28$ \\
3810 \bin{11111010} & $29$ & \bin{11111011} & $-29$ \\
3811 \bin{11111100} & $30$ & \bin{11111101} & $-30$ \\
3812 \bin{11111110} & $31$ & \bin{11111111} & $-31$ \\
3813 \bottomrule\end{tabular}
3814 \end{center}
3815 \caption{Huffman Codes for Motion Vector Components}
3816 \label{tab:mv-huff-codes}
3817 \end{table}
3819 A single motion vector is decoded as follows:
3821 \begin{enumerate}
3822 \item
3823 If \bitvar{MVMODE} is 0:
3824 \begin{enumerate}
3825 \item
3826 Read 1 bit at a time until one of the Huffman codes in
3827 Table~\ref{tab:mv-huff-codes} is recognized, and assign the value to
3828 \locvar{MVX}.
3829 \item
3830 Read 1 bit at a time until one of the Huffman codes in
3831 Table~\ref{tab:mv-huff-codes} is recognized, and assign the value to
3832 \locvar{MVY}.
3833 \end{enumerate}
3834 \item
3835 Otherwise:
3836 \begin{enumerate}
3837 \item
3838 Read a 5-bit unsigned integer as \bitvar{MVX}.
3839 \item
3840 Read a 1-bit unsigned integer as \locvar{MVSIGN}.
3841 \item
3842 If \locvar{MVSIGN} is 1, assign \bitvar{MVX} the value $-\bitvar{MVX}$.
3843 \item
3844 Read a 5-bit unsigned integer as \bitvar{MVY}.
3845 \item
3846 Read a 1-bit unsigned integer as \locvar{MVSIGN}.
3847 \item
3848 If \locvar{MVSIGN} is 1, assign \bitvar{MVY} the value $-\bitvar{MVY}$.
3849 \end{enumerate}
3850 \end{enumerate}
3852 \subsection{Macro Block Motion Vector Decode}
3853 \label{sub:mb-mv-decode}
3855 \paragraph{Input parameters:}\hfill\\*
3856 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3857 \multicolumn{1}{c}{Name} &
3858 \multicolumn{1}{c}{Type} &
3859 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3860 \multicolumn{1}{c}{Signed?} &
3861 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3862 \bitvar{PF} & Integer & 2 & No & The pixel format. \\
3863 \bitvar{NMBS} & Integer & 32 & No & The total number of macro blocks in a
3864 frame. \\
3865 \bitvar{MBMODES} & \multicolumn{1}{p{40pt}}{Integer Array} &
3866 3 & No & An \bitvar{NMBS}-element array of coding
3867 modes for each macro block. \\
3868 \bitvar{NBS} & Integer & 36 & No & The total number of blocks in a
3869 frame. \\
3870 \bitvar{BCODED} & \multicolumn{1}{p{40pt}}{Integer Array} &
3871 1 & No & An \bitvar{NBS}-element array of flags
3872 indicating which blocks are coded. \\
3873 \bottomrule\end{tabularx}
3875 \paragraph{Output parameters:}\hfill\\*
3876 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3877 \multicolumn{1}{c}{Name} &
3878 \multicolumn{1}{c}{Type} &
3879 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3880 \multicolumn{1}{c}{Signed?} &
3881 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3882 \bitvar{MVECTS} & \multicolumn{1}{p{50pt}}{Array of 2D Integer Vectors} &
3883 6 & Yes & An \bitvar{NBS}-element array of
3884 motion vectors for each block. \\
3885 \bottomrule\end{tabularx}
3887 \paragraph{Variables used:}\hfill\\*
3888 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3889 \multicolumn{1}{c}{Name} &
3890 \multicolumn{1}{c}{Type} &
3891 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3892 \multicolumn{1}{c}{Signed?} &
3893 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3894 \locvar{LAST1} & \multicolumn{1}{p{50pt}}{2D Integer Vector} &
3895 6 & Yes & The last motion vector. \\
3896 \locvar{LAST2} & \multicolumn{1}{p{50pt}}{2D Integer Vector} &
3897 6 & Yes & The second to last motion vector. \\
3898 \locvar{MVX} & Integer & 6 & Yes & The X component of a motion vector. \\
3899 \locvar{MVY} & Integer & 6 & Yes & The Y component of a motion vector. \\
3900 \locvar{\mbi} & Integer & 32 & No & The index of the current macro
3901 block. \\
3902 \locvar{A} & Integer & 36 & No & The index of the lower-left luma block
3903 in the macro block. \\
3904 \locvar{B} & Integer & 36 & No & The index of the lower-right luma
3905 block in the macro block. \\
3906 \locvar{C} & Integer & 36 & No & The index of the upper-left luma block
3907 in the macro block. \\
3908 \locvar{D} & Integer & 36 & No & The index of the upper-right luma
3909 block in the macro block. \\
3910 \locvar{E} & Integer & 36 & No & The index of a chroma block in the
3911 macro block, depending on the pixel format. \\
3912 \locvar{F} & Integer & 36 & No & The index of a chroma block in the
3913 macro block, depending on the pixel format. \\
3914 \locvar{G} & Integer & 36 & No & The index of a chroma block in the
3915 macro block, depending on the pixel format. \\
3916 \locvar{H} & Integer & 36 & No & The index of a chroma block in the
3917 macro block, depending on the pixel format. \\
3918 \locvar{I} & Integer & 36 & No & The index of a chroma block in the
3919 macro block, depending on the pixel format. \\
3920 \locvar{J} & Integer & 36 & No & The index of a chroma block in the
3921 macro block, depending on the pixel format. \\
3922 \locvar{K} & Integer & 36 & No & The index of a chroma block in the
3923 macro block, depending on the pixel format. \\
3924 \locvar{L} & Integer & 36 & No & The index of a chroma block in the
3925 macro block, depending on the pixel format. \\
3926 \bottomrule\end{tabularx}
3927 \medskip
3929 Motion vectors are stored for each macro block.
3930 In every mode except for INTER\_MV\_FOUR, every block in all the color planes
3931 is assigned the same motion vector.
3932 In INTER\_MV\_FOUR mode, all four blocks in the luma plane are assigned their
3933 own motion vector, and motion vectors for blocks in the chroma planes are
3934 computed from these, using averaging appropriate to the pixel format.
3936 For INTER\_MV and INTER\_GOLDEN\_MV modes, a single motion vector is decoded
3937 and applied to each block.
3938 For INTER\_MV\_FOUR macro blocks, a motion vector is decoded for each coded
3939 luma block.
3940 Uncoded luma blocks receive the default $(0,0)$ vector for the purposes of
3941 computing the chroma motion vectors.
3943 None of the remaining macro block coding modes require decoding motion vectors
3944 from the stream.
3945 INTRA mode does not use a motion-compensated predictor, and so requires no
3946 motion vector, and INTER\_NOMV and INTER\_GOLDEN\_NOMV modes use the default
3947 vector $(0,0)$ for each block.
3948 This also includes all macro blocks with no coded luma blocks, as they are
3949 coded in INTER\_NOMV mode by definition.
3951 The modes INTER\_MV\_LAST and INTER\_MV\_LAST2 use the motion vector from the
3952 last macro block (in coded order) and the second to last macro block,
3953 respectively, that contained a motion vector pointing to the previous frame.
3954 Thus no explicit motion vector needs to be decoded for these modes.
3955 Macro blocks coded in INTRA mode or one of the GOLDEN modes are not considered
3956 in this process.
3957 If an insufficient number of macro blocks have been coded in one of the INTER
3958 modes, then the $(0,0)$ vector is used instead.
3959 For macro blocks coded in INTER\_MV\_FOUR mode, the vector from the upper-right
3960 luma block is used, even if the upper-right block is not coded.
3962 The motion vectors are decoded from the stream as follows:
3964 \begin{enumerate}
3965 \item
3966 Assign \locvar{LAST1} and \locvar{LAST2} both the value $(0,0)$.
3967 \item
3968 Read a 1-bit unsigned integer as \locvar{MVMODE}.
3969 Note that this value is read even if no macro blocks require a motion vector to
3970 be decoded.
3971 \item
3972 For each consecutive value of \locvar{\mbi} from 0 to $(\bitvar{NMBS}-1)$:
3973 \begin{enumerate}
3974 \item
3975 If $\bitvar{MBMODES}[\locvar{\mbi}]$ is 7 (INTER\_MV\_FOUR):
3976 \begin{enumerate}
3977 \item
3978 Let \locvar{A}, \locvar{B}, \locvar{C}, and \locvar{D} be the indices in coded
3979 order \locvar{\bi} of the luma blocks in macro block \locvar{\mbi}, arranged
3980 into raster order.
3981 Thus, \locvar{A} is the index in coded order of the block in the lower left,
3982 \locvar{B} the lower right, \locvar{C} the upper left, and \locvar{D} the
3983 upper right. % TODO: as shown in Figure~REF.
3984 \item
3985 If $\bitvar{BCODED}[\locvar{A}]$ is non-zero, decode a single motion vector
3986 into \locvar{MVX} and \locvar{MVY} using the procedure described in
3987 Section~\ref{sub:mv-decode}.
3988 \item
3989 Otherwise, assign \locvar{MVX} and \locvar{MVY} both the value zero.
3990 \item
3991 Assign $\bitvar{MVECTS}[\locvar{A}]$ the value $(\locvar{MVX},\locvar{MVY})$.
3992 \item
3993 If $\bitvar{BCODED}[\locvar{B}]$ is non-zero, decode a single motion vector
3994 into \locvar{MVX} and \locvar{MVY} using the procedure described in
3995 Section~\ref{sub:mv-decode}.
3996 \item
3997 Otherwise, assign \locvar{MVX} and \locvar{MVY} both the value zero.
3998 \item
3999 Assign $\bitvar{MVECTS}[\locvar{B}]$ the value $(\locvar{MVX},\locvar{MVY})$.
4000 \item
4001 If $\bitvar{BCODED}[\locvar{C}]$ is non-zero, decode a single motion vector
4002 into \locvar{MVX} and \locvar{MVY} using the procedure described in
4003 Section~\ref{sub:mv-decode}.
4004 \item
4005 Otherwise, assign \locvar{MVX} and \locvar{MVY} both the value zero.
4006 \item
4007 Assign $\bitvar{MVECTS}[\locvar{C}]$ the value $(\locvar{MVX},\locvar{MVY})$.
4008 \item
4009 If $\bitvar{BCODED}[\locvar{D}]$ is non-zero, decode a single motion vector
4010 into \locvar{MVX} and \locvar{MVY} using the procedure described in
4011 Section~\ref{sub:mv-decode}.
4012 \item
4013 Otherwise, assign \locvar{MVX} and \locvar{MVY} both the value zero.
4014 \item
4015 Assign $\bitvar{MVECTS}[\locvar{D}]$ the value $(\locvar{MVX},\locvar{MVY})$.
4016 Note that \locvar{MVX} and \locvar{MVY} retain this last value.
4017 \item
4018 If \bitvar{PF} is 0 (4:2:0):
4019 \begin{enumerate}
4020 \item
4021 Let \locvar{E} and \locvar{F} be the index in coded order of the one block in
4022 the macro block from the $C_b$ and $C_r$ planes, respectively.
4023 \item
4024 Assign $\bitvar{MVECTS}[\locvar{E}]$ and $\bitvar{MVECTS}[\locvar{F}]$ the
4025 value
4026 \begin{multline*}
4027 (\round\biggl(\frac{\begin{aligned}
4028 \bitvar{MVECTS}[\locvar{A}]_x+\bitvar{MVECTS}[\locvar{B}]_x+\\
4029 \bitvar{MVECTS}[\locvar{C}]_x+\bitvar{MVECTS}[\locvar{D}]_x
4030 \end{aligned}}{4}\biggr), \\
4031 \round\biggl(\frac{\begin{aligned}
4032 \bitvar{MVECTS}[\locvar{A}]_y+\bitvar{MVECTS}[\locvar{B}]_y+\\
4033 \bitvar{MVECTS}[\locvar{C}]_y+\bitvar{MVECTS}[\locvar{D}]_y
4034 \end{aligned}}{4}\biggr))
4035 \end{multline*}
4036 \end{enumerate}
4037 \item
4038 If \bitvar{PF} is 2 (4:2:2):
4039 \begin{enumerate}
4040 \item
4041 Let \locvar{E} and \locvar{F} be the indices in coded order of the bottom and
4042 top blocks in the macro block from the $C_b$ plane, respectively, and
4043 \locvar{G} and \locvar{H} be the indices in coded order of the bottom and top
4044 blocks in the $C_r$ plane, respectively. %TODO: as shown in Figure~REF.
4045 \item
4046 Assign $\bitvar{MVECTS}[\locvar{E}]$ and $\bitvar{MVECTS}[\locvar{G}]$ the
4047 value
4048 \begin{multline*}
4049 (\round\left(\frac{
4050 \bitvar{MVECTS}[\locvar{A}]_x+\bitvar{MVECTS}[\locvar{B}]_x}{2}\right), \\
4051 \round\left(\frac{
4052 \bitvar{MVECTS}[\locvar{A}]_y+\bitvar{MVECTS}[\locvar{B}]_y}{2}\right))
4053 \end{multline*}
4054 \item
4055 Assign $\bitvar{MVECTS}[\locvar{F}]$ and $\bitvar{MVECTS}[\locvar{H}]$ the
4056 value
4057 \begin{multline*}
4058 (\round\left(\frac{
4059 \bitvar{MVECTS}[\locvar{C}]_x+\bitvar{MVECTS}[\locvar{D}]_x}{2}\right), \\
4060 \round\left(\frac{
4061 \bitvar{MVECTS}[\locvar{C}]_y+\bitvar{MVECTS}[\locvar{D}]_y}{2}\right))
4062 \end{multline*}
4063 \end{enumerate}
4064 \item
4065 If \bitvar{PF} is 3 (4:4:4):
4066 \begin{enumerate}
4067 \item
4068 Let \locvar{E}, \locvar{F}, \locvar{G}, and \locvar{H} be the indices
4069 \locvar{\bi} in coded order of the $C_b$ plane blocks in macro block
4070 \locvar{\mbi}, arranged into raster order, and \locvar{I}, \locvar{J},
4071 \locvar{K}, and \locvar{L} be the indices \locvar{\bi} in coded order of the
4072 $C_r$ plane blocks in macro block \locvar{\mbi}, arranged into raster order.
4073 %TODO: as shown in Figure~REF.
4074 \item
4075 Assign $\bitvar{MVECTS}[\locvar{E}]$ and $\bitvar{MVECTS}[\locvar{I}]$ the
4076 value \\ $\bitvar{MVECTS}[\locvar{A}]$.
4077 \item
4078 Assign $\bitvar{MVECTS}[\locvar{F}]$ and $\bitvar{MVECTS}[\locvar{J}]$ the
4079 value \\ $\bitvar{MVECTS}[\locvar{B}]$.
4080 \item
4081 Assign $\bitvar{MVECTS}[\locvar{G}]$ and $\bitvar{MVECTS}[\locvar{K}]$ the
4082 value \\ $\bitvar{MVECTS}[\locvar{C}]$.
4083 \item
4084 Assign $\bitvar{MVECTS}[\locvar{H}]$ and $\bitvar{MVECTS}[\locvar{L}]$ the
4085 value \\ $\bitvar{MVECTS}[\locvar{D}]$.
4086 \end{enumerate}
4087 \item
4088 Assign \locvar{LAST2} the value \locvar{LAST1}.
4089 \item
4090 Assign \locvar{LAST1} the value $(\locvar{MVX},\locvar{MVY})$.
4091 \end{enumerate}
4092 \item
4093 Otherwise, if $\bitvar{MBMODES}[\locvar{\mbi}]$ is 6 (INTER\_GOLDEN\_MV),
4094 decode a single motion vector into \locvar{MVX} and \locvar{MVY} using the
4095 procedure described in Section~\ref{sub:mv-decode}.
4096 \item
4097 Otherwise, if $\bitvar{MBMODES}[\locvar{\mbi}]$ is 4 (INTER\_MV\_LAST2):
4098 \begin{enumerate}
4099 \item
4100 Assign $(\locvar{MVX},\locvar{MVY})$ the value \locvar{LAST2}.
4101 \item
4102 Assign \locvar{LAST2} the value \locvar{LAST1}.
4103 \item
4104 Assign \locvar{LAST1} the value $(\locvar{MVX},\locvar{MVY})$.
4105 \end{enumerate}
4106 \item
4107 Otherwise, if $\bitvar{MBMODES}[\locvar{\mbi}]$ is 3 (INTER\_MV\_LAST), assign
4108 $(\locvar{MVX},\locvar{MVY})$ the value \locvar{LAST1}.
4109 \item
4110 Otherwise, if $\bitvar{MBMODES}[\locvar{\mbi}]$ is 2 (INTER\_MV):
4111 \begin{enumerate}
4112 \item
4113 Decode a single motion vector into \locvar{MVX} and \locvar{MVY} using the
4114 procedure described in Section~\ref{sub:mv-decode}.
4115 \item
4116 Assign \locvar{LAST2} the value \locvar{LAST1}.
4117 \item
4118 Assign \locvar{LAST1} the value $(\locvar{MVX},\locvar{MVY})$.
4119 \end{enumerate}
4120 \item
4121 Otherwise (5:~INTER\_GOLDEN\_NOMV, 1:~INTRA, or \\
4122 0:~INTER\_NOMV), assign \locvar{MVX} and \locvar{MVY} the value zero.
4123 \item
4124 If $\bitvar{MBMODES}[\locvar{\mbi}]$ is not 7 (not INTER\_MV\_FOUR), then for
4125 each coded block \locvar{\bi} in macro block \locvar{\mbi}:
4126 \begin{enumerate}
4127 \item
4128 Assign $\bitvar{MVECTS}[\locvar{\bi}]$ the value $(\locvar{MVX},\locvar{MVY})$.
4129 \end{enumerate}
4130 \end{enumerate}
4131 \end{enumerate}
4133 \paragraph{VP3 Compatibility}
4135 Unless all four luma blocks in the macro block are coded, the VP3 encoder does
4136 not select mode INTER\_MV\_FOUR.
4137 Theora removes this restriction by treating the motion vector for an uncoded
4138 luma block as the default $(0,0)$ vector.
4139 This is consistent with the premise that the block has not changed since the
4140 previous frame and that chroma information can be largely ignored when
4141 estimating motion.
4143 No modification is required for INTER\_MV\_FOUR macro blocks in VP3 streams to
4144 be decoded correctly by a Theora decoder.
4145 However, regardless of how many of the luma blocks are actually coded, the VP3
4146 decoder always reads four motion vectors from the stream for INTER\_MV\_FOUR
4147 mode.
4148 The motion vectors read are used to calculate the motion vectors for the chroma
4149 blocks, but are otherwise ignored.
4150 Thus, care should be taken when creating Theora streams meant to be backwards
4151 compatible with VP3 to only use INTER\_MV\_FOUR mode when all four luma
4152 blocks are coded.
4154 \section{Block-Level \qi\ Decode}
4155 \label{sub:block-qis}
4157 \paragraph{Input parameters:}\hfill\\*
4158 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
4159 \multicolumn{1}{c}{Name} &
4160 \multicolumn{1}{c}{Type} &
4161 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
4162 \multicolumn{1}{c}{Signed?} &
4163 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
4164 \bitvar{NBS} & Integer & 36 & No & The total number of blocks in a
4165 frame. \\
4166 \bitvar{BCODED} & \multicolumn{1}{p{40pt}}{Integer Array} &
4167 1 & No & An \bitvar{NBS}-element array of flags
4168 indicating which blocks are coded. \\
4169 \bitvar{NQIS} & Integer & 2 & No & The number of \qi\ values. \\
4170 \bottomrule\end{tabularx}
4172 \paragraph{Output parameters:}\hfill\\*
4173 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
4174 \multicolumn{1}{c}{Name} &
4175 \multicolumn{1}{c}{Type} &
4176 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
4177 \multicolumn{1}{c}{Signed?} &
4178 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
4179 \bitvar{QIIS} & \multicolumn{1}{p{40pt}}{Integer Array} &
4180 2 & No & An \bitvar{NBS}-element array of
4181 \locvar{\qii} values for each block. \\
4182 \bottomrule\end{tabularx}
4184 \paragraph{Variables used:}\hfill\\*
4185 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
4186 \multicolumn{1}{c}{Name} &
4187 \multicolumn{1}{c}{Type} &
4188 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
4189 \multicolumn{1}{c}{Signed?} &
4190 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
4191 \locvar{NBITS} & Integer & 36 & No & The length of a bit string to decode. \\
4192 \locvar{BITS} & Bit string & & & A decoded set of flags. \\
4193 \locvar{\bi} & Integer & 36 & No & The index of the current block in
4194 coded order. \\
4195 \locvar{\qii} & Integer & 2 & No & The index of a \qi\ value in the list of
4196 \qi\ values defined for this frame. \\
4197 \bottomrule\end{tabularx}
4198 \medskip
4200 This procedure selects the \qi\ value to be used for dequantizing the AC
4201 coefficients of each block.
4202 DC coefficients all use the same \qi\ value, so as to avoid interference with
4203 the DC prediction mechanism, which occurs in the quantized domain.
4205 The value is actually represented by an index \locvar{\qii} into the list of
4206 \qi\ values defined for the frame.
4207 The procedure makes multiple passes through the list of coded blocks, one for
4208 each \qi\ value except the last one.
4209 In each pass, an RLE-coded bitmask is decoded to divide the blocks into two
4210 groups: those that use a value of \qi\ from later in the list, and those that
4211 do not.
4212 Each block in the second group is assigned the current \qi\ value.
4213 Each subsequent pass is restricted to the blocks in the first group.
4215 \begin{enumerate}
4216 \item
4217 For each value of \locvar{\bi} from 0 to $(\bitvar{NBS}-1)$, assign
4218 $\bitvar{QIIS}[\locvar{\bi}]$ the value zero.
4219 \item
4220 For each consecutive value of \locvar{\qii} from 0 to $(\bitvar{NQIS}-2)$:
4221 \begin{enumerate}
4222 \item
4223 Assign \locvar{NBITS} the number of blocks \locvar{\bi} such that
4224 $\bitvar{BCODED}[\locvar{\bi}]$ is non-zero and $\bitvar{QIIS}[\locvar{\bi}]$
4225 equals $\locvar{\qii}$.
4226 \item
4227 Read an \locvar{NBITS}-bit bit string into \locvar{BITS}, using the procedure
4228 described in Section~\ref{sub:long-run}.
4229 This represents the list of blocks that use \qi\ value \locvar{\qii} or higher.
4230 \item
4231 For each consecutive value of \locvar{\bi} from 0 to $(\bitvar{NBS}-1)$ such
4232 that $\bitvar{BCODED}[\locvar{\bi}]$ is non-zero and
4233 $\bitvar{QIIS}[\locvar{\bi}]$ equals $\locvar{\qii}$:
4234 \begin{enumerate}
4235 \item
4236 Remove the bit at the head of the string \locvar{BITS} and add its value to
4237 $\bitvar{QIIS}[\locvar{\bi}]$.
4238 \end{enumerate}
4239 \end{enumerate}
4240 \end{enumerate}
4242 \paragraph{VP3 Compatibility}
4244 For VP3 compatible streams, only one \qi\ value can be specified in the frame
4245 header, so the main loop of the above procedure, which goes to
4246 $\bitvar{NQIS}-2$ instead of $\bitvar{NQIS}-1$, is never executed.
4247 Thus, no bits are read and each block uses the one \qi\ value defined for the
4248 frame.
4250 \cleardoublepage
4252 \section{DCT Coefficients}
4253 \label{sec:dct-decode}
4255 The quantized DCT coefficients are decoded by making 64 passes through the list
4256 of coded blocks, one for each token index in zig-zag order.
4257 For the DC tokens, two Huffman tables are chosen from among the first 16, one
4258 for the luma plane and one for the chroma planes.
4259 The AC tokens, however, are divided into four different groups.
4260 Again, two 4-bit indices are decoded, one for the luma plane, and one for the
4261 chroma planes, but these select the codebooks for {\em all four} groups.
4262 AC coefficients in group one use codebooks $16\ldots 31$, while group two uses
4263 $32\ldots 47$, etc.
4264 Note that this second set of indices is decoded even if there are no non-zero
4265 AC coefficients in the frame.
4267 Tokens are divided into two major types: EOB tokens, which fill the remainder
4268 of one or more blocks with zeros, and coefficient tokens, which fill in one or
4269 more coefficients within a single block.
4270 A decoding procedure for the first is given in Section~\ref{sub:eob-token}, and
4271 for the second in Section~\ref{sub:coeff-token}.
4272 The decoding procedure for the complete set of quantized coefficients is given
4273 in Section~\ref{sub:dct-coeffs}.
4275 \subsection{EOB Token Decode}
4276 \label{sub:eob-token}
4278 \paragraph{Input parameters:}\hfill\\*
4279 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
4280 \multicolumn{1}{c}{Name} &
4281 \multicolumn{1}{c}{Type} &
4282 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
4283 \multicolumn{1}{c}{Signed?} &
4284 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
4285 \bitvar{TOKEN} & Integer & 5 & No & The token being decoded.
4286 This must be in the range $0\ldots 6$. \\
4287 \bitvar{NBS} & Integer & 36 & No & The total number of blocks in a
4288 frame. \\
4289 \bitvar{TIS} & \multicolumn{1}{p{40pt}}{Integer Array} &
4290 7 & No & An \bitvar{NBS}-element array of the
4291 current token index for each block. \\
4292 \bitvar{NCOEFFS} & \multicolumn{1}{p{40pt}}{Integer Array} &
4293 7 & No & An \bitvar{NBS}-element array of the
4294 coefficient count for each block. \\
4295 \bitvar{COEFFS} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
4296 16 & Yes & An $\bitvar{NBS}\times 64$ array of
4297 quantized DCT coefficient values for each block in zig-zag order. \\
4298 \bitvar{\bi} & Integer & 36 & No & The index of the current block in
4299 coded order. \\
4300 \bitvar{\ti} & Integer & 6 & No & The current token index. \\
4301 \bottomrule\end{tabularx}
4303 \paragraph{Output parameters:}\hfill\\*
4304 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
4305 \multicolumn{1}{c}{Name} &
4306 \multicolumn{1}{c}{Type} &
4307 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
4308 \multicolumn{1}{c}{Signed?} &
4309 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
4310 \bitvar{TIS} & \multicolumn{1}{p{40pt}}{Integer Array} &
4311 7 & No & An \bitvar{NBS}-element array of the
4312 current token index for each block. \\
4313 \bitvar{COEFFS} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
4314 16 & Yes & An $\bitvar{NBS}\times 64$ array of
4315 quantized DCT coefficient values for each block in zig-zag order. \\
4316 \bitvar{EOBS} & Integer & 36 & No & The remaining length of the current
4317 EOB run. \\
4318 \bottomrule\end{tabularx}
4320 \paragraph{Variables used:}\hfill\\*
4321 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
4322 \multicolumn{1}{c}{Name} &
4323 \multicolumn{1}{c}{Type} &
4324 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
4325 \multicolumn{1}{c}{Signed?} &
4326 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
4327 \locvar{\bj} & Integer & 36 & No & Another index of a block in coded
4328 order. \\
4329 \locvar{\tj} & Integer & 6 & No & Another token index. \\
4330 \bottomrule\end{tabularx}
4331 \medskip
4333 A summary of the EOB tokens is given in Table~\ref{tab:eob-tokens}.
4334 An important thing to note is that token 6 does not add an offset to the
4335 decoded run value, even though in general it should only be used for runs of
4336 size 32 or longer.
4337 If a value of zero is decoded for this run, it is treated as an EOB run the
4338 size of the remaining coded blocks.
4340 \begin{table}[htbp]
4341 \begin{center}
4342 \begin{tabular}{ccl}\toprule
4343 Token Value & Extra Bits & EOB Run Lengths \\\midrule
4344 $0$ & $0$ & $1$ \\
4345 $1$ & $0$ & $2$ \\
4346 $2$ & $0$ & $3$ \\
4347 $3$ & $2$ & $4\ldots 7$ \\
4348 $4$ & $3$ & $8\ldots 15$ \\
4349 $5$ & $4$ & $16\ldots 31$ \\
4350 $6$ & $12$ & $1\ldots 4095$, or all remaining blocks \\
4351 \bottomrule\end{tabular}
4352 \end{center}
4353 \caption{EOB Token Summary}
4354 \label{tab:eob-tokens}
4355 \end{table}
4357 There is no restriction that one EOB token cannot be immediately followed by
4358 another, so no special cases are necessary to extend the range of the maximum
4359 run length as were required in Section~\ref{sub:long-run}.
4360 Indeed, depending on the lengths of the Huffman codes, it may even be cheaper to
4361 encode, by way of example, an EOB run of length 31 followed by an EOB run of
4362 length 1 than to encode an EOB run of length 32 directly.
4363 There is also no restriction that an EOB run stop at the end of a color plane
4364 or a token index.
4365 The run MUST, however, end at or before the end of the frame.
4367 \begin{enumerate}
4368 \item
4369 If \bitvar{TOKEN} is 0, assign \bitvar{EOBS} the value 1.
4370 \item
4371 Otherwise, if \bitvar{TOKEN} is 1, assign \bitvar{EOBS} the value 2.
4372 \item
4373 Otherwise, if \bitvar{TOKEN} is 2, assign \bitvar{EOBS} the value 3.
4374 \item
4375 Otherwise, if \bitvar{TOKEN} is 3:
4376 \begin{enumerate}
4377 \item
4378 Read a 2-bit unsigned integer as \bitvar{EOBS}.
4379 \item
4380 Assign \bitvar{EOBS} the value $(\bitvar{EOBS}+4)$.
4381 \end{enumerate}
4382 \item
4383 Otherwise, if \bitvar{TOKEN} is 4:
4384 \begin{enumerate}
4385 \item
4386 Read a 3-bit unsigned integer as \bitvar{EOBS}.
4387 \item
4388 Assign \bitvar{EOBS} the value $(\bitvar{EOBS}+8)$.
4389 \end{enumerate}
4390 \item
4391 Otherwise, if \bitvar{TOKEN} is 5:
4392 \begin{enumerate}
4393 \item
4394 Read a 4-bit unsigned integer as \bitvar{EOBS}.
4395 \item
4396 Assign \bitvar{EOBS} the value $(\bitvar{EOBS}+16)$.
4397 \end{enumerate}
4398 \item
4399 Otherwise, \bitvar{TOKEN} is 6:
4400 \begin{enumerate}
4401 \item
4402 Read a 12-bit unsigned integer as \bitvar{EOBS}.
4403 \item
4404 If \bitvar{EOBS} is zero, assign \bitvar{EOBS} to be the number of coded blocks
4405 \locvar{\bj} such that $\bitvar{TIS}[\locvar{\bj}]$ is less than 64.
4406 \end{enumerate}
4407 \item
4408 For each value of \locvar{\tj} from $\bitvar{\ti}$ to 63, assign
4409 $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero.
4410 \item
4411 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4412 \item
4413 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value 64.
4414 \item
4415 Assign \bitvar{EOBS} the value $(\bitvar{EOBS}-1)$.
4416 \end{enumerate}
4418 \paragraph{VP3 Compatibility}
4420 The VP3 encoder does not use the special interpretation of a zero-length EOB
4421 run, though its decoder {\em does} support it.
4422 That may be due more to a happy accident in the way the decoder was written
4423 than intentional design, however, and other VP3 implementations might not
4424 reproduce it faithfully.
4425 For backwards compatibility, it may be wise to avoid it, especially as for most
4426 frame sizes there are fewer than 4095 blocks, making it unnecessary.
4428 \subsection{Coefficient Token Decode}
4429 \label{sub:coeff-token}
4431 \paragraph{Input parameters:}\hfill\\*
4432 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
4433 \multicolumn{1}{c}{Name} &
4434 \multicolumn{1}{c}{Type} &
4435 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
4436 \multicolumn{1}{c}{Signed?} &
4437 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
4438 \bitvar{TOKEN} & Integer & 5 & No & The token being decoded.
4439 This must be in the range $7\ldots 31$. \\
4440 \bitvar{NBS} & Integer & 36 & No & The total number of blocks in a
4441 frame. \\
4442 \bitvar{TIS} & \multicolumn{1}{p{40pt}}{Integer Array} &
4443 7 & No & An \bitvar{NBS}-element array of the
4444 current token index for each block. \\
4445 \bitvar{COEFFS} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
4446 16 & Yes & An $\bitvar{NBS}\times 64$ array of
4447 quantized DCT coefficient values for each block in zig-zag order. \\
4448 \bitvar{\bi} & Integer & 36 & No & The index of the current block in
4449 coded order. \\
4450 \bitvar{\ti} & Integer & 6 & No & The current token index. \\
4451 \bottomrule\end{tabularx}
4453 \paragraph{Output parameters:}\hfill\\*
4454 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
4455 \multicolumn{1}{c}{Name} &
4456 \multicolumn{1}{c}{Type} &
4457 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
4458 \multicolumn{1}{c}{Signed?} &
4459 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
4460 \bitvar{TIS} & \multicolumn{1}{p{40pt}}{Integer Array} &
4461 7 & No & An \bitvar{NBS}-element array of the
4462 current token index for each block. \\
4463 \bitvar{NCOEFFS} & \multicolumn{1}{p{40pt}}{Integer Array} &
4464 7 & No & An \bitvar{NBS}-element array of the
4465 coefficient count for each block. \\
4466 \bitvar{COEFFS} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
4467 16 & Yes & An $\bitvar{NBS}\times 64$ array of
4468 quantized DCT coefficient values for each block in zig-zag order. \\
4469 \bottomrule\end{tabularx}
4471 \paragraph{Variables used:}\hfill\\*
4472 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
4473 \multicolumn{1}{c}{Name} &
4474 \multicolumn{1}{c}{Type} &
4475 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
4476 \multicolumn{1}{c}{Signed?} &
4477 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
4478 \locvar{SIGN} & Integer & 1 & No & A flag indicating the sign of the
4479 current coefficient. \\
4480 \locvar{MAG} & Integer & 10 & No & The magnitude of the current
4481 coefficient. \\
4482 \locvar{RLEN} & Integer & 6 & No & The length of the current zero run. \\
4483 \locvar{\tj} & Integer & 6 & No & Another token index. \\
4484 \bottomrule\end{tabularx}
4485 \medskip
4487 Each of these tokens decodes one or more coefficients in the current block.
4488 A summary of the meanings of the token values is presented in
4489 Table~\ref{tab:coeff-tokens}.
4490 There are often several different ways to tokenize a given coefficient list.
4491 Which one is optimal depends on the exact lengths of the Huffman codes used to
4492 represent each token.
4493 Note that we do not update the coefficient count for the block if we decode a
4494 pure zero run.
4496 \begin{table}[htbp]
4497 \begin{center}
4498 \begin{tabularx}{\textwidth}{cclX}\toprule
4499 Token Value & Extra Bits & \multicolumn{1}{p{55pt}}{Number of Coefficients}
4500 & Description \\\midrule
4501 $7$ & $3$ & $1\ldots 8$ & Short zero run. \\
4502 $8$ & $6$ & $1\ldots 64$ & Zero run. \\
4503 $9$ & $0$ & $1$ & $1$. \\
4504 $10$ & $0$ & $1$ & $-1$. \\
4505 $11$ & $0$ & $1$ & $2$. \\
4506 $12$ & $0$ & $1$ & $-2$. \\
4507 $13$ & $1$ & $1$ & $\pm 3$. \\
4508 $14$ & $1$ & $1$ & $\pm 4$. \\
4509 $15$ & $1$ & $1$ & $\pm 5$. \\
4510 $16$ & $1$ & $1$ & $\pm 6$. \\
4511 $17$ & $2$ & $1$ & $\pm 7\ldots 8$. \\
4512 $18$ & $3$ & $1$ & $\pm 9\ldots 12$. \\
4513 $19$ & $4$ & $1$ & $\pm 13\ldots 20$. \\
4514 $20$ & $5$ & $1$ & $\pm 21\ldots 36$. \\
4515 $21$ & $6$ & $1$ & $\pm 37\ldots 68$. \\
4516 $22$ & $10$ & $1$ & $\pm 69\ldots 580$. \\
4517 $23$ & $1$ & $2$ & One zero followed by $\pm 1$. \\
4518 $24$ & $1$ & $3$ & Two zeros followed by $\pm 1$. \\
4519 $25$ & $1$ & $4$ & Three zeros followed by
4520 $\pm 1$. \\
4521 $26$ & $1$ & $5$ & Four zeros followed by
4522 $\pm 1$. \\
4523 $27$ & $1$ & $6$ & Five zeros followed by
4524 $\pm 1$. \\
4525 $28$ & $3$ & $7\ldots 10$ & $6\ldots 9$ zeros followed by
4526 $\pm 1$. \\
4527 $29$ & $4$ & $11\ldots 18$ & $10\ldots 17$ zeros followed by
4528 $\pm 1$.\\
4529 $30$ & $2$ & $2$ & One zero followed by
4530 $\pm 2\ldots 3$. \\
4531 $31$ & $3$ & $3\ldots 4$ & $2\ldots 3$ zeros followed by
4532 $\pm 2\ldots 3$. \\
4533 \bottomrule\end{tabularx}
4534 \end{center}
4535 \caption{Coefficient Token Summary}
4536 \label{tab:coeff-tokens}
4537 \end{table}
4539 Tokens which represent more than one coefficient MUST NOT bring the
4540 total number of coefficients in the block to more than 64.
4541 Care should be taken in a decoder to check for this, as otherwise it may permit
4542 buffer overflows from invalidly formed packets.
4543 \begin{verse}
4544 {\bf Note:} One way to achieve this efficiently is to combine the inverse
4545 zig-zag mapping (described later in Section~\ref{sub:dequant}) with
4546 coefficient decode, and use a table look-up to map zig-zag indices greater
4547 than 63 to a safe location.
4548 \end{verse}
4550 \begin{enumerate}
4551 \item
4552 If \bitvar{TOKEN} is 7:
4553 \begin{enumerate}
4554 \item
4555 Read in a 3-bit unsigned integer as \locvar{RLEN}.
4556 \item
4557 Assign \locvar{RLEN} the value $(\locvar{RLEN}+1)$.
4558 \item
4559 For each value of \locvar{\tj} from \bitvar{\ti} to
4560 $(\bitvar{\ti}+\locvar{RLEN}-1)$, assign
4561 $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero.
4562 \item
4563 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value
4564 $\bitvar{TIS}[\bitvar{\bi}]+\locvar{RLEN}$.
4565 \end{enumerate}
4566 \item
4567 Otherwise, if \bitvar{TOKEN} is 8:
4568 \begin{enumerate}
4569 \item
4570 Read in a 6-bit unsigned integer as \locvar{RLEN}.
4571 \item
4572 Assign \locvar{RLEN} the value $(\locvar{RLEN}+1)$.
4573 \item
4574 For each value of \locvar{\tj} from \bitvar{\ti} to
4575 $(\bitvar{\ti}+\locvar{RLEN}-1)$, assign
4576 $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero.
4577 \item
4578 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value
4579 $\bitvar{TIS}[\bitvar{\bi}]+\locvar{RLEN}$.
4580 \end{enumerate}
4581 \item
4582 Otherwise, if \bitvar{TOKEN} is 9:
4583 \begin{enumerate}
4584 \item
4585 Assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $1$.
4586 \item
4587 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4588 \item
4589 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4590 \end{enumerate}
4591 \item
4592 Otherwise, if \bitvar{TOKEN} is 10:
4593 \begin{enumerate}
4594 \item
4595 Assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $-1$.
4596 \item
4597 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4598 \item
4599 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4600 \end{enumerate}
4601 \item
4602 Otherwise, if \bitvar{TOKEN} is 11:
4603 \begin{enumerate}
4604 \item
4605 Assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $2$.
4606 \item
4607 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4608 \item
4609 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4610 \end{enumerate}
4611 \item
4612 Otherwise, if \bitvar{TOKEN} is 12:
4613 \begin{enumerate}
4614 \item
4615 Assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $-2$.
4616 \item
4617 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4618 \item
4619 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4620 \end{enumerate}
4621 \item
4622 Otherwise, if \bitvar{TOKEN} is 13:
4623 \begin{enumerate}
4624 \item
4625 Read a 1-bit unsigned integer as \locvar{SIGN}.
4626 \item
4627 If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$
4628 the value $3$.
4629 \item
4630 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $-3$.
4631 \item
4632 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4633 \item
4634 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4635 \end{enumerate}
4636 \item
4637 Otherwise, if \bitvar{TOKEN} is 14:
4638 \begin{enumerate}
4639 \item
4640 Read a 1-bit unsigned integer as \locvar{SIGN}.
4641 \item
4642 If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$
4643 the value $4$.
4644 \item
4645 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $-4$.
4646 \item
4647 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4648 \item
4649 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4650 \end{enumerate}
4651 \item
4652 Otherwise, if \bitvar{TOKEN} is 15:
4653 \begin{enumerate}
4654 \item
4655 Read a 1-bit unsigned integer as \locvar{SIGN}.
4656 \item
4657 If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$
4658 the value $5$.
4659 \item
4660 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $-5$.
4661 \item
4662 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4663 \item
4664 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4665 \end{enumerate}
4666 \item
4667 Otherwise, if \bitvar{TOKEN} is 16:
4668 \begin{enumerate}
4669 \item
4670 Read a 1-bit unsigned integer as \locvar{SIGN}.
4671 \item
4672 If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$
4673 the value $6$.
4674 \item
4675 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $-6$.
4676 \item
4677 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4678 \item
4679 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4680 \end{enumerate}
4681 \item
4682 Otherwise, if \bitvar{TOKEN} is 17:
4683 \begin{enumerate}
4684 \item
4685 Read a 1-bit unsigned integer as \locvar{SIGN}.
4686 \item
4687 Read a 1-bit unsigned integer as \locvar{MAG}.
4688 \item
4689 Assign \locvar{MAG} the value $(\locvar{MAG}+7)$.
4690 \item
4691 If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$
4692 the value $\locvar{MAG}$.
4693 \item
4694 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value
4695 $-\locvar{MAG}$.
4696 \item
4697 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4698 \item
4699 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4700 \end{enumerate}
4701 \item
4702 Otherwise, if \bitvar{TOKEN} is 18:
4703 \begin{enumerate}
4704 \item
4705 Read a 1-bit unsigned integer as \locvar{SIGN}.
4706 \item
4707 Read a 2-bit unsigned integer as \locvar{MAG}.
4708 \item
4709 Assign \locvar{MAG} the value $(\locvar{MAG}+9)$.
4710 \item
4711 If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$
4712 the value $\locvar{MAG}$.
4713 \item
4714 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value
4715 $-\locvar{MAG}$.
4716 \item
4717 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4718 \item
4719 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4720 \end{enumerate}
4721 \item
4722 Otherwise, if \bitvar{TOKEN} is 19:
4723 \begin{enumerate}
4724 \item
4725 Read a 1-bit unsigned integer as \locvar{SIGN}.
4726 \item
4727 Read a 3-bit unsigned integer as \locvar{MAG}.
4728 \item
4729 Assign \locvar{MAG} the value $(\locvar{MAG}+13)$.
4730 \item
4731 If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$
4732 the value $\locvar{MAG}$.
4733 \item
4734 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value
4735 $-\locvar{MAG}$.
4736 \item
4737 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4738 \item
4739 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4740 \end{enumerate}
4741 \item
4742 Otherwise, if \bitvar{TOKEN} is 20:
4743 \begin{enumerate}
4744 \item
4745 Read a 1-bit unsigned integer as \locvar{SIGN}.
4746 \item
4747 Read a 4-bit unsigned integer as \locvar{MAG}.
4748 \item
4749 Assign \locvar{MAG} the value $(\locvar{MAG}+21)$.
4750 \item
4751 If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$
4752 the value $\locvar{MAG}$.
4753 \item
4754 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value
4755 $-\locvar{MAG}$.
4756 \item
4757 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4758 \item
4759 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4760 \end{enumerate}
4761 \item
4762 Otherwise, if \bitvar{TOKEN} is 21:
4763 \begin{enumerate}
4764 \item
4765 Read a 1-bit unsigned integer as \locvar{SIGN}.
4766 \item
4767 Read a 5-bit unsigned integer as \locvar{MAG}.
4768 \item
4769 Assign \locvar{MAG} the value $(\locvar{MAG}+37)$.
4770 \item
4771 If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$
4772 the value $\locvar{MAG}$.
4773 \item
4774 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value
4775 $-\locvar{MAG}$.
4776 \item
4777 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4778 \item
4779 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4780 \end{enumerate}
4781 \item
4782 Otherwise, if \bitvar{TOKEN} is 22:
4783 \begin{enumerate}
4784 \item
4785 Read a 1-bit unsigned integer as \locvar{SIGN}.
4786 \item
4787 Read a 9-bit unsigned integer as \locvar{MAG}.
4788 \item
4789 Assign \locvar{MAG} the value $(\locvar{MAG}+69)$.
4790 \item
4791 If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$
4792 the value $\locvar{MAG}$.
4793 \item
4794 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value
4795 $-\locvar{MAG}$.
4796 \item
4797 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4798 \item
4799 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4800 \end{enumerate}
4801 \item
4802 Otherwise, if \bitvar{TOKEN} is 23:
4803 \begin{enumerate}
4804 \item
4805 Assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value zero.
4806 \item
4807 Read a 1-bit unsigned integer as \locvar{SIGN}.
4808 \item
4809 If \locvar{SIGN} is zero, assign
4810 $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+1]$ the value $1$.
4811 \item
4812 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+1]$ the value
4813 $-1$.
4814 \item
4815 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+2$.
4816 \item
4817 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4818 \end{enumerate}
4819 \item
4820 Otherwise, if \bitvar{TOKEN} is 24:
4821 \begin{enumerate}
4822 \item
4823 For each value of \locvar{\tj} from \bitvar{\ti} to $(\bitvar{\ti}+1)$, assign
4824 $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero.
4825 \item
4826 Read a 1-bit unsigned integer as \locvar{SIGN}.
4827 \item
4828 If \locvar{SIGN} is zero, assign
4829 $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+2]$ the value $1$.
4830 \item
4831 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+2]$ the value
4832 $-1$.
4833 \item
4834 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+3$.
4835 \item
4836 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4837 \end{enumerate}
4838 \item
4839 Otherwise, if \bitvar{TOKEN} is 25:
4840 \begin{enumerate}
4841 \item
4842 For each value of \locvar{\tj} from \bitvar{\ti} to $(\bitvar{\ti}+2)$, assign
4843 $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero.
4844 \item
4845 Read a 1-bit unsigned integer as \locvar{SIGN}.
4846 \item
4847 If \locvar{SIGN} is zero, assign
4848 $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+3]$ the value $1$.
4849 \item
4850 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+3]$ the value
4851 $-1$.
4852 \item
4853 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+4$.
4854 \item
4855 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4856 \end{enumerate}
4857 \item
4858 Otherwise, if \bitvar{TOKEN} is 26:
4859 \begin{enumerate}
4860 \item
4861 For each value of \locvar{\tj} from \bitvar{\ti} to $(\bitvar{\ti}+3)$, assign
4862 $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero.
4863 \item
4864 Read a 1-bit unsigned integer as \locvar{SIGN}.
4865 \item
4866 If \locvar{SIGN} is zero, assign
4867 $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+4]$ the value $1$.
4868 \item
4869 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+4]$ the value
4870 $-1$.
4871 \item
4872 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+5$.
4873 \item
4874 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4875 \end{enumerate}
4876 \item
4877 Otherwise, if \bitvar{TOKEN} is 27:
4878 \begin{enumerate}
4879 \item
4880 For each value of \locvar{\tj} from \bitvar{\ti} to $(\bitvar{\ti}+4)$, assign
4881 $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero.
4882 \item
4883 Read a 1-bit unsigned integer as \locvar{SIGN}.
4884 \item
4885 If \locvar{SIGN} is zero, assign
4886 $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+5]$ the value $1$.
4887 \item
4888 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+5]$ the value
4889 $-1$.
4890 \item
4891 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+6$.
4892 \item
4893 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4894 \end{enumerate}
4895 \item
4896 Otherwise, if \bitvar{TOKEN} is 28:
4897 \begin{enumerate}
4898 \item
4899 Read a 1-bit unsigned integer as \locvar{SIGN}.
4900 \item
4901 Read a 2-bit unsigned integer as \locvar{RLEN}.
4902 \item
4903 Assign \locvar{RLEN} the value $(\locvar{RLEN}+6)$.
4904 \item
4905 For each value of \locvar{\tj} from \bitvar{\ti} to
4906 $(\bitvar{\ti}+\locvar{RLEN}-1)$, assign
4907 $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero.
4908 \item
4909 If \locvar{SIGN} is zero, assign
4910 $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+\locvar{RLEN}]$ the value $1$.
4911 \item
4912 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+\locvar{RLEN}]$
4913 the value $-1$.
4914 \item
4915 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value
4916 $\bitvar{TIS}[\bitvar{\bi}]+\locvar{RLEN}+1$.
4917 \item
4918 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4919 \end{enumerate}
4920 \item
4921 Otherwise, if \bitvar{TOKEN} is 29:
4922 \begin{enumerate}
4923 \item
4924 Read a 1-bit unsigned integer as \locvar{SIGN}.
4925 \item
4926 Read a 3-bit unsigned integer as \locvar{RLEN}.
4927 \item
4928 Assign \locvar{RLEN} the value $(\locvar{RLEN}+10)$.
4929 \item
4930 For each value of \locvar{\tj} from \bitvar{\ti} to
4931 $(\bitvar{\ti}+\locvar{RLEN}-1)$, assign
4932 $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero.
4933 \item
4934 If \locvar{SIGN} is zero, assign
4935 $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+\locvar{RLEN}]$ the value $1$.
4936 \item
4937 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+\locvar{RLEN}]$
4938 the value $-1$.
4939 \item
4940 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value
4941 $\bitvar{TIS}[\bitvar{\bi}]+\locvar{RLEN}+1$.
4942 \item Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4943 \end{enumerate}
4944 \item
4945 Otherwise, if \bitvar{TOKEN} is 30:
4946 \begin{enumerate}
4947 \item
4948 Assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value zero.
4949 \item
4950 Read a 1-bit unsigned integer as \locvar{SIGN}.
4951 \item
4952 Read a 1-bit unsigned integer as \locvar{MAG}.
4953 \item
4954 Assign \locvar{MAG} the value $(\locvar{MAG}+2)$.
4955 \item
4956 If \locvar{SIGN} is zero, assign
4957 $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+1]$ the value $\locvar{MAG}$.
4958 \item
4959 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+1]$ the value
4960 $-\locvar{MAG}$.
4961 \item
4962 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+2$.
4963 \item Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4964 \end{enumerate}
4965 \item
4966 Otherwise, if \bitvar{TOKEN} is 31:
4967 \begin{enumerate}
4968 \item
4969 Read a 1-bit unsigned integer as \locvar{SIGN}.
4970 \item
4971 Read a 1-bit unsigned integer as \locvar{MAG}.
4972 \item
4973 Assign \locvar{MAG} the value $(\locvar{MAG}+2)$.
4974 \item
4975 Read a 1-bit unsigned integer as \locvar{RLEN}.
4976 \item
4977 Assign \locvar{RLEN} the value $(\locvar{RLEN}+2)$.
4978 \item
4979 For each value of \locvar{\tj} from \bitvar{\ti} to
4980 $(\bitvar{\ti}+\locvar{RLEN}-1)$, assign
4981 $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero.
4982 \item
4983 If \locvar{SIGN} is zero, assign
4984 $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+\locvar{RLEN}]$ the value
4985 $\locvar{MAG}$.
4986 \item
4987 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+\locvar{RLEN}]$
4988 the value $-\locvar{MAG}$.
4989 \item
4990 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value
4991 $\bitvar{TIS}[\bitvar{\bi}]+\locvar{RLEN}+1$.
4992 \item Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4993 \end{enumerate}
4994 \end{enumerate}
4996 \subsection{DCT Coefficient Decode}
4997 \label{sub:dct-coeffs}
4999 \paragraph{Input parameters:}\hfill\\*
5000 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5001 \multicolumn{1}{c}{Name} &
5002 \multicolumn{1}{c}{Type} &
5003 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5004 \multicolumn{1}{c}{Signed?} &
5005 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5006 \bitvar{NBS} & Integer & 36 & No & The total number of blocks in a
5007 frame. \\
5008 \bitvar{BCODED} & \multicolumn{1}{p{40pt}}{Integer Array} &
5009 1 & No & An \bitvar{NBS}-element array of flags
5010 indicating which blocks are coded. \\
5011 \bitvar{NMBS} & Integer & 32 & No & The total number of macro blocks in a
5012 frame. \\
5013 \bitvar{HTS} & \multicolumn{3}{l}{Huffman table array}
5014 & An 80-element array of Huffman tables
5015 with up to 32 entries each. \\
5016 \bottomrule\end{tabularx}
5018 \paragraph{Output parameters:}\hfill\\*
5019 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5020 \multicolumn{1}{c}{Name} &
5021 \multicolumn{1}{c}{Type} &
5022 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5023 \multicolumn{1}{c}{Signed?} &
5024 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5025 \bitvar{COEFFS} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
5026 16 & Yes & An $\bitvar{NBS}\times 64$ array of
5027 quantized DCT coefficient values for each block in zig-zag order. \\
5028 \bitvar{NCOEFFS} & \multicolumn{1}{p{40pt}}{Integer Array} &
5029 7 & No & An \bitvar{NBS}-element array of the
5030 coefficient count for each block. \\
5031 \bottomrule\end{tabularx}
5033 \paragraph{Variables used:}\hfill\\*
5034 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5035 \multicolumn{1}{c}{Name} &
5036 \multicolumn{1}{c}{Type} &
5037 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5038 \multicolumn{1}{c}{Signed?} &
5039 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5040 \locvar{NLBS} & Integer & 34 & No & The number of blocks in the luma
5041 plane. \\
5042 \locvar{TIS} & \multicolumn{1}{p{40pt}}{Integer Array} &
5043 7 & No & An \bitvar{NBS}-element array of the
5044 current token index for each block. \\
5045 \locvar{EOBS} & Integer & 36 & No & The remaining length of the current
5046 EOB run. \\
5047 \locvar{TOKEN} & Integer & 5 & No & The current token being decoded. \\
5048 \locvar{HG} & Integer & 3 & No & The current Huffman table group. \\
5049 \locvar{\cbi} & Integer & 36 & No & The index of the current block in the
5050 coded block list. \\
5051 \locvar{\bi} & Integer & 36 & No & The index of the current block in
5052 coded order. \\
5053 \locvar{\bj} & Integer & 36 & No & Another index of a block in coded
5054 order. \\
5055 \locvar{\ti} & Integer & 6 & No & The current token index. \\
5056 \locvar{\tj} & Integer & 6 & No & Another token index. \\
5057 \locvar{\hti_L} & Integer & 4 & No & The index of the current Huffman table
5058 to use for the luma plane within a group. \\
5059 \locvar{\hti_C} & Integer & 4 & No & The index of the current Huffman table
5060 to use for the chroma planes within a group. \\
5061 \locvar{\hti} & Integer & 7 & No & The index of the current Huffman table
5062 to use. \\
5063 \bottomrule\end{tabularx}
5064 \medskip
5066 This procedure puts the above two procedures to work to decode the entire set
5067 of DCT coefficients for the frame.
5068 At the end of this procedure, \locvar{EOBS} MUST be zero, and
5069 $\locvar{TIS}[\locvar{\bi}]$ MUST be 64 for every coded \locvar{\bi}.
5071 Note that we update the coefficient count of every block before continuing an
5072 EOB run or decoding a token, despite the fact that it is already up to date
5073 unless the previous token was a pure zero run.
5074 This is done intentionally to mimic the VP3 accounting rules.
5075 Thus the only time the coefficient count does not include the coefficients in a
5076 pure zero run is when that run reaches all the way to coefficient 63.
5077 Note, however, that regardless of the coefficient count, any additional
5078 coefficients are still set to zero.
5079 The only use of the count is in determining if a special case of the inverse
5080 DCT can be used in Section~\ref{sub:2d-idct}.
5082 \begin{enumerate}
5083 \item
5084 Assign \locvar{NLBS} the value $(\bitvar{NMBS}*4)$.
5085 \item
5086 For each consecutive value of \locvar{\bi} from 0 to $(\bitvar{NBS}-1)$,
5087 assign $\locvar{TIS}[\locvar{\bi}]$ the value zero.
5088 \item
5089 Assign \locvar{EOBS} the value 0.
5090 \item
5091 For each consecutive value of \locvar{\ti} from 0 to 63:
5092 \begin{enumerate}
5093 \item
5094 If \locvar{\ti} is $0$ or $1$:
5095 \begin{enumerate}
5096 \item
5097 Read a 4-bit unsigned integer as \locvar{\hti_L}.
5098 \item
5099 Read a 4-bit unsigned integer as \locvar{\hti_C}.
5100 \end{enumerate}
5101 \item
5102 For each consecutive value of \locvar{\bi} from 0 to $(\bitvar{NBS}-1)$ for
5103 which $\bitvar{BCODED}[\locvar{\bi}]$ is non-zero and
5104 $\locvar{TIS}[\locvar{\bi}]$ equals \locvar{\ti}:
5105 \begin{enumerate}
5106 \item
5107 Assign $\bitvar{NCOEFFS}[\locvar{\bi}]$ the value \locvar{\ti}.
5108 \item
5109 If \locvar{EOBS} is greater than zero:
5110 \begin{enumerate}
5111 \item
5112 For each value of \locvar{\tj} from $\locvar{\ti}$ to 63, assign
5113 $\bitvar{COEFFS}[\locvar{\bi}][\locvar{\tj}]$ the value zero.
5114 \item
5115 Assign $\locvar{TIS}[\locvar{\bi}]$ the value 64.
5116 \item
5117 Assign \locvar{EOBS} the value $(\locvar{EOBS}-1)$.
5118 \end{enumerate}
5119 \item
5120 Otherwise:
5121 \begin{enumerate}
5122 \item
5123 Assign \locvar{HG} a value based on \locvar{\ti} from
5124 Table~\ref{tab:huff-groups}.
5126 \begin{table}[htbp]
5127 \begin{center}
5128 \begin{tabular}{lc}\toprule
5129 \locvar{\ti} & \locvar{HG} \\\midrule
5130 $0$ & $0$ \\
5131 $1\ldots 4$ & $1$ \\
5132 $5\ldots 13$ & $2$ \\
5133 $14\ldots 26$ & $3$ \\
5134 $27\ldots 63$ & $4$ \\
5135 \bottomrule\end{tabular}
5136 \end{center}
5137 \caption{Huffman Table Groups}
5138 \label{tab:huff-groups}
5139 \end{table}
5141 \item
5142 If \locvar{\bi} is less than \locvar{NLBS}, assign \locvar{\hti} the value
5143 $(16*\locvar{HG}+\locvar{\hti_L})$.
5144 \item
5145 Otherwise, assign \locvar{\hti} the value
5146 $(16*\locvar{HG}+\locvar{\hti_C})$.
5147 \item
5148 Read one bit at a time until one of the codes in $\bitvar{HTS}[\locvar{\hti}]$
5149 is recognized, and assign the value to \locvar{TOKEN}.
5150 \item
5151 If \locvar{TOKEN} is less than 7, expand an EOB token using the procedure given
5152 in Section~\ref{sub:eob-token} to update $\locvar{TIS}[\locvar{\bi}]$,
5153 $\bitvar{COEFFS}[\locvar{\bi}]$, and \locvar{EOBS}.
5154 \item
5155 Otherwise, expand a coefficient token using the procedure given in
5156 Section~\ref{sub:coeff-token} to update $\locvar{TIS}[\locvar{\bi}]$,
5157 $\bitvar{COEFFS}[\locvar{\bi}]$, and $\bitvar{NCOEFFS}[\locvar{\bi}]$.
5158 \end{enumerate}
5159 \end{enumerate}
5160 \end{enumerate}
5161 \end{enumerate}
5163 \section{Undoing DC Prediction}
5165 The actual value of a DC coefficient decoded by Section~\ref{sec:dct-decode} is
5166 the residual from a predicted value computed by the encoder.
5167 This prediction is only applied to DC coefficients.
5168 Quantized AC coefficients are encoded directly.
5170 This section describes how to undo this prediction to recover the original
5171 DC coefficients.
5172 The predicted DC value for a block is computed from the DC values of its
5173 immediate neighbors which precede the block in raster order.
5174 Thus, reversing this prediction must proceed in raster order, instead of coded
5175 order.
5177 Note that this step comes before dequantizing the coefficients.
5178 For this reason, DC coefficients are all quantized with the same \qi\ value,
5179 regardless of the block-level \qi\ values decoded in
5180 Section~\ref{sub:block-qis}.
5181 Those \qi\ values are applied only to the AC coefficients.
5183 \subsection{Computing the DC Predictor}
5184 \label{sub:dc-pred}
5186 \paragraph{Input parameters:}\hfill\\*
5187 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5188 \multicolumn{1}{c}{Name} &
5189 \multicolumn{1}{c}{Type} &
5190 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5191 \multicolumn{1}{c}{Signed?} &
5192 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5193 \bitvar{BCODED} & \multicolumn{1}{p{40pt}}{Integer Array} &
5194 1 & No & An \bitvar{NBS}-element array of flags
5195 indicating which blocks are coded. \\
5196 \bitvar{MBMODES} & \multicolumn{1}{p{40pt}}{Integer Array} &
5197 3 & No & An \bitvar{NMBS}-element array of
5198 coding modes for each macro block. \\
5199 \bitvar{LASTDC} & \multicolumn{1}{p{40pt}}{Integer Array} &
5200 16 & Yes & A 3-element array containing the
5201 most recently decoded DC value, one for intra mode and one for each reference
5202 frame. \\
5203 \bitvar{COEFFS} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
5204 16 & Yes & An $\bitvar{NBS}\times 64$ array of
5205 quantized DCT coefficient values for each block in zig-zag order. \\
5206 \bitvar{\bi} & Integer & 36 & No & The index of the current block in
5207 coded order. \\
5208 \bottomrule\end{tabularx}
5210 \paragraph{Output parameters:}\hfill\\*
5211 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5212 \multicolumn{1}{c}{Name} &
5213 \multicolumn{1}{c}{Type} &
5214 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5215 \multicolumn{1}{c}{Signed?} &
5216 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5217 \bitvar{DCPRED} & Integer & 16 & Yes & The predicted DC value for the current
5218 block. \\
5219 \bottomrule\end{tabularx}
5221 \paragraph{Variables used:}\hfill\\*
5222 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5223 \multicolumn{1}{c}{Name} &
5224 \multicolumn{1}{c}{Type} &
5225 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5226 \multicolumn{1}{c}{Signed?} &
5227 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5228 \locvar{P} & \multicolumn{1}{p{40pt}}{Integer Array} &
5229 1 & No & A 4-element array indicating which
5230 neighbors can be used for DC prediction. \\
5231 \locvar{PBI} & \multicolumn{1}{p{40pt}}{Integer Array} &
5232 36 & No & A 4-element array containing the
5233 coded-order block index of the current block's neighbors. \\
5234 \locvar{W} & \multicolumn{1}{p{40pt}}{Integer Array} &
5235 7 & Yes & A 4-element array of the weights to
5236 apply to each neighboring DC value. \\
5237 \locvar{PDIV} & Integer & 8 & No & The value to divide the weighted sum
5238 by. \\
5239 \locvar{\bj} & Integer & 36 & No & The index of a neighboring block in
5240 coded order. \\
5241 \locvar{\mbi} & Integer & 32 & No & The index of the macro block
5242 containing block \locvar{\bi}. \\
5243 \locvar{\mbj} & Integer & 32 & No & The index of the macro block
5244 containing block \locvar{\bj}. \\
5245 \locvar{\rfi} & Integer & 2 & No & The index of the reference frame
5246 indicated by the coding mode for macro block \locvar{\mbi}. \\
5247 \bottomrule\end{tabularx}
5248 \medskip
5250 This procedure outlines how a predictor is formed for a single block.
5252 The predictor is computed as a weighted sum of the neighboring DC values from
5253 coded blocks which use the same reference frame.
5254 This latter condition is determined only by checking the coding mode for the
5255 block.
5256 Even if the golden frame and the previous frame are in fact the same, e.g., for
5257 the first inter frame after an intra frame, they are still treated as being
5258 different for the purposes of DC prediction.
5259 The weighted sum is divided by a power of two, with truncation towards zero,
5260 and the result is checked for outranging if necessary.
5262 If there are no neighboring coded blocks which use the same reference frame as
5263 the current block, then the most recent DC value of any block that used that
5264 reference frame is used instead.
5265 If no such block exists, then the predictor is set to zero.
5267 \begin{enumerate}
5268 \item
5269 Assign \locvar{\mbi} the index of the macro block containing block
5270 \bitvar{\bi}.
5271 \item
5272 Assign \locvar{\rfi} the value of the Reference Frame Index column of
5273 Table~\ref{tab:cm-refs} corresponding to $\bitvar{MBMODES}[\locvar{\mbi}]$.
5275 \begin{table}[htpb]
5276 \begin{center}
5277 \begin{tabular}{ll}\toprule
5278 Coding Mode & Reference Frame Index \\\midrule
5279 $0$ (INTER\_NOMV) & $1$ (Previous) \\
5280 $1$ (INTRA) & $0$ (None) \\
5281 $2$ (INTER\_MV) & $1$ (Previous) \\
5282 $3$ (INTER\_MV\_LAST) & $1$ (Previous) \\
5283 $4$ (INTER\_MV\_LAST2) & $1$ (Previous) \\
5284 $5$ (INTER\_GOLDEN\_NOMV) & $2$ (Golden) \\
5285 $6$ (INTER\_GOLDEN\_MV) & $2$ (Golden) \\
5286 $7$ (INTER\_MV\_FOUR) & $1$ (Previous) \\
5287 \bottomrule\end{tabular}
5288 \end{center}
5289 \caption{Reference Frames for Each Coding Mode}
5290 \label{tab:cm-refs}
5291 \end{table}
5293 \item
5294 If block \locvar{\bi} is not along the left edge of the coded frame:
5295 \begin{enumerate}
5296 \item
5297 Assign \locvar{\bj} the coded-order index of block \locvar{\bi}'s left
5298 neighbor, i.e., in the same row but one column to the left.
5299 \item
5300 If $\bitvar{BCODED}[\locvar{\bj}]$ is not zero:
5301 \begin{enumerate}
5302 \item
5303 Assign \locvar{\mbj} the index of the macro block containing block
5304 \locvar{\bj}.
5305 \item
5306 If the value of the Reference Frame Index column of Table~\ref{tab:cm-refs}
5307 corresponding to $\bitvar{MBMODES}[\locvar{\mbj}]$ equals \locvar{\rfi}:
5308 \begin{enumerate}
5309 \item
5310 Assign $\locvar{P}[0]$ the value $1$.
5311 \item
5312 Assign $\locvar{PBI}[0]$ the value \locvar{\bj}.
5313 \end{enumerate}
5314 \item
5315 Otherwise, assign $\locvar{P}[0]$ the value zero.
5316 \end{enumerate}
5317 \item
5318 Otherwise, assign $\locvar{P}[0]$ the value zero.
5319 \end{enumerate}
5320 \item
5321 Otherwise, assign $\locvar{P}[0]$ the value zero.
5323 \item
5324 If block \locvar{\bi} is not along the left edge nor the bottom edge of the
5325 coded frame:
5326 \begin{enumerate}
5327 \item
5328 Assign \locvar{\bj} the coded-order index of block \locvar{\bi}'s lower-left
5329 neighbor, i.e., one row down and one column to the left.
5330 \item
5331 If $\bitvar{BCODED}[\locvar{\bj}]$ is not zero:
5332 \begin{enumerate}
5333 \item
5334 Assign \locvar{\mbj} the index of the macro block containing block
5335 \locvar{\bj}.
5336 \item
5337 If the value of the Reference Frame Index column of Table~\ref{tab:cm-refs}
5338 corresponding to $\bitvar{MBMODES}[\locvar{\mbj}]$ equals \locvar{\rfi}:
5339 \begin{enumerate}
5340 \item
5341 Assign $\locvar{P}[1]$ the value $1$.
5342 \item
5343 Assign $\locvar{PBI}[1]$ the value \locvar{\bj}.
5344 \end{enumerate}
5345 \item
5346 Otherwise, assign $\locvar{P}[1]$ the value zero.
5347 \end{enumerate}
5348 \item
5349 Otherwise, assign $\locvar{P}[1]$ the value zero.
5350 \end{enumerate}
5351 \item
5352 Otherwise, assign $\locvar{P}[1]$ the value zero.
5354 \item
5355 If block \locvar{\bi} is not along the bottom edge of the coded frame:
5356 \begin{enumerate}
5357 \item
5358 Assign \locvar{\bj} the coded-order index of block \locvar{\bi}'s lower
5359 neighbor, i.e., in the same column but one row down.
5360 \item
5361 If $\bitvar{BCODED}[\bj]$ is not zero:
5362 \begin{enumerate}
5363 \item
5364 Assign \locvar{\mbj} the index of the macro block containing block
5365 \locvar{\bj}.
5366 \item
5367 If the value of the Reference Frame Index column of Table~\ref{tab:cm-refs}
5368 corresponding to $\bitvar{MBMODES}[\locvar{\mbj}]$ equals \locvar{\rfi}:
5369 \begin{enumerate}
5370 \item
5371 Assign $\locvar{P}[2]$ the value $1$.
5372 \item
5373 Assign $\locvar{PBI}[2]$ the value \locvar{\bj}.
5374 \end{enumerate}
5375 \item
5376 Otherwise, assign $\locvar{P}[2]$ the value zero.
5377 \end{enumerate}
5378 \item
5379 Otherwise, assign $\locvar{P}[2]$ the value zero.
5380 \end{enumerate}
5381 \item
5382 Otherwise, assign $\locvar{P}[2]$ the value zero.
5384 \item
5385 If block \locvar{\bi} is not along the right edge nor the bottom edge of the
5386 coded frame:
5387 \begin{enumerate}
5388 \item
5389 Assign \locvar{\bj} the coded-order index of block \locvar{\bi}'s lower-right
5390 neighbor, i.e., one row down and one column to the right.
5391 \item
5392 If $\bitvar{BCODED}[\bj]$ is not zero:
5393 \begin{enumerate}
5394 \item
5395 Assign \locvar{\mbj} the index of the macro block containing block
5396 \locvar{\bj}.
5397 \item
5398 If the value of the Reference Frame Index column of Table~\ref{tab:cm-refs}
5399 corresponding to $\bitvar{MBMODES}[\locvar{\mbj}]$ equals \locvar{\rfi}:
5400 \begin{enumerate}
5401 \item
5402 Assign $\locvar{P}[3]$ the value $1$.
5403 \item
5404 Assign $\locvar{PBI}[3]$ the value \locvar{\bj}.
5405 \end{enumerate}
5406 \item
5407 Otherwise, assign $\locvar{P}[3]$ the value zero.
5408 \end{enumerate}
5409 \item
5410 Otherwise, assign $\locvar{P}[3]$ the value zero.
5411 \end{enumerate}
5412 \item
5413 Otherwise, assign $\locvar{P}[3]$ the value zero.
5415 \item
5416 If the values $\locvar{P}[0]$, $\locvar{P}[1]$, $\locvar{P}[2]$, and
5417 $\locvar{P}[3]$ are all zero, then assign \bitvar{DCPRED} the value
5418 $\bitvar{LASTDC}[\locvar{\rfi}]$.
5419 \item
5420 Otherwise:
5421 \begin{enumerate}
5422 \item
5423 Assign the array \locvar{W} and the variable \locvar{PDIV} the values from the
5424 row of Table~\ref{tab:dc-weights} corresponding to the values of each
5425 $\locvar{P}[\idx{i}]$.
5427 \begin{table}[htb]
5428 \begin{center}
5429 \begin{tabular}{ccccrrrrr}\toprule
5430 \multicolumn{1}{p{25pt}}{\centering$\locvar{P}[0]$ (L)} &
5431 \multicolumn{1}{p{25pt}}{\centering$\locvar{P}[1]$ (DL)} &
5432 \multicolumn{1}{p{25pt}}{\centering$\locvar{P}[2]$ (D)} &
5433 \multicolumn{1}{p{25pt}}{\centering$\locvar{P}[3]$ (DR)} &
5434 \multicolumn{1}{p{25pt}}{\centering$\locvar{W}[0]$ (L)} &
5435 \multicolumn{1}{p{25pt}}{\centering$\locvar{W}[1]$ (DL)} &
5436 \multicolumn{1}{p{25pt}}{\centering$\locvar{W}[2]$ (D)} &
5437 \multicolumn{1}{p{25pt}}{\centering$\locvar{W}[3]$ (DR)} &
5438 \locvar{PDIV} \\\midrule
5439 $1$ & $0$ & $0$ & $0$ & $1$ & $0$ & $0$ & $0$ & $1$ \\
5440 $0$ & $1$ & $0$ & $0$ & $0$ & $1$ & $0$ & $0$ & $1$ \\
5441 $1$ & $1$ & $0$ & $0$ & $1$ & $0$ & $0$ & $0$ & $1$ \\
5442 $0$ & $0$ & $1$ & $0$ & $0$ & $0$ & $1$ & $0$ & $1$ \\
5443 $1$ & $0$ & $1$ & $0$ & $1$ & $0$ & $1$ & $0$ & $2$ \\
5444 $0$ & $1$ & $1$ & $0$ & $0$ & $0$ & $1$ & $0$ & $1$ \\
5445 $1$ & $1$ & $1$ & $0$ & $29$ & $-26$ & $29$ & $0$ & $32$ \\
5446 $0$ & $0$ & $0$ & $1$ & $0$ & $0$ & $0$ & $1$ & $1$ \\
5447 $1$ & $0$ & $0$ & $1$ & $75$ & $0$ & $0$ & $53$ & $128$ \\
5448 $0$ & $1$ & $0$ & $1$ & $0$ & $1$ & $0$ & $1$ & $2$ \\
5449 $1$ & $1$ & $0$ & $1$ & $75$ & $0$ & $0$ & $53$ & $128$ \\
5450 $0$ & $0$ & $1$ & $1$ & $0$ & $0$ & $1$ & $0$ & $1$ \\
5451 $1$ & $0$ & $1$ & $1$ & $75$ & $0$ & $0$ & $53$ & $128$ \\
5452 $0$ & $1$ & $1$ & $1$ & $0$ & $3$ & $10$ & $3$ & $16$ \\
5453 $1$ & $1$ & $1$ & $1$ & $29$ & $-26$ & $29$ & $0$ & $32$ \\
5454 \bottomrule\end{tabular}
5455 \end{center}
5456 \caption{Weights and Divisors for Each Set of Available DC Predictors}
5457 \label{tab:dc-weights}
5458 \end{table}
5460 \item
5461 Assign \bitvar{DCPRED} the value zero.
5462 \item
5463 If $\locvar{P}[0]$ is non-zero, assign \bitvar{DCPRED} the value
5464 $(\bitvar{DCPRED}+\locvar{W}[0]*\bitvar{COEFFS}[\locvar{PBI}[0]][0])$.
5465 \item
5466 If $\locvar{P}[1]$ is non-zero, assign \bitvar{DCPRED} the value
5467 $(\bitvar{DCPRED}+\locvar{W}[1]*\bitvar{COEFFS}[\locvar{PBI}[1]][0])$.
5468 \item
5469 If $\locvar{P}[2]$ is non-zero, assign \bitvar{DCPRED} the value
5470 $(\bitvar{DCPRED}+\locvar{W}[2]*\bitvar{COEFFS}[\locvar{PBI}[2]][0])$.
5471 \item
5472 If $\locvar{P}[3]$ is non-zero, assign \bitvar{DCPRED} the value
5473 $(\bitvar{DCPRED}+\locvar{W}[3]*\bitvar{COEFFS}[\locvar{PBI}[3]][0])$.
5474 \item
5475 Assign \bitvar{DCPRED} the value $(\bitvar{DCPRED}//\locvar{PDIV})$.
5476 \item
5477 If $\locvar{P}[0]$, $\locvar{P}[1]$, and $\locvar{P}[2]$ are all non-zero:
5478 \begin{enumerate}
5479 \item
5480 If $|\bitvar{DCPRED}-\bitvar{COEFFS}[\locvar{PBI}[2]][0]|$ is greater than
5481 $128$, assign \bitvar{DCPRED} the value $\bitvar{COEFFS}[\locvar{PBI}[2]][0]$.
5482 \item
5483 Otherwise, if $|\bitvar{DCPRED}-\bitvar{COEFFS}[\locvar{PBI}[0]][0]|$ is
5484 greater than $128$, assign \bitvar{DCPRED} the value
5485 $\bitvar{COEFFS}[\locvar{PBI}[0]][0]$.
5486 \item
5487 Otherwise, if $|\bitvar{DCPRED}-\bitvar{COEFFS}[\locvar{PBI}[1]][0]|$ is
5488 greater than $128$, assign \bitvar{DCPRED} the value
5489 $\bitvar{COEFFS}[\locvar{PBI}[1]][0]$.
5490 \end{enumerate}
5491 \end{enumerate}
5492 \end{enumerate}
5494 \subsection{Inverting the DC Prediction Process}
5495 \label{sub:dc-pred-undo}
5497 \paragraph{Input parameters:}\hfill\\*
5498 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5499 \multicolumn{1}{c}{Name} &
5500 \multicolumn{1}{c}{Type} &
5501 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5502 \multicolumn{1}{c}{Signed?} &
5503 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5504 \bitvar{BCODED} & \multicolumn{1}{p{40pt}}{Integer Array} &
5505 1 & No & An \bitvar{NBS}-element array of flags
5506 indicating which blocks are coded. \\
5507 \bitvar{MBMODES} & \multicolumn{1}{p{40pt}}{Integer Array} &
5508 3 & No & An \bitvar{NMBS}-element array of
5509 coding modes for each macro block. \\
5510 \bitvar{COEFFS} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
5511 16 & Yes & An $\bitvar{NBS}\times 64$ array of
5512 quantized DCT coefficient values for each block in zig-zag order. \\
5513 \bottomrule\end{tabularx}
5515 \paragraph{Output parameters:}\hfill\\*
5516 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5517 \multicolumn{1}{c}{Name} &
5518 \multicolumn{1}{c}{Type} &
5519 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5520 \multicolumn{1}{c}{Signed?} &
5521 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5522 \bitvar{COEFFS} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
5523 16 & Yes & An $\bitvar{NBS}\times 64$ array of
5524 quantized DCT coefficient values for each block in zig-zag order. The DC
5525 value of each block will be updated. \\
5526 \bottomrule\end{tabularx}
5528 \paragraph{Variables used:}\hfill\\*
5529 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5530 \multicolumn{1}{c}{Name} &
5531 \multicolumn{1}{c}{Type} &
5532 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5533 \multicolumn{1}{c}{Signed?} &
5534 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5535 \locvar{LASTDC} & \multicolumn{1}{p{40pt}}{Integer Array} &
5536 16 & Yes & A 3-element array containing the
5537 most recently decoded DC value, one for intra mode and one for each reference
5538 frame. \\
5539 \locvar{DCPRED} & Integer & 11 & Yes & The predicted DC value for the current
5540 block. \\
5541 \locvar{DC} & Integer & 17 & Yes & The actual DC value for the current
5542 block. \\
5543 \locvar{\bi} & Integer & 36 & No & The index of the current block in
5544 coded order. \\
5545 \locvar{\mbi} & Integer & 32 & No & The index of the macro block
5546 containing block \locvar{\bi}. \\
5547 \locvar{\rfi} & Integer & 2 & No & The index of the reference frame
5548 indicated by the coding mode for macro block \locvar{\mbi}. \\
5549 \bottomrule\end{tabularx}
5550 \medskip
5552 This procedure describes the complete process of undoing the DC prediction to
5553 recover the original DC values.
5554 Because it is possible to add a value as large as $580$ to the predicted DC
5555 coefficient value at every block, which will then be used to increase the
5556 predictor for the next block, the reconstructed DC value could overflow a
5557 16-bit integer.
5558 This is handled by truncating the result to a 16-bit signed representation,
5559 simply throwing away any higher bits in the two's complement representation of
5560 the number.
5562 \begin{enumerate}
5563 \item
5564 Assign $\locvar{LASTDC}[0]$ the value zero.
5565 \item
5566 Assign $\locvar{LASTDC}[1]$ the value zero.
5567 \item
5568 Assign $\locvar{LASTDC}[2]$ the value zero.
5569 \item
5570 For each block in {\em raster} order, with coded-order index \locvar{\bi}:
5571 \begin{enumerate}
5572 \item
5573 If $\bitvar{BCODED}[\locvar{\bi}]$ is non-zero:
5574 \begin{enumerate}
5575 \item
5576 Compute the value \locvar{DCPRED} using the procedure outlined in
5577 Section~\ref{sub:dc-pred}.
5578 \item
5579 Assign \locvar{DC} the value
5580 $(\bitvar{COEFFS}[\locvar{\bi}][0]+\locvar{DCPRED})$.
5581 \item
5582 Truncate \locvar{DC} to a 16-bit representation by dropping any higher-order
5583 bits.
5584 \item
5585 Assign $\bitvar{COEFFS}[\locvar{\bi}][0]$ the value \locvar{DC}.
5586 \item
5587 Assign \locvar{\mbi} the index of the macro block containing block
5588 \locvar{\bi}.
5589 \item
5590 Assign \locvar{\rfi} the value of the Reference Frame Index column of
5591 Table~\ref{tab:cm-refs} corresponding to $\bitvar{MBMODES}[\locvar{\mbi}]$.
5592 \item
5593 Assign $\locvar{LASTDC}[\rfi]$ the value $\locvar{DC}$.
5594 \end{enumerate}
5595 \end{enumerate}
5596 \end{enumerate}
5598 \section{Reconstruction}
5600 At this stage, the complete contents of the data packet have been decoded.
5601 All that remains is to reconstruct the contents of the new frame.
5602 This is applied on a block by block basis, and as each block is independent,
5603 the order they are processed in does not matter.
5605 \subsection{Predictors}
5606 \label{sec:predictors}
5608 For each block, a predictor is formed based on its coding mode and motion
5609 vector.
5610 There are three basic types of predictors: the intra predictor, the whole-pixel
5611 predictor, and the half-pixel predictor.
5612 The first is used for all blocks coded in INTRA mode, while all other blocks
5613 use one of the latter two.
5614 The whole-pixel predictor is used if the fractional part of both motion vector
5615 components is zero, otherwise the half-pixel predictor is used.
5617 \subsubsection{The Intra Predictor}
5618 \label{sub:predintra}
5620 \paragraph{Input parameters:} None.
5622 \paragraph{Output parameters:}\hfill\\*
5623 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5624 \multicolumn{1}{c}{Name} &
5625 \multicolumn{1}{c}{Type} &
5626 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5627 \multicolumn{1}{c}{Signed?} &
5628 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5629 \bitvar{PRED} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
5630 8 & No & An $8\times 8$ array of predictor
5631 values to use for INTRA coded blocks. \\
5632 \bottomrule\end{tabularx}
5634 \paragraph{Variables used:}\hfill\\*
5635 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5636 \multicolumn{1}{c}{Name} &
5637 \multicolumn{1}{c}{Type} &
5638 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5639 \multicolumn{1}{c}{Signed?} &
5640 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5641 \locvar{\idx{bx}} & Integer & 3 & No & The horizontal pixel index in the
5642 block. \\
5643 \locvar{\idx{by}} & Integer & 3 & No & The vertical pixel index in the
5644 block. \\
5645 \bottomrule\end{tabularx}
5646 \medskip
5648 The intra predictor is nothing more than the constant value $128$.
5649 This is applied for the sole purpose of centering the range of possible DC
5650 values for INTRA blocks around zero.
5652 \begin{enumerate}
5653 \item
5654 For each value of \locvar{\idx{by}} from $0$ to $7$, inclusive:
5655 \begin{enumerate}
5656 \item
5657 For each value of \locvar{\idx{bx}} from $0$ to $7$, inclusive:
5658 \begin{enumerate}
5659 \item
5660 Assign $\bitvar{PRED}[\locvar{\idx{by}}][\locvar{\idx{bx}}]$ the value $128$.
5661 \end{enumerate}
5662 \end{enumerate}
5663 \end{enumerate}
5665 \subsubsection{The Whole-Pixel Predictor}
5666 \label{sub:predfullpel}
5668 \paragraph{Input parameters:}\hfill\\*
5669 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5670 \multicolumn{1}{c}{Name} &
5671 \multicolumn{1}{c}{Type} &
5672 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5673 \multicolumn{1}{c}{Signed?} &
5674 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5675 \bitvar{RPW} & Integer & 20 & No & The width of the current plane of the
5676 reference frame in pixels. \\
5677 \bitvar{RPH} & Integer & 20 & No & The height of the current plane of the
5678 reference frame in pixels. \\
5679 \bitvar{REFP} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
5680 8 & No & A $\bitvar{RPH}\times\bitvar{RPW}$
5681 array containing the contents of the current plane of the reference frame. \\
5682 \bitvar{BX} & Integer & 20 & No & The horizontal pixel index of the
5683 lower-left corner of the current block. \\
5684 \bitvar{BY} & Integer & 20 & No & The vertical pixel index of the
5685 lower-left corner of the current block. \\
5686 \bitvar{MVX} & Integer & 5 & Yes & The horizontal component of the block
5687 motion vector.
5688 This is always a whole-pixel value. \\
5689 \bitvar{MVY} & Integer & 5 & Yes & The vertical component of the block
5690 motion vector.
5691 This is always a whole-pixel value. \\
5692 \bottomrule\end{tabularx}
5694 \paragraph{Output parameters:}\hfill\\*
5695 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5696 \multicolumn{1}{c}{Name} &
5697 \multicolumn{1}{c}{Type} &
5698 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5699 \multicolumn{1}{c}{Signed?} &
5700 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5701 \bitvar{PRED} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
5702 8 & No & An $8\times 8$ array of predictor
5703 values to use for INTER coded blocks. \\
5704 \bottomrule\end{tabularx}
5706 \paragraph{Variables used:}\hfill\\*
5707 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5708 \multicolumn{1}{c}{Name} &
5709 \multicolumn{1}{c}{Type} &
5710 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5711 \multicolumn{1}{c}{Signed?} &
5712 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5713 \locvar{\idx{bx}} & Integer & 3 & Yes & The horizontal pixel index in the
5714 block. \\
5715 \locvar{\idx{by}} & Integer & 3 & Yes & The vertical pixel index in the
5716 block. \\
5717 \locvar{\idx{rx}} & Integer & 20 & No & The horizontal pixel index in the
5718 reference frame. \\
5719 \locvar{\idx{ry}} & Integer & 20 & No & The vertical pixel index in the
5720 reference frame. \\
5721 \bottomrule\end{tabularx}
5722 \medskip
5724 The whole-pixel predictor simply copies verbatim the contents of the reference
5725 frame pointed to by the block's motion vector.
5726 If the vector points outside the reference frame, then the closest value on the
5727 edge of the reference frame is used instead.
5728 In practice, this is usually implemented by expanding the size of the reference
5729 frame by $8$ or $16$ pixels on each side---depending on whether or not the
5730 corresponding axis is subsampled in the current plane---and copying the border
5731 pixels into this region.
5733 \begin{enumerate}
5734 \item
5735 For each value of \locvar{\idx{by}} from $0$ to $7$, inclusive:
5736 \begin{enumerate}
5737 \item
5738 Assign \locvar{\idx{ry}} the value
5739 $(\bitvar{BY}+\bitvar{MVY}+\locvar{\idx{by}})$.
5740 \item
5741 If \locvar{\idx{ry}} is greater than $(\bitvar{RPH}-1)$, assign
5742 \locvar{\idx{ry}} the value $(\bitvar{RPH}-1)$.
5743 \item
5744 If \locvar{\idx{ry}} is less than zero, assign \locvar{\idx{ry}} the value
5745 zero.
5746 \item
5747 For each value of \locvar{\idx{bx}} from $0$ to $7$, inclusive:
5748 \begin{enumerate}
5749 \item
5750 Assign \locvar{\idx{rx}} the value
5751 $(\bitvar{BX}+\bitvar{MVX}+\locvar{\idx{bx}})$.
5752 \item
5753 If \locvar{\idx{rx}} is greater than $(\bitvar{RPW}-1)$, assign
5754 \locvar{\idx{rx}} the value $(\bitvar{RPW}-1)$.
5755 \item
5756 If \locvar{\idx{rx}} is less than zero, assign \locvar{\idx{rx}} the value
5757 zero.
5758 \item
5759 Assign $\bitvar{PRED}[\locvar{\idx{by}}][\locvar{\idx{bx}}]$ the value
5760 $\bitvar{REFP}[\locvar{\idx{ry}}][\locvar{\idx{rx}}]$.
5761 \end{enumerate}
5762 \end{enumerate}
5763 \end{enumerate}
5765 \subsubsection{The Half-Pixel Predictor}
5766 \label{sub:predhalfpel}
5768 \paragraph{Input parameters:}\hfill\\*
5769 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5770 \multicolumn{1}{c}{Name} &
5771 \multicolumn{1}{c}{Type} &
5772 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5773 \multicolumn{1}{c}{Signed?} &
5774 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5775 \bitvar{RPW} & Integer & 20 & No & The width of the current plane of the
5776 reference frame in pixels. \\
5777 \bitvar{RPH} & Integer & 20 & No & The height of the current plane of the
5778 reference frame in pixels. \\
5779 \bitvar{REFP} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
5780 8 & No & A $\bitvar{RPH}\times\bitvar{RPW}$
5781 array containing the contents of the current plane of the reference frame. \\
5782 \bitvar{BX} & Integer & 20 & No & The horizontal pixel index of the
5783 lower-left corner of the current block. \\
5784 \bitvar{BY} & Integer & 20 & No & The vertical pixel index of the
5785 lower-left corner of the current block. \\
5786 \bitvar{MVX} & Integer & 5 & Yes & The horizontal component of the first
5787 whole-pixel motion vector. \\
5788 \bitvar{MVY} & Integer & 5 & Yes & The vertical component of the first
5789 whole-pixel motion vector. \\
5790 \bitvar{MVX2} & Integer & 5 & Yes & The horizontal component of the second
5791 whole-pixel motion vector. \\
5792 \bitvar{MVY2} & Integer & 5 & Yes & The vertical component of the second
5793 whole-pixel motion vector. \\
5794 \bottomrule\end{tabularx}
5796 \paragraph{Output parameters:}\hfill\\*
5797 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5798 \multicolumn{1}{c}{Name} &
5799 \multicolumn{1}{c}{Type} &
5800 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5801 \multicolumn{1}{c}{Signed?} &
5802 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5803 \bitvar{PRED} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
5804 8 & No & An $8\times 8$ array of predictor
5805 values to use for INTER coded blocks. \\
5806 \bottomrule\end{tabularx}
5808 \paragraph{Variables used:}\hfill\\*
5809 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5810 \multicolumn{1}{c}{Name} &
5811 \multicolumn{1}{c}{Type} &
5812 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5813 \multicolumn{1}{c}{Signed?} &
5814 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5815 \locvar{\idx{bx}} & Integer & 3 & Yes & The horizontal pixel index in the
5816 block. \\
5817 \locvar{\idx{by}} & Integer & 3 & Yes & The vertical pixel index in the
5818 block. \\
5819 \locvar{\idx{rx1}} & Integer & 20 & No & The first horizontal pixel index in
5820 the reference frame. \\
5821 \locvar{\idx{ry1}} & Integer & 20 & No & The first vertical pixel index in the
5822 reference frame. \\
5823 \locvar{\idx{rx2}} & Integer & 20 & No & The second horizontal pixel index in
5824 the reference frame. \\
5825 \locvar{\idx{ry2}} & Integer & 20 & No & The second vertical pixel index in
5826 the reference frame. \\
5827 \bottomrule\end{tabularx}
5828 \medskip
5830 If one or both of the components of the block motion vector is not a
5831 whole-pixel value, then the half-pixel predictor is used.
5832 The half-pixel predictor converts the fractional motion vector into two
5833 whole-pixel motion vectors.
5834 The first is formed by truncating the values of each component towards zero,
5835 and the second is formed by truncating them away from zero.
5836 The contributions from the reference frame at the locations pointed to by each
5837 vector are averaged, truncating towards negative infinity.
5839 Only two samples from the reference frame contribute to each predictor value,
5840 even if both components of the motion vector have non-zero fractional
5841 components.
5842 Motion vector components with quarter-pixel accuracy in the chroma planes are
5843 treated exactly the same as those with half-pixel accuracy.
5844 Any non-zero fractional part gets rounded one way in the first vector, and the
5845 other way in the second.
5847 \begin{enumerate}
5848 \item
5849 For each value of \locvar{\idx{by}} from $0$ to $7$, inclusive:
5850 \begin{enumerate}
5851 \item
5852 Assign \locvar{\idx{ry1}} the value
5853 $(\bitvar{BY}+\bitvar{MVY}+\locvar{\idx{by}})$.
5854 \item
5855 If \locvar{\idx{ry1}} is greater than $(\bitvar{RPH}-1)$, assign
5856 \locvar{\idx{ry1}} the value $(\bitvar{RPH}-1)$.
5857 \item
5858 If \locvar{\idx{ry1}} is less than zero, assign \locvar{\idx{ry1}} the value
5859 zero.
5860 \item
5861 Assign \locvar{\idx{ry2}} the value
5862 $(\bitvar{BY}+\bitvar{MVY2}+\locvar{\idx{by}})$.
5863 \item
5864 If \locvar{\idx{ry2}} is greater than $(\bitvar{RPH}-1)$, assign
5865 \locvar{\idx{ry2}} the value $(\bitvar{RPH}-1)$.
5866 \item
5867 If \locvar{\idx{ry2}} is less than zero, assign \locvar{\idx{ry2}} the value
5868 zero.
5869 \item
5870 For each value of \locvar{\idx{bx}} from $0$ to $7$, inclusive:
5871 \begin{enumerate}
5872 \item
5873 Assign \locvar{\idx{rx1}} the value
5874 $(\bitvar{BX}+\bitvar{MVX}+\locvar{\idx{bx}})$.
5875 \item
5876 If \locvar{\idx{rx1}} is greater than $(\bitvar{RPW}-1)$, assign
5877 \locvar{\idx{rx1}} the value $(\bitvar{RPW}-1)$.
5878 \item
5879 If \locvar{\idx{rx1}} is less than zero, assign \locvar{\idx{rx1}} the value
5880 zero.
5881 \item
5882 Assign \locvar{\idx{rx2}} the value
5883 $(\bitvar{BX}+\bitvar{MVX2}+\locvar{\idx{bx}})$.
5884 \item
5885 If \locvar{\idx{rx2}} is greater than $(\bitvar{RPW}-1)$, assign
5886 \locvar{\idx{rx2}} the value $(\bitvar{RPW}-1)$.
5887 \item
5888 If \locvar{\idx{rx2}} is less than zero, assign \locvar{\idx{rx2}} the value
5889 zero.
5890 \item
5891 Assign $\bitvar{PRED}[\locvar{\idx{by}}][\locvar{\idx{bx}}]$ the value
5892 \begin{equation*}
5893 (\bitvar{REFP}[\locvar{\idx{ry1}}][\locvar{\idx{rx1}}]+
5894 \bitvar{REFP}[\locvar{\idx{ry2}}][\locvar{\idx{rx2}}])>>1.
5895 \end{equation*}
5896 \end{enumerate}
5897 \end{enumerate}
5898 \end{enumerate}
5900 \subsection{Dequantization}
5901 \label{sub:dequant}
5903 \paragraph{Input parameters:}\hfill\\*
5904 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5905 \multicolumn{1}{c}{Name} &
5906 \multicolumn{1}{c}{Type} &
5907 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5908 \multicolumn{1}{c}{Signed?} &
5909 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5910 \bitvar{COEFFS} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
5911 16 & Yes & An $\bitvar{NBS}\times 64$ array of
5912 quantized DCT coefficient values for each block in zig-zag order. \\
5913 \bitvar{ACSCALE} & \multicolumn{1}{p{40pt}}{Integer array} &
5914 16 & No & A 64-element array of scale values for
5915 AC coefficients for each \qi\ value. \\
5916 \bitvar{DCSCALE} & \multicolumn{1}{p{40pt}}{Integer array} &
5917 16 & No & A 64-element array of scale values for
5918 the DC coefficient for each \qi\ value. \\
5919 \bitvar{BMS} & \multicolumn{1}{p{50pt}}{2D Integer array} &
5920 8 & No & A $\bitvar{NBMS}\times 64$ array
5921 containing the base matrices. \\
5922 \bitvar{NQRS} & \multicolumn{1}{p{50pt}}{2D Integer array} &
5923 6 & No & A $2\times 3$ array containing the
5924 number of quant ranges for a given \qti\ and \pli, respectively.
5925 This is at most $63$. \\
5926 \bitvar{QRSIZES} & \multicolumn{1}{p{50pt}}{3D Integer array} &
5927 6 & No & A $2\times 3\times 63$ array of the
5928 sizes of each quant range for a given \qti\ and \pli, respectively.
5929 Only the first $\bitvar{NQRS}[\qti][\pli]$ values are used. \\
5930 \bitvar{QRBMIS} & \multicolumn{1}{p{50pt}}{3D Integer array} &
5931 9 & No & A $2\times 3\times 64$ array of the
5932 \bmi's used for each quant range for a given \qti\ and \pli, respectively.
5933 Only the first $(\bitvar{NQRS}[\qti][\pli]+1)$ values are used. \\
5934 \bitvar{\qti} & Integer & 1 & No & A quantization type index.
5935 See Table~\ref{tab:quant-types}.\\
5936 \bitvar{\pli} & Integer & 2 & No & A color plane index.
5937 See Table~\ref{tab:color-planes}.\\
5938 \bitvar{\idx{qi0}} & Integer & 6 & No & The quantization index of the DC
5939 coefficient. \\
5940 \bitvar{\qi} & Integer & 6 & No & The quantization index of the AC
5941 coefficients. \\
5942 \bitvar{\bi} & Integer & 36 & No & The index of the current block in
5943 coded order. \\
5944 \bottomrule\end{tabularx}
5946 \paragraph{Output parameters:}\hfill\\*
5947 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5948 \multicolumn{1}{c}{Name} &
5949 \multicolumn{1}{c}{Type} &
5950 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5951 \multicolumn{1}{c}{Signed?} &
5952 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5953 \bitvar{DQC} & \multicolumn{1}{p{40pt}}{Integer Array} &
5954 14 & Yes & A $64$-element array of dequantized
5955 DCT coefficients in natural order (cf. Section~\ref{sec:dct-coeffs}). \\
5956 \bottomrule\end{tabularx}
5958 \paragraph{Variables used:}\hfill\\*
5959 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5960 \multicolumn{1}{c}{Name} &
5961 \multicolumn{1}{c}{Type} &
5962 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5963 \multicolumn{1}{c}{Signed?} &
5964 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5965 \locvar{QMAT} & \multicolumn{1}{p{40pt}}{Integer array} &
5966 16 & No & A 64-element array of quantization
5967 values for each DCT coefficient in natural order. \\
5968 \locvar{\ci} & Integer & 6 & No & The DCT coefficient index in natural
5969 order. \\
5970 \locvar{\zzi} & Integer & 6 & No & The DCT coefficient index in zig-zag
5971 order. \\
5972 \locvar{C} & Integer & 29 & Yes & A single dequantized coefficient. \\
5973 \bottomrule\end{tabularx}
5974 \medskip
5976 This procedure takes the quantized DCT coefficient values in zig-zag order for
5977 a single block---after DC prediction has been undone---and returns the
5978 dequantized values in natural order.
5979 If large coefficient values are decoded for coarsely quantized coefficients,
5980 the resulting dequantized value can be significantly larger than 16 bits.
5981 Such a coefficient is truncated to a signed 16-bit representation by discarding
5982 the higher-order bits of its two's complement representation.
5984 Although this procedure recomputes the quantization matrices from the
5985 parameters in the setup header for each block, there are at most six different
5986 ones used for each color plane.
5987 An efficient implementation could compute them once in advance.
5989 \begin{enumerate}
5990 \item
5991 Using \bitvar{ACSCALE}, \bitvar{DCSCALE}, \bitvar{BMS}, \bitvar{NQRS},
5992 \bitvar{QRSIZES}, \bitvar{QRBMIS}, \bitvar{\qti}, \bitvar{\pli}, and
5993 \bitvar{\idx{qi0}}, use the procedure given in Section~\ref{sub:quant-mat} to
5994 compute the DC quantization matrix \locvar{QMAT}.
5995 \item
5996 Assign \locvar{C} the value
5997 $\bitvar{COEFFS}[\bitvar{\bi}][0]*\locvar{QMAT}[0]$.
5998 \item
5999 Truncate \locvar{C} to a 16-bit representation by dropping any higher-order
6000 bits.
6001 \item
6002 Assign $\bitvar{DQC}[0]$ the value \locvar{C}.
6003 \item
6004 Using \bitvar{ACSCALE}, \bitvar{DCSCALE}, \bitvar{BMS}, \bitvar{NQRS},
6005 \bitvar{QRSIZES}, \bitvar{QRBMIS}, \bitvar{\qti}, \bitvar{\pli}, and
6006 \bitvar{\qi}, use the procedure given in Section~\ref{sub:quant-mat} to
6007 compute the AC quantization matrix \locvar{QMAT}.
6008 \item
6009 For each value of \locvar{\ci} from 1 to 63, inclusive:
6010 \begin{enumerate}
6011 \item
6012 Assign \locvar{\zzi} the index in zig-zag order corresponding to \locvar{\ci}.
6013 E.g., the value at row $(\locvar{\ci}//8)$ and column $(\locvar{\ci}\%8)$ in
6014 Figure~\ref{tab:zig-zag}.
6015 \item
6016 Assign \locvar{C} the value
6017 $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\zzi}]*\locvar{QMAT}[\locvar{\ci}]$.
6018 \item
6019 Truncate \locvar{C} to a 16-bit representation by dropping any higher-order
6020 bits.
6021 \item
6022 Assign $\bitvar{DQC}[\locvar{\ci}]$ the value \locvar{C}.
6023 \end{enumerate}
6024 \end{enumerate}
6026 \subsection{The Inverse DCT}
6028 The 2D inverse DCT is separated into two applications of the 1D inverse DCT.
6029 The transform is first applied to each row, and then applied to each column of
6030 the result.
6032 Each application of the 1D inverse DCT scales the values by a factor of two
6033 relative to the orthonormal version of the transform, for a total scale factor
6034 of four for the 2D transform.
6035 It is assumed that a similar scale factor is applied during the forward DCT
6036 used in the encoder, so that a division by 16 is required after the transform
6037 has been applied in both directions.
6038 The inclusion of this scale factor allows the integerized transform to operate
6039 with increased precision.
6040 All divisions throughout the transform are implemented with right shifts.
6041 Only the final division by $16$ is rounded, with ties rounded towards positive
6042 infinity.
6044 All intermediate values are truncated to a 32-bit signed representation by
6045 discarding any higher-order bits in their two's complement representation.
6046 The final output of each 1D transform is truncated to 16 bits in the same
6047 manner.
6048 In practice, 32 bits is sufficient for every calculation except scaling by
6049 $C4$.
6050 Here we specify truncating to 16 bits after the right shift by 16, but this is
6051 equivalent to truncating the result of the multiply to 32 bits before the
6052 right shift.
6054 The 1D transform can only overflow if input coefficients larger than $\pm 6201$
6055 are present.
6056 However, the result of applying the 2D forward transform on pixel values in the
6057 range $-255\ldots 255$ can be as large as $\pm 8157$ due to the scale factor
6058 of four that is applied, and quantization errors could make this even larger.
6059 Therefore, the coefficients cannot simply be clamped into a valid range, as
6060 they could still overflow just the 1D inverse transform by itself.
6062 \subsubsection{The 1D Inverse DCT}
6063 \label{sub:1d-idct}
6065 \paragraph{Input parameters:}\hfill\\*
6066 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6067 \multicolumn{1}{c}{Name} &
6068 \multicolumn{1}{c}{Type} &
6069 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6070 \multicolumn{1}{c}{Signed?} &
6071 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6072 \bitvar{Y} & \multicolumn{1}{p{40pt}}{Integer Array} &
6073 16 & Yes & An 8-element array of DCT
6074 coefficients. \\
6075 \bottomrule\end{tabularx}
6077 \paragraph{Output parameters:}\hfill\\*
6078 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6079 \multicolumn{1}{c}{Name} &
6080 \multicolumn{1}{c}{Type} &
6081 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6082 \multicolumn{1}{c}{Signed?} &
6083 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6084 \bitvar{X} & \multicolumn{1}{p{40pt}}{Integer Array} &
6085 16 & Yes & An 8-element array of output values. \\
6086 \bottomrule\end{tabularx}
6088 \paragraph{Variables used:}\hfill\\*
6089 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6090 \multicolumn{1}{c}{Name} &
6091 \multicolumn{1}{c}{Type} &
6092 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6093 \multicolumn{1}{c}{Signed?} &
6094 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6095 \locvar{T} & \multicolumn{1}{p{40pt}}{Integer Array} &
6096 32 & Yes & An 8-element array containing the
6097 current value of each signal line. \\
6098 \locvar{R} & Integer & 32 & Yes & A temporary value. \\
6099 \bottomrule\end{tabularx}
6100 \medskip
6102 A compliant decoder MUST use the exact implementation of the inverse DCT
6103 defined in this specification.
6104 Some operations may be re-ordered, but the result must be precisely equivalent.
6105 This is a design decision that limits some avenues of decoder optimization, but
6106 prevents any drift in the prediction loop.
6107 Theora uses a 16-bit integerized approximation of the 8-point 1D inverse DCT
6108 based on the Chen factorization \cite{CSF77}.
6109 It requires 16 multiplications and 26 additions and subtractions.
6111 \begin{figure}[htbp]
6112 \begin{center}
6113 \includegraphics[width=\textwidth]{idct}
6114 \end{center}
6115 \caption{Signal Flow Graph for the 1D Inverse DCT}
6116 \label{fig:idct}
6117 \end{figure}
6119 A signal flow graph of the transformation is presented in
6120 Figure~\ref{fig:idct}.
6121 This graph provides a good visualization of which parts of the transform are
6122 parallelizable.
6123 Time increases from left to right.
6125 Each signal line is involved in an operation where the line is marked with a
6126 dot $\cdot$ or a circled plus sign $\oplus$.
6127 The constants $\locvar{C}i$ and $\locvar{S}j$ are the 16-bit integer
6128 approximations of $\cos(\frac{i\pi}{16})$ and $\sin(\frac{j\pi}{16})$ listed
6129 in Table~\ref{tab:dct-consts}.
6130 When they appear next to a signal line, the value on that line is scaled by the
6131 given constant.
6132 A circled minus sign $\ominus$ next to a signal line indicates that the value
6133 on that line is negated.
6135 Operations on a single signal path through the graph cannot be reordered, but
6136 operations on different paths may be, or may be executed in parallel.
6137 The column of numbers on the left represents an initial permutation of the
6138 input DCT coefficients.
6139 The column on the right represents the unpermuted output.
6140 One can be obtained by bit-reversing the 3-bit binary representation of the
6141 other.
6143 \begin{table}[htbp]
6144 \begin{center}
6145 \begin{tabular}{llr}\toprule
6146 $\locvar{C}i$ & $\locvar{S}j$ & Value \\\midrule
6147 $\locvar{C1}$ & $\locvar{S7}$ & $64277$ \\
6148 $\locvar{C2}$ & $\locvar{S6}$ & $60547$ \\
6149 $\locvar{C3}$ & $\locvar{S5}$ & $54491$ \\
6150 $\locvar{C4}$ & $\locvar{S4}$ & $46341$ \\
6151 $\locvar{C5}$ & $\locvar{S3}$ & $36410$ \\
6152 $\locvar{C6}$ & $\locvar{S2}$ & $25080$ \\
6153 $\locvar{C7}$ & $\locvar{S1}$ & $12785$ \\
6154 \bottomrule\end{tabular}
6155 \end{center}
6156 \caption{16-bit Approximations of Sines and Cosines}
6157 \label{tab:dct-consts}
6158 \end{table}
6160 \begin{enumerate}
6161 \item
6162 Assign $\locvar{T}[0]$ the value
6163 $\locvar{C4}*(\bitvar{Y}[0]+\bitvar{Y}[4])>>16$.
6164 \item
6165 Truncate $\locvar{T}[0]$ to a 16-bit representation by dropping any
6166 higher-order bits.
6167 \item
6168 Assign $\locvar{T}[1]$ the value
6169 $\locvar{C4}*(\bitvar{Y}[0]-\bitvar{Y}[4])>>16$.
6170 \item
6171 Truncate $\locvar{T}[1]$ to a 16-bit representation by dropping any
6172 higher-order bits.
6173 \item
6174 Assign $\locvar{T}[2]$ the value $(\locvar{C6}*\bitvar{Y}[2]>>16)-
6175 (\locvar{S6}*\bitvar{Y}[6]>>16)$.
6176 \item
6177 Assign $\locvar{T}[3]$ the value $(\locvar{S6}*\bitvar{Y}[2]>>16)+
6178 (\locvar{C6}*\bitvar{Y}[6]>>16)$.
6179 \item
6180 Assign $\locvar{T}[4]$ the value $(\locvar{C7}*\bitvar{Y}[1]>>16)-
6181 (\locvar{S7}*\bitvar{Y}[7]>>16)$.
6182 \item
6183 Assign $\locvar{T}[5]$ the value $(\locvar{C3}*\bitvar{Y}[5]>>16)-
6184 (\locvar{S3}*\bitvar{Y}[3]>>16)$.
6185 \item
6186 Assign $\locvar{T}[6]$ the value $(\locvar{S3}*\bitvar{Y}[5]>>16)+
6187 (\locvar{C3}*\bitvar{Y}[3]>>16)$.
6188 \item
6189 Assign $\locvar{T}[7]$ the value $(\locvar{S7}*\bitvar{Y}[1]>>16)+
6190 (\locvar{C7}*\bitvar{Y}[7]>>16)$.
6191 \item
6192 Assign \locvar{R} the value $\locvar{T}[4]+\locvar{T}[5]$.
6193 \item
6194 Assign $\locvar{T}[5]$ the value
6195 $\locvar{C4}*(\locvar{T}[4]-\locvar{T}[5])>>16$.
6196 \item
6197 Truncate $\locvar{T}[5]$ to a 16-bit representation by dropping any
6198 higher-order bits.
6199 \item
6200 Assign $\locvar{T}[4]$ the value $\locvar{R}$.
6201 \item
6202 Assign \locvar{R} the value $\locvar{T}[7]+\locvar{T}[6]$.
6203 \item
6204 Assign $\locvar{T}[6]$ the value
6205 $\locvar{C4}*(\locvar{T}[7]-\locvar{T}[6])>>16$.
6206 \item
6207 Truncate $\locvar{T}[6]$ to a 16-bit representation by dropping any
6208 higher-order bits.
6209 \item
6210 Assign $\locvar{T}[7]$ the value $\locvar{R}$.
6211 \item
6212 Assign \locvar{R} the value $\locvar{T}[0]+\locvar{T}[3]$.
6213 \item
6214 Assign $\locvar{T}[3]$ the value $\locvar{T}[0]-\locvar{T}[3]$.
6215 \item
6216 Assign $\locvar{T}[0]$ the value \locvar{R}.
6217 \item
6218 Assign \locvar{R} the value $\locvar{T}[1]+\locvar{T}[2]$.
6219 \item
6220 Assign $\locvar{T}[2]$ the value $\locvar{T}[1]-\locvar{T}[2]$.
6221 \item
6222 Assign $\locvar{T}[1]$ the value \locvar{R}.
6223 \item
6224 Assign \locvar{R} the value $\locvar{T}[6]+\locvar{T}[5]$.
6225 \item
6226 Assign $\locvar{T}[5]$ the value $\locvar{T}[6]-\locvar{T}[5]$.
6227 \item
6228 Assign $\locvar{T}[6]$ the value \locvar{R}.
6229 \item
6230 Assign \locvar{R} the value $\locvar{T}[0]+\locvar{T}[7]$.
6231 \item
6232 Truncate \locvar{R} to a 16-bit representation by dropping any higher-order
6233 bits.
6234 \item
6235 Assign $\bitvar{X}[0]$ the value \locvar{R}.
6236 \item
6237 Assign \locvar{R} the value $\locvar{T}[1]+\locvar{T}[6]$.
6238 \item
6239 Truncate \locvar{R} to a 16-bit representation by dropping any higher-order
6240 bits.
6241 \item
6242 Assign $\bitvar{X}[1]$ the value \locvar{R}.
6243 \item
6244 Assign \locvar{R} the value $\locvar{T}[2]+\locvar{T}[5]$.
6245 \item
6246 Truncate \locvar{R} to a 16-bit representation by dropping any higher-order
6247 bits.
6248 \item
6249 Assign $\bitvar{X}[2]$ the value \locvar{R}.
6250 \item
6251 Assign \locvar{R} the value $\locvar{T}[3]+\locvar{T}[4]$.
6252 \item
6253 Truncate \locvar{R} to a 16-bit representation by dropping any higher-order
6254 bits.
6255 \item
6256 Assign $\bitvar{X}[3]$ the value \locvar{R}.
6257 \item
6258 Assign \locvar{R} the value $\locvar{T}[3]-\locvar{T}[4]$.
6259 \item
6260 Truncate \locvar{R} to a 16-bit representation by dropping any higher-order
6261 bits.
6262 \item
6263 Assign $\bitvar{X}[4]$ the value \locvar{R}.
6264 \item
6265 Assign \locvar{R} the value $\locvar{T}[2]-\locvar{T}[5]$.
6266 \item
6267 Truncate \locvar{R} to a 16-bit representation by dropping any higher-order
6268 bits.
6269 \item
6270 Assign $\bitvar{X}[5]$ the value \locvar{R}.
6271 \item
6272 Assign \locvar{R} the value $\locvar{T}[1]-\locvar{T}[6]$.
6273 \item
6274 Truncate \locvar{R} to a 16-bit representation by dropping any higher-order
6275 bits.
6276 \item
6277 Assign $\bitvar{X}[6]$ the value \locvar{R}.
6278 \item
6279 Assign \locvar{R} the value $\locvar{T}[0]-\locvar{T}[7]$.
6280 \item
6281 Truncate \locvar{R} to a 16-bit representation by dropping any higher-order
6282 bits.
6283 \item
6284 Assign $\bitvar{X}[7]$ the value \locvar{R}.
6285 \end{enumerate}
6287 \subsubsection{The 2D Inverse DCT}
6288 \label{sub:2d-idct}
6290 \paragraph{Input parameters:}\hfill\\*
6291 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6292 \multicolumn{1}{c}{Name} &
6293 \multicolumn{1}{c}{Type} &
6294 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6295 \multicolumn{1}{c}{Signed?} &
6296 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6297 \bitvar{DQC} & \multicolumn{1}{p{40pt}}{Integer Array} &
6298 14 & Yes & A $64$-element array of dequantized
6299 DCT coefficients in natural order (cf. Section~\ref{sec:dct-coeffs}). \\
6300 \bottomrule\end{tabularx}
6302 \paragraph{Output parameters:}\hfill\\*
6303 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6304 \multicolumn{1}{c}{Name} &
6305 \multicolumn{1}{c}{Type} &
6306 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6307 \multicolumn{1}{c}{Signed?} &
6308 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6309 \bitvar{RES} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6310 16 & Yes & An $8\times 8$ array containing the
6311 decoded residual for the current block. \\
6312 \bottomrule\end{tabularx}
6314 \paragraph{Variables used:}\hfill\\*
6315 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6316 \multicolumn{1}{c}{Name} &
6317 \multicolumn{1}{c}{Type} &
6318 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6319 \multicolumn{1}{c}{Signed?} &
6320 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6321 \locvar{\ci} & Integer & 3 & No & The column index. \\
6322 \locvar{\ri} & Integer & 3 & No & The row index. \\
6323 \locvar{Y} & \multicolumn{1}{p{40pt}}{Integer Array} &
6324 16 & Yes & An 8-element array of 1-D iDCT input
6325 values. \\
6326 \locvar{X} & \multicolumn{1}{p{40pt}}{Integer Array} &
6327 16 & Yes & An 8-element array of 1-D iDCT output
6328 values. \\
6329 \bottomrule\end{tabularx}
6330 \medskip
6332 This procedure applies the 1-D inverse DCT transform 16 times to a block of
6333 dequantized coefficients: once for each of the 8 rows, and once for each of
6334 the 8 columns of the result.
6335 Note that the coordinate system used for the columns is the same right-handed
6336 coordinate system used by the rest of Theora.
6337 Thus, the column is indexed from bottom to top, not top to bottom.
6338 The final values are divided by sixteen, rounding with ties rounded towards
6339 positive infinity.
6341 \begin{enumerate}
6342 \item
6343 For each value of \locvar{\ri} from 0 to 7:
6344 \begin{enumerate}
6345 \item
6346 For each value of \locvar{\ci} from 0 to 7:
6347 \begin{enumerate}
6348 \item
6349 Assign $\locvar{Y}[\locvar{\ci}]$ the value
6350 $\bitvar{DQC}[\locvar{\ri}*8+\locvar{\ci}]$.
6351 \end{enumerate}
6352 \item
6353 Compute \locvar{X}, the 1-D inverse DCT of \locvar{Y} using the procedure
6354 described in Section~\ref{sub:1d-idct}.
6355 \item
6356 For each value of $\locvar{\ci}$ from 0 to 7:
6357 \begin{enumerate}
6358 \item
6359 Assign $\bitvar{RES}[\locvar{\ri}][\locvar{\ci}]$ the value
6360 $\locvar{X}[\locvar{\ci}]$.
6361 \end{enumerate}
6362 \end{enumerate}
6363 \item
6364 For each value of \locvar{\ci} from 0 to 7:
6365 \begin{enumerate}
6366 \item
6367 For each value of \locvar{\ri} from 0 to 7:
6368 \begin{enumerate}
6369 \item
6370 Assign $\locvar{Y}[\locvar{\ri}]$ the value
6371 $\bitvar{RES}[\locvar{\ri}][\locvar{\ci}]$.
6372 \end{enumerate}
6373 \item
6374 Compute \locvar{X}, the 1-D inverse DCT of \locvar{Y} using the procedure
6375 described in Section~\ref{sub:1d-idct}.
6376 \item
6377 For each value of \locvar{\ri} from 0 to 7:
6378 \begin{enumerate}
6379 \item
6380 Assign $\bitvar{RES}[\locvar{\ri}][\locvar{\ci}]$ the value
6381 $(\locvar{X}[\locvar{\ri}]+8)>>4$.
6382 \end{enumerate}
6383 \end{enumerate}
6384 \end{enumerate}
6386 \subsubsection{The 1D Forward DCT (Non-Normative)}
6388 \paragraph{Input parameters:}\hfill\\*
6389 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6390 \multicolumn{1}{c}{Name} &
6391 \multicolumn{1}{c}{Type} &
6392 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6393 \multicolumn{1}{c}{Signed?} &
6394 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6395 \bitvar{X} & \multicolumn{1}{p{40pt}}{Integer Array} &
6396 14 & Yes & An 8-element array of input values. \\
6397 \bottomrule\end{tabularx}
6399 \paragraph{Output parameters:}\hfill\\*
6400 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6401 \multicolumn{1}{c}{Name} &
6402 \multicolumn{1}{c}{Type} &
6403 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6404 \multicolumn{1}{c}{Signed?} &
6405 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6406 \bitvar{Y} & \multicolumn{1}{p{40pt}}{Integer Array} &
6407 16 & Yes & An 8-element array of DCT
6408 coefficients. \\
6409 \bottomrule\end{tabularx}
6411 \paragraph{Variables used:}\hfill\\*
6412 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6413 \multicolumn{1}{c}{Name} &
6414 \multicolumn{1}{c}{Type} &
6415 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6416 \multicolumn{1}{c}{Signed?} &
6417 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6418 \locvar{T} & \multicolumn{1}{p{40pt}}{Integer Array} &
6419 16 & Yes & An 8-element array containing the
6420 current value of each signal line. \\
6421 \locvar{R} & Integer & 16 & Yes & A temporary value. \\
6422 \bottomrule\end{tabularx}
6423 \medskip
6425 The forward transform used in the encoder is not mandated by this standard as
6426 the inverse one is.
6427 Precise equivalence in the inverse transform alone is all that is required to
6428 guarantee that there is no mismatch in the prediction loop between encoder and
6429 any compliant decoder implementation.
6430 However, a forward transform is provided here as a convenience for implementing
6431 an encoder.
6432 This is the version of the transform used by Xiph.org's Theora encoder, which
6433 is the same as that used by VP3.
6434 Like the inverse DCT, it is first applied to each row, and then applied to each
6435 column of the result.
6437 \begin{figure}[htbp]
6438 \begin{center}
6439 \includegraphics[width=\textwidth]{fdct}
6440 \end{center}
6441 \caption{Signal Flow Graph for the 1D Forward DCT}
6442 \label{fig:fdct}
6443 \end{figure}
6445 The signal flow graph for the forward transform is given in
6446 Figure~\ref{fig:fdct}.
6447 It is largely the reverse of the flow graph given for the inverse DCT.
6448 It is important to note that the signs on the constants in the rotations have
6449 changed, and the \locvar{C4} scale factors on one of the lower butterflies now
6450 appear on the opposite side.
6451 The column of numbers on the left represents the unpermuted input, and the
6452 column on the right the permuted output DCT coefficients.
6454 A proper division by $2^{16}$ is done after the multiplications instead of a
6455 shift in the forward transform.
6456 This can be implemented quickly by adding an offset of $\hex{FFFF}$ if the
6457 number is negative, and then shifting as before.
6458 This slightly increases the computational complexity of the transform.
6459 Unlike the inverse DCT, 16 bit registers and a $16\times16\rightarrow32$ bit
6460 multiply are sufficient to avoid any overflow, so long as the input is in the
6461 range $-6270\ldots 6270$, which is larger than required.
6463 \begin{enumerate}
6464 \item
6465 Assign $\locvar{T}[0]$ the value $\bitvar{X}[0]+\bitvar{X}[7]$.
6466 \item
6467 Assign $\locvar{T}[1]$ the value $\bitvar{X}[1]+\bitvar{X}[6]$.
6468 \item
6469 Assign $\locvar{T}[2]$ the value $\bitvar{X}[2]+\bitvar{X}[5]$.
6470 \item
6471 Assign $\locvar{T}[3]$ the value $\bitvar{X}[3]+\bitvar{X}[4]$.
6472 \item
6473 Assign $\locvar{T}[4]$ the value $\bitvar{X}[3]-\bitvar{X}[4]$.
6474 \item
6475 Assign $\locvar{T}[5]$ the value $\bitvar{X}[2]-\bitvar{X}[5]$.
6476 \item
6477 Assign $\locvar{T}[6]$ the value $\bitvar{X}[1]-\bitvar{X}[6]$.
6478 \item
6479 Assign $\locvar{T}[7]$ the value $\bitvar{X}[0]-\bitvar{X}[7]$.
6480 \item
6481 Assign \locvar{R} the value $\locvar{T}[0]+\locvar{T}[3]$.
6482 \item
6483 Assign $\locvar{T}[3]$ the value $\locvar{T}[0]-\locvar{T}[3]$.
6484 \item
6485 Assign $\locvar{T}[0]$ the value \locvar{R}.
6486 \item
6487 Assign \locvar{R} the value $\locvar{T}[1]+\locvar{T}[2]$.
6488 \item
6489 Assign $\locvar{T}[2]$ the value $\locvar{T}[1]-\locvar{T}[2]$.
6490 \item
6491 Assign $\locvar{T}[1]$ the value \locvar{R}.
6492 \item
6493 Assign \locvar{R} the value $\locvar{T}[6]-\locvar{T}[5]$.
6494 \item
6495 Assign $\locvar{T}[6]$ the value
6496 $(\locvar{C4}*(\locvar{T}[6]+\locvar{T}[5]))//2^{16}$.
6497 \item
6498 Assign $\locvar{T}[5]$ the value $(\locvar{C4}*\locvar{R})//2^{16}$.
6499 \item
6500 Assign \locvar{R} the value $\locvar{T}[4]+\locvar{T}[5]$.
6501 \item
6502 Assign $\locvar{T}[5]$ the value $\locvar{T}[4]-\locvar{T}[5]$.
6503 \item
6504 Assign $\locvar{T}[4]$ the value \locvar{R}.
6505 \item
6506 Assign \locvar{R} the value $\locvar{T}[7]+\locvar{T}[6]$.
6507 \item
6508 Assign $\locvar{T}[6]$ the value $\locvar{T}[7]-\locvar{T}[6]$.
6509 \item
6510 Assign $\locvar{T}[7]$ the value \locvar{R}.
6511 \item
6512 Assign $\bitvar{Y}[0]$ the value
6513 $(\locvar{C4}*(\locvar{T}[0]+\locvar{T}[1]))//2^{16}$.
6514 \item
6515 Assign $\bitvar{Y}[4]$ the value
6516 $(\locvar{C4}*(\locvar{T}[0]-\locvar{T}[1]))//2^{16}$.
6517 \item
6518 Assign $\bitvar{Y}[2]$ the value
6519 $((\locvar{S6}*\locvar{T}[3])//2^{16})+
6520 ((\locvar{C6}*\locvar{T}[2])//2^{16})$.
6521 \item
6522 Assign $\bitvar{Y}[6]$ the value
6523 $((\locvar{C6}*\locvar{T}[3])//2^{16})-
6524 ((\locvar{S6}*\locvar{T}[2])//2^{16})$.
6525 \item
6526 Assign $\bitvar{Y}[1]$ the value
6527 $((\locvar{S7}*\locvar{T}[7])//2^{16})+
6528 ((\locvar{C7}*\locvar{T}[4])//2^{16})$.
6529 \item
6530 Assign $\bitvar{Y}[5]$ the value
6531 $((\locvar{S3}*\locvar{T}[6])//2^{16})+
6532 ((\locvar{C3}*\locvar{T}[5])//2^{16})$.
6533 \item
6534 Assign $\bitvar{Y}[3]$ the value
6535 $((\locvar{C3}*\locvar{T}[6])//2^{16})-
6536 ((\locvar{S3}*\locvar{T}[5])//2^{16})$.
6537 \item
6538 Assign $\bitvar{Y}[7]$ the value
6539 $((\locvar{C7}*\locvar{T}[7])//2^{16})-
6540 ((\locvar{S7}*\locvar{T}[4])//2^{16})$.
6541 \end{enumerate}
6543 \subsection{The Complete Reconstruction Algorithm}
6544 \label{sub:recon}
6546 \paragraph{Input parameters:}\hfill\\*
6547 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6548 \multicolumn{1}{c}{Name} &
6549 \multicolumn{1}{c}{Type} &
6550 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6551 \multicolumn{1}{c}{Signed?} &
6552 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6553 \bitvar{ACSCALE} & \multicolumn{1}{p{40pt}}{Integer array} &
6554 16 & No & A 64-element array of scale values
6555 for AC coefficients for each \qi\ value. \\
6556 \bitvar{DCSCALE} & \multicolumn{1}{p{40pt}}{Integer array} &
6557 16 & No & A 64-element array of scale values
6558 for the DC coefficient for each \qi\ value. \\
6559 \bitvar{BMS} & \multicolumn{1}{p{50pt}}{2D Integer array} &
6560 8 & No & A $\bitvar{NBMS}\times 64$ array
6561 containing the base matrices. \\
6562 \bitvar{NQRS} & \multicolumn{1}{p{50pt}}{2D Integer array} &
6563 6 & No & A $2\times 3$ array containing the
6564 number of quant ranges for a given \qti\ and \pli, respectively.
6565 This is at most $63$. \\
6566 \bitvar{QRSIZES} & \multicolumn{1}{p{50pt}}{3D Integer array} &
6567 6 & No & A $2\times 3\times 63$ array of the
6568 sizes of each quant range for a given \qti\ and \pli, respectively.
6569 Only the first $\bitvar{NQRS}[\qti][\pli]$ values are used. \\
6570 \bitvar{QRBMIS} & \multicolumn{1}{p{50pt}}{3D Integer array} &
6571 9 & No & A $2\times 3\times 64$ array of the
6572 \bmi's used for each quant range for a given \qti\ and \pli, respectively.
6573 Only the first $(\bitvar{NQRS}[\qti][\pli]+1)$ values are used. \\
6574 \bitvar{RPYW} & Integer & 20 & No & The width of the $Y'$ plane of the
6575 reference frames in pixels. \\
6576 \bitvar{RPYH} & Integer & 20 & No & The height of the $Y'$ plane of the
6577 reference frames in pixels. \\
6578 \bitvar{RPCW} & Integer & 20 & No & The width of the $C_b$ and $C_r$
6579 planes of the reference frames in pixels. \\
6580 \bitvar{RPCH} & Integer & 20 & No & The height of the $C_b$ and $C_r$
6581 planes of the reference frames in pixels. \\
6582 \bitvar{GOLDREFY} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6583 8 & No & A $\bitvar{RPYH}\times\bitvar{RPYW}$
6584 array containing the contents of the $Y'$ plane of the golden reference
6585 frame. \\
6586 \bitvar{GOLDREFCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6587 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$
6588 array containing the contents of the $C_b$ plane of the golden reference
6589 frame. \\
6590 \bitvar{GOLDREFCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6591 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$
6592 array containing the contents of the $C_r$ plane of the golden reference
6593 frame. \\
6594 \bitvar{PREVREFY} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6595 8 & No & A $\bitvar{RPYH}\times\bitvar{RPYW}$
6596 array containing the contents of the $Y'$ plane of the previous reference
6597 frame. \\
6598 \bitvar{PREVREFCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6599 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$
6600 array containing the contents of the $C_b$ plane of the previous reference
6601 frame. \\
6602 \bitvar{PREVREFCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6603 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$
6604 array containing the contents of the $C_r$ plane of the previous reference
6605 frame. \\
6606 \bitvar{NBS} & Integer & 36 & No & The total number of blocks in a
6607 frame. \\
6608 \bitvar{BCODED} & \multicolumn{1}{p{40pt}}{Integer Array} &
6609 1 & No & An \bitvar{NBS}-element array of
6610 flags indicating which blocks are coded. \\
6611 \bitvar{MBMODES} & \multicolumn{1}{p{40pt}}{Integer Array} &
6612 3 & No & An \bitvar{NMBS}-element array of
6613 coding modes for each macro block. \\
6614 \bitvar{MVECTS} & \multicolumn{1}{p{50pt}}{Array of 2D Integer Vectors} &
6615 6 & Yes & An \bitvar{NBS}-element array of
6616 motion vectors for each block. \\
6617 \bitvar{COEFFS} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6618 16 & Yes & An $\bitvar{NBS}\times 64$ array of
6619 quantized DCT coefficient values for each block in zig-zag order. \\
6620 \bitvar{NCOEFFS} & \multicolumn{1}{p{40pt}}{Integer Array} &
6621 7 & No & An \bitvar{NBS}-element array of the
6622 coefficient count for each block. \\
6623 \bitvar{QIS} & \multicolumn{1}{p{40pt}}{Integer array} &
6624 6 & No & An \bitvar{NQIS}-element array of
6625 \qi\ values. \\
6626 \bitvar{QIIS} & \multicolumn{1}{p{40pt}}{Integer Array} &
6627 2 & No & An \bitvar{NBS}-element array of
6628 \locvar{\qii} values for each block. \\
6629 \bottomrule\end{tabularx}
6631 \paragraph{Output parameters:}\hfill\\*
6632 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6633 \multicolumn{1}{c}{Name} &
6634 \multicolumn{1}{c}{Type} &
6635 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6636 \multicolumn{1}{c}{Signed?} &
6637 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6638 \bitvar{RECY} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6639 8 & No & A $\bitvar{RPYH}\times\bitvar{RPYW}$
6640 array containing the contents of the $Y'$ plane of the reconstructed frame. \\
6641 \bitvar{RECCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6642 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$
6643 array containing the contents of the $C_b$ plane of the reconstructed frame. \\
6644 \bitvar{RECCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6645 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$
6646 array containing the contents of the $C_r$ plane of the reconstructed frame. \\
6647 \bottomrule\end{tabularx}
6649 \paragraph{Variables used:}\hfill\\*
6650 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6651 \multicolumn{1}{c}{Name} &
6652 \multicolumn{1}{c}{Type} &
6653 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6654 \multicolumn{1}{c}{Signed?} &
6655 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6656 \locvar{RPW} & Integer & 20 & No & The width of the current plane of the
6657 current reference frame in pixels. \\
6658 \locvar{RPH} & Integer & 20 & No & The height of the current plane of
6659 the current reference frame in pixels. \\
6660 \locvar{REFP} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6661 8 & No & A $\locvar{RPH}\times\locvar{RPW}$
6662 array containing the contents of the current plane of the current reference
6663 frame. \\
6664 \locvar{BX} & Integer & 20 & No & The horizontal pixel index of the
6665 lower-left corner of the current block. \\
6666 \locvar{BY} & Integer & 20 & No & The vertical pixel index of the
6667 lower-left corner of the current block. \\
6668 \locvar{MVX} & Integer & 5 & No & The horizontal component of the first
6669 whole-pixel motion vector. \\
6670 \locvar{MVY} & Integer & 5 & No & The vertical component of the first
6671 whole-pixel motion vector. \\
6672 \locvar{MVX2} & Integer & 5 & No & The horizontal component of the second
6673 whole-pixel motion vector. \\
6674 \locvar{MVY2} & Integer & 5 & No & The vertical component of the second
6675 whole-pixel motion vector. \\
6676 \locvar{PRED} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6677 8 & No & An $8\times 8$ array of predictor
6678 values to use for the current block. \\
6679 \locvar{RES} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6680 16 & Yes & An $8\times 8$ array containing the
6681 decoded residual for the current block. \\
6682 \locvar{QMAT} & \multicolumn{1}{p{40pt}}{Integer array} &
6683 16 & No & A 64-element array of quantization
6684 values for each DCT coefficient in natural order. \\
6685 \locvar{DC} & Integer & 29 & Yes & The dequantized DC coefficient of a
6686 block. \\
6687 \locvar{P} & Integer & 17 & Yes & A reconstructed pixel value. \\
6688 \locvar{\bi} & Integer & 36 & No & The index of the current block in
6689 coded order. \\
6690 \locvar{\mbi} & Integer & 32 & No & The index of the macro block
6691 containing block \locvar{\bi}. \\
6692 \locvar{\pli} & Integer & 2 & No & The color plane index of the current
6693 block. \\
6694 \locvar{\rfi} & Integer & 2 & No & The index of the reference frame
6695 indicated by the coding mode for macro block \locvar{\mbi}. \\
6696 \locvar{\idx{bx}} & Integer & 3 & No & The horizontal pixel index in the
6697 block. \\
6698 \locvar{\idx{by}} & Integer & 3 & No & The vertical pixel index in the
6699 block. \\
6700 \locvar{\qti} & Integer & 1 & No & A quantization type index.
6701 See Table~\ref{tab:quant-types}.\\
6702 \locvar{\idx{qi0}} & Integer & 6 & No & The quantization index of the DC
6703 coefficient. \\
6704 \locvar{\qi} & Integer & 6 & No & The quantization index of the AC
6705 coefficients. \\
6706 \bottomrule\end{tabularx}
6707 \medskip
6709 This section takes the decoded packet data and uses the previously defined
6710 procedures to reconstruct each block of the current frame.
6711 For coded blocks, a predictor is formed using the coding mode and, if
6712 applicable, the motion vector, and then the residual is computed from the
6713 quantized DCT coefficients.
6714 For uncoded blocks, the contents of the co-located block are copied from the
6715 previous frame and the residual is cleared to zero.
6716 Then the predictor and residual are added, and the result clamped to the range
6717 $0\ldots 255$ and stored in the current frame.
6719 In the special case that a block contains only a DC coefficient, the
6720 dequantization and inverse DCT transform are skipped.
6721 Instead the constant pixel value for the entire block is computed in one step.
6722 Note that the truncation of intermediate operations is omitted and the final
6723 rounding is slightly different in this case.
6724 The check for whether or not the block contains only a DC coefficient is based
6725 on the coefficient count returned from the token decode procedure of
6726 Section~\ref{sec:dct-decode}, and not by checking to see if the remaining
6727 coefficient values are zero.
6728 Also note that even when the coefficient count indicates the block contains
6729 zero coefficients, the DC coefficient is still processed, as undoing DC
6730 prediction might have made it non-zero.
6732 After this procedure, the frame is completely reconstructed, but before it can
6733 be used as a reference frame, a loop filter must be run over it to help reduce
6734 blocking artifacts.
6735 This is detailed in Section~\ref{sec:loopfilter}.
6737 \begin{enumerate}
6738 \item
6739 Assign \locvar{\idx{qi0}} the value $\bitvar{QIS}[0]$.
6740 \item
6741 For each value of \locvar{\bi} from 0 to $(\bitvar{NBS}-1)$:
6742 \begin{enumerate}
6743 \item
6744 Assign \locvar{\pli} the index of the color plane block \locvar{\bi} belongs to.
6746 \item
6747 Assign \locvar{BX} the horizontal pixel index of the lower-left corner of block
6748 \locvar{\bi}.
6749 \item
6750 Assign \locvar{BY} the vertical pixel index of the lower-left corner of block
6751 \locvar{\bi}.
6752 \item
6753 If $\bitvar{BCODED}[\locvar{\bi}]$ is non-zero:
6754 \begin{enumerate}
6755 \item
6756 Assign \locvar{\mbi} the index of the macro block containing block
6757 \locvar{\bi}.
6758 \item
6759 If $\bitvar{MBMODES}[\locvar{\mbi}]$ is 1 (INTRA), assign \locvar{\qti} the
6760 value $0$.
6761 \item
6762 Otherwise, assign \locvar{\qti} the value $1$.
6763 \item
6764 Assign \locvar{\rfi} the value of the Reference Frame Index column of
6765 Table~\ref{tab:cm-refs} corresponding to $\bitvar{MBMODES}[\locvar{\mbi}]$.
6766 \item
6767 If \locvar{\rfi} is zero, compute \locvar{PRED} using the procedure given in
6768 Section~\ref{sub:predintra}.
6769 \item
6770 Otherwise:
6771 \begin{enumerate}
6772 \item
6773 Assign \locvar{REFP}, \locvar{RPW}, and \locvar{RPH} the values given in
6774 Table~\ref{tab:refp} corresponding to the current values of \locvar{\rfi} and
6775 \locvar{\pli}.
6777 \begin{table}[htbp]
6778 \begin{center}
6779 \begin{tabular}{cclll}\toprule
6780 \locvar{\rfi} & \locvar{\pli} &
6781 \locvar{REFP} & \locvar{RPW} & \locvar{RPH} \\\midrule
6782 $1$ & $0$ & \bitvar{PREVREFY} & \bitvar{RPYW} & \bitvar{RPYH} \\
6783 $1$ & $1$ & \bitvar{PREVREFCB} & \bitvar{RPCW} & \bitvar{RPCH} \\
6784 $1$ & $2$ & \bitvar{PREVREFCR} & \bitvar{RPCW} & \bitvar{RPCH} \\
6785 $2$ & $0$ & \bitvar{GOLDREFY} & \bitvar{RPYW} & \bitvar{RPYH} \\
6786 $2$ & $1$ & \bitvar{GOLDREFCB} & \bitvar{RPCW} & \bitvar{RPCH} \\
6787 $2$ & $2$ & \bitvar{GOLDREFCR} & \bitvar{RPCW} & \bitvar{RPCH} \\
6788 \bottomrule\end{tabular}
6789 \end{center}
6790 \caption{Reference Planes and Sizes for Each \locvar{\rfi} and \locvar{\pli}}
6791 \label{tab:refp}
6792 \end{table}
6794 \item
6795 Assign \locvar{MVX} the value
6796 \begin{equation*}
6797 \left\lfloor\lvert\bitvar{MVECTS}[\locvar{\bi}]_x\rvert\right\rfloor*
6798 \sign(\bitvar{MVECTS}[\locvar{\bi}]_x).
6799 \end{equation*}
6800 \item
6801 Assign \locvar{MVY} the value
6802 \begin{equation*}
6803 \left\lfloor\lvert\bitvar{MVECTS}[\locvar{\bi}]_y\rvert\right\rfloor*
6804 \sign(\bitvar{MVECTS}[\locvar{\bi}]_y).
6805 \end{equation*}
6806 \item
6807 Assign \locvar{MVX2} the value
6808 \begin{equation*}
6809 \left\lceil\lvert\bitvar{MVECTS}[\locvar{\bi}]_x\rvert\right\rceil*
6810 \sign(\bitvar{MVECTS}[\locvar{\bi}]_x).
6811 \end{equation*}
6812 \item
6813 Assign \locvar{MVY2} the value
6814 \begin{equation*}
6815 \left\lceil\lvert\bitvar{MVECTS}[\locvar{\bi}]_y\rvert\right\rceil*
6816 \sign(\bitvar{MVECTS}[\locvar{\bi}]_y).
6817 \end{equation*}
6818 \item
6819 If \locvar{MVX} equals \locvar{MVX2} and \locvar{MVY} equals \locvar{MVY2},
6820 use the values \locvar{REFP}, \locvar{RPW}, \locvar{RPH}, \locvar{BX},
6821 \locvar{BY}, \locvar{MVX}, and \locvar{MVY} to compute \locvar{PRED} using the
6822 procedure given in Section~\ref{sub:predfullpel}.
6823 \item
6824 Otherwise, use the values \locvar{REFP}, \locvar{RPW}, \locvar{RPH},
6825 \locvar{BX}, \locvar{BY}, \locvar{MVX}, \locvar{MVY}, \locvar{MVX2}, and
6826 \locvar{MVY2} to compute \locvar{PRED} using the procedure given in
6827 Section~\ref{sub:predhalfpel}.
6828 \end{enumerate}
6829 \item
6830 If $\bitvar{NCOEFFS}[\locvar{\bi}]$ is less than 2:
6831 \begin{enumerate}
6832 \item
6833 Using \bitvar{ACSCALE}, \bitvar{DCSCALE}, \bitvar{BMS}, \bitvar{NQRS}, \\
6834 \bitvar{QRSIZES}, \bitvar{QRBMIS}, \locvar{\qti}, \locvar{\pli}, and
6835 \locvar{\idx{qi0}}, use the procedure given in Section~\ref{sub:quant-mat} to
6836 compute the DC quantization matrix \locvar{QMAT}.
6837 \item
6838 Assign \locvar{DC} the value
6839 \begin{equation*}
6840 (\bitvar{COEFFS}[\locvar{\bi}][0]*\locvar{QMAT}[0]+15)>>5.
6841 \end{equation*}
6842 \item
6843 Truncate \locvar{DC} to a 16-bit representation by dropping any higher-order
6844 bits.
6845 \item
6846 For each value of \locvar{\idx{by}} from 0 to 7, and each value of
6847 \locvar{\idx{bx}} from 0 to 7, assign
6848 $\locvar{RES}[\locvar{\idx{by}}][\locvar{\idx{bx}}]$ the value \locvar{DC}.
6849 \end{enumerate}
6850 \item
6851 Otherwise:
6852 \begin{enumerate}
6853 \item
6854 Assign \locvar{\qi} the value $\bitvar{QIS}[\bitvar{QIIS}[\locvar{\bi}]]$.
6855 \item
6856 Using \bitvar{ACSCALE}, \bitvar{DCSCALE}, \bitvar{BMS}, \bitvar{NQRS}, \\
6857 \bitvar{QRSIZES}, \bitvar{QRBMIS}, \locvar{\qti}, \locvar{\pli},
6858 \locvar{\idx{qi0}}, and \locvar{\qi}, compute \locvar{DQC} using the procedure
6859 given in Section~\ref{sub:dequant}.
6860 \item
6861 Using \locvar{DQC}, compute \locvar{RES} using the procedure given in
6862 Section~\ref{sub:2d-idct}.
6863 \end{enumerate}
6864 \end{enumerate}
6865 \item
6866 Otherwise:
6867 \begin{enumerate}
6868 \item
6869 Assign \locvar{\rfi} the value 1.
6870 \item
6871 Assign \locvar{REFP}, \locvar{RPW}, and \locvar{RPH} the values given in
6872 Table~\ref{tab:refp} corresponding to the current value of \locvar{\rfi} and
6873 \locvar{\pli}.
6874 \item
6875 Assign \locvar{MVX} the value 0.
6876 \item
6877 Assign \locvar{MVY} the value 0.
6878 \item
6879 Using the values \locvar{REFP}, \locvar{RPW}, \locvar{RPH}, \locvar{BX},
6880 \locvar{BY}, \locvar{MVX}, and \locvar{MVY}, compute \locvar{PRED} using the
6881 procedure given in Section~\ref{sub:predfullpel}.
6882 This is simply a copy of the co-located block in the previous reference frame.
6883 \item
6884 For each value of \locvar{\idx{by}} from 0 to 7, and each value of
6885 \locvar{\idx{bx}} from 0 to 7, assign
6886 $\locvar{RES}[\locvar{\idx{by}}][\locvar{\idx{bx}}]$ the value 0.
6887 \end{enumerate}
6888 \item
6889 For each value of \locvar{\idx{by}} from 0 to 7, and each value of
6890 \locvar{\idx{bx}} from 0 to 7:
6891 \begin{enumerate}
6892 \item
6893 Assign \locvar{P} the value
6894 $(\locvar{PRED}[\locvar{\idx{by}}][\locvar{\idx{bx}}]+
6895 \locvar{RES}[\locvar{\idx{by}}][\locvar{\idx{bx}}])$.
6896 \item
6897 If \locvar{P} is greater than $255$, assign \locvar{P} the value $255$.
6898 \item
6899 If \locvar{P} is less than $0$, assign \locvar{P} the value $0$.
6900 \item
6901 If \locvar{\pli} equals 0, assign
6902 $\bitvar{RECY}[\locvar{BY}+\locvar{\idx{by}}][\locvar{BX}+\locvar{\idx{bx}}]$
6903 the value \locvar{P}.
6904 \item
6905 Otherwise, if \locvar{\pli} equals 1, assign
6906 $\bitvar{RECCB}[\locvar{BY}+\locvar{\idx{by}}][\locvar{BX}+\locvar{\idx{bx}}]$
6907 the value \locvar{P}.
6908 \item
6909 Otherwise, \locvar{\pli} equals 2, so assign
6910 $\bitvar{RECCR}[\locvar{BY}+\locvar{\idx{by}}][\locvar{BX}+\locvar{\idx{bx}}]$
6911 the value \locvar{P}.
6912 \end{enumerate}
6913 \end{enumerate}
6914 \end{enumerate}
6916 \section{Loop Filtering}
6917 \label{sec:loopfilter}
6919 The loop filter is a simple deblocking filter that is based on running a small
6920 edge detecting filter over the coded block edges and adjusting the pixel
6921 values by a tapered response.
6922 The filter response is modulated by the following non-linear function:
6923 \begin{align*}
6924 \lflim(\locvar{R},\bitvar{L})&=\left\{\begin{array}{ll}
6925 0, & \locvar{R}\le-2*\bitvar{L} \\
6926 -\locvar{R}-2*\bitvar{L}, & -2*\bitvar{L}<\locvar{R}\le-\bitvar{L} \\
6927 \locvar{R}, & -\bitvar{L}<\locvar{R}<\bitvar{L} \\
6928 -\locvar{R}+2*\bitvar{L}, & \bitvar{L}\le\locvar{R}<2*\bitvar{L} \\
6929 0, & 2*\bitvar{L}\le\locvar{R}
6930 \end{array}\right.
6931 \end{align*}
6932 Here \bitvar{L} is a limiting value equal to $\bitvar{LFLIMS}[\idx{qi0}]$.
6933 It defines the peaks of the function.
6934 \bitvar{LFLIMS} is an array of values specified in the setup header and is
6935 indexed by \idx{qi0}, the first quantization index for the frame, the one used
6936 for all the DC coefficients.
6937 Larger values of \bitvar{L} indicate a stronger filter.
6939 \subsection{Horizontal Filter}
6940 \label{sub:filth}
6942 \paragraph{Input parameters:}\hfill\\*
6943 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6944 \multicolumn{1}{c}{Name} &
6945 \multicolumn{1}{c}{Type} &
6946 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6947 \multicolumn{1}{c}{Signed?} &
6948 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6949 \bitvar{RECP} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6950 8 & No & A $\bitvar{RPH}\times\bitvar{RPW}$
6951 array containing the contents of a plane of the reconstructed frame. \\
6952 \bitvar{FX} & Integer & 20 & No & The horizontal pixel index of the
6953 lower-left corner of the area to be filtered. \\
6954 \bitvar{FY} & Integer & 20 & No & The vertical pixel index of the
6955 lower-left corner of the area to be filtered. \\
6956 \bitvar{L} & Integer & 7 & No & The loop filter limit value. \\
6957 \bottomrule\end{tabularx}
6959 \paragraph{Output parameters:}\hfill\\*
6960 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6961 \multicolumn{1}{c}{Name} &
6962 \multicolumn{1}{c}{Type} &
6963 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6964 \multicolumn{1}{c}{Signed?} &
6965 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6966 \bitvar{RECP} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6967 8 & No & A $\bitvar{RPH}\times\bitvar{RPW}$
6968 array containing the contents of a plane of the reconstructed frame. \\
6969 \bottomrule\end{tabularx}
6971 \paragraph{Variables used:}\hfill\\*
6972 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6973 \multicolumn{1}{c}{Name} &
6974 \multicolumn{1}{c}{Type} &
6975 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6976 \multicolumn{1}{c}{Signed?} &
6977 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6978 \locvar{R} & Integer & 9 & Yes & The edge detector response. \\
6979 \locvar{P} & Integer & 9 & Yes & A filtered pixel value. \\
6980 \locvar{\idx{by}} & Integer & 20 & No & The vertical pixel index in the
6981 block. \\
6982 \bottomrule\end{tabularx}
6983 \medskip
6985 This procedure applies a $4$-tap horizontal filter to each row of a vertical
6986 block edge.
6988 \begin{enumerate}
6989 \item
6990 For each value of \locvar{\idx{by}} from $0$ to $7$:
6991 \begin{enumerate}
6992 \item
6993 Assign \locvar{R} the value
6994 \begin{multline*}
6995 (\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}]-
6996 3*\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+1]+\\
6997 3*\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+2]-
6998 \bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+3]+4)>>3.
6999 \end{multline*}
7000 \item
7001 Assign \locvar{P} the value
7002 $(\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+1]+
7003 \lflim(\locvar{R},\bitvar{L}))$.
7004 \item
7005 If \locvar{P} is less than zero, assign
7006 $\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+1]$ the value zero.
7007 \item
7008 Otherwise, if \locvar{P} is greater than $255$, assign
7009 $\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+1]$ the value $255$.
7010 \item
7011 Otherwise, assign
7012 $\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+1]$ the value
7013 \locvar{P}.
7014 \item
7015 Assign \locvar{P} the value
7016 $(\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+2]-
7017 \lflim(\locvar{R},\bitvar{L}))$.
7018 \item
7019 If \locvar{P} is less than zero, assign
7020 $\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+2]$ the value zero.
7021 \item
7022 Otherwise, if \locvar{P} is greater than $255$, assign
7023 $\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+2]$ the value $255$.
7024 \item
7025 Otherwise, assign
7026 $\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+2]$ the value
7027 \locvar{P}.
7028 \end{enumerate}
7029 \end{enumerate}
7031 \subsection{Vertical Filter}
7032 \label{sub:filtv}
7034 \paragraph{Input parameters:}\hfill\\*
7035 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
7036 \multicolumn{1}{c}{Name} &
7037 \multicolumn{1}{c}{Type} &
7038 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
7039 \multicolumn{1}{c}{Signed?} &
7040 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
7041 \bitvar{RECP} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7042 8 & No & A $\bitvar{RPH}\times\bitvar{RPW}$
7043 array containing the contents of a plane of the reconstructed frame. \\
7044 \bitvar{FX} & Integer & 20 & No & The horizontal pixel index of the
7045 lower-left corner of the area to be filtered. \\
7046 \bitvar{FY} & Integer & 20 & No & The vertical pixel index of the
7047 lower-left corner of the area to be filtered. \\
7048 \bitvar{L} & Integer & 7 & No & The loop filter limit value. \\
7049 \bottomrule\end{tabularx}
7051 \paragraph{Output parameters:}\hfill\\*
7052 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
7053 \multicolumn{1}{c}{Name} &
7054 \multicolumn{1}{c}{Type} &
7055 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
7056 \multicolumn{1}{c}{Signed?} &
7057 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
7058 \bitvar{RECP} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7059 8 & No & A $\bitvar{RPH}\times\bitvar{RPW}$
7060 array containing the contents of a plane of the reconstructed frame. \\
7061 \bottomrule\end{tabularx}
7063 \paragraph{Variables used:}\hfill\\*
7064 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
7065 \multicolumn{1}{c}{Name} &
7066 \multicolumn{1}{c}{Type} &
7067 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
7068 \multicolumn{1}{c}{Signed?} &
7069 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
7070 \locvar{R} & Integer & 9 & Yes & The edge detector response. \\
7071 \locvar{P} & Integer & 9 & Yes & A filtered pixel value. \\
7072 \locvar{\idx{bx}} & Integer & 20 & No & The horizontal pixel index in the
7073 block. \\
7074 \bottomrule\end{tabularx}
7075 \medskip
7077 This procedure applies a $4$-tap vertical filter to each column of a horizontal
7078 block edge.
7080 \begin{enumerate}
7081 \item
7082 For each value of \locvar{\idx{bx}} from $0$ to $7$:
7083 \begin{enumerate}
7084 \item
7085 Assign \locvar{R} the value
7086 \begin{multline*}
7087 (\bitvar{RECP}[\bitvar{FY}][\bitvar{FX}+\locvar{\idx{bx}}]-
7088 3*\bitvar{RECP}[\bitvar{FY}+1][\bitvar{FX}+\locvar{\idx{bx}}]+\\
7089 3*\bitvar{RECP}[\bitvar{FY}+2][\bitvar{FX}+\locvar{\idx{bx}}]-
7090 \bitvar{RECP}[\bitvar{FY}+3][\bitvar{FX}+\locvar{\idx{bx}}]+4)>>3.
7091 \end{multline*}
7092 \item
7093 Assign \locvar{P} the value
7094 $(\bitvar{RECP}[\bitvar{FY}+1][\bitvar{FX}+\locvar{\idx{bx}}]+
7095 \lflim(\locvar{R},\bitvar{L}))$.
7096 \item
7097 If \locvar{P} is less than zero, assign
7098 $\bitvar{RECP}[\bitvar{FY}+1][\bitvar{FX}+\locvar{\idx{bx}}]$ the value zero.
7099 \item
7100 Otherwise, if \locvar{P} is greater than $255$, assign
7101 $\bitvar{RECP}[\bitvar{FY}+1][\bitvar{FX}+\locvar{\idx{bx}}]$ the value $255$.
7102 \item
7103 Otherwise, assign
7104 $\bitvar{RECP}[\bitvar{FY}+1][\bitvar{FX}+\locvar{\idx{bx}}]$ the value
7105 \locvar{P}.
7106 \item
7107 Assign \locvar{P} the value
7108 $(\bitvar{RECP}[\bitvar{FY}+2][\bitvar{FX}+\locvar{\idx{bx}}]-
7109 \lflim(\locvar{R},\bitvar{L}))$.
7110 \item
7111 If \locvar{P} is less than zero, assign
7112 $\bitvar{RECP}[\bitvar{FY}+2][\bitvar{FX}+\locvar{\idx{bx}}]$ the value zero.
7113 \item
7114 Otherwise, if \locvar{P} is greater than $255$, assign
7115 $\bitvar{RECP}[\bitvar{FY}+2][\bitvar{FX}+\locvar{\idx{bx}}]$ the value $255$.
7116 \item
7117 Otherwise, assign
7118 $\bitvar{RECP}[\bitvar{FY}+2][\bitvar{FX}+\locvar{\idx{bx}}]$ the value
7119 \locvar{P}.
7120 \end{enumerate}
7121 \end{enumerate}
7123 \subsection{Complete Loop Filter}
7124 \label{sub:loop-filt}
7126 \paragraph{Input parameters:}\hfill\\*
7127 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
7128 \multicolumn{1}{c}{Name} &
7129 \multicolumn{1}{c}{Type} &
7130 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
7131 \multicolumn{1}{c}{Signed?} &
7132 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
7133 \bitvar{LFLIMS} & \multicolumn{1}{p{40pt}}{Integer array} &
7134 7 & No & A 64-element array of loop filter limit
7135 values. \\
7136 \bitvar{RPYW} & Integer & 20 & No & The width of the $Y'$ plane of the
7137 reconstructed frame in pixels. \\
7138 \bitvar{RPYH} & Integer & 20 & No & The height of the $Y'$ plane of the
7139 reconstructed frame in pixels. \\
7140 \bitvar{RPCW} & Integer & 20 & No & The width of the $C_b$ and $C_r$
7141 planes of the reconstructed frame in pixels. \\
7142 \bitvar{RPCH} & Integer & 20 & No & The height of the $C_b$ and $C_r$
7143 planes of the reconstructed frame in pixels. \\
7144 \bitvar{NBS} & Integer & 36 & No & The total number of blocks in a
7145 frame. \\
7146 \bitvar{BCODED} & \multicolumn{1}{p{40pt}}{Integer Array} &
7147 1 & No & An \bitvar{NBS}-element array of
7148 flags indicating which blocks are coded. \\
7149 \bitvar{QIS} & \multicolumn{1}{p{40pt}}{Integer array} &
7150 6 & No & An \bitvar{NQIS}-element array of
7151 \qi\ values. \\
7152 \bitvar{RECY} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7153 8 & No & A $\bitvar{RPYH}\times\bitvar{RPYW}$
7154 array containing the contents of the $Y'$ plane of the reconstructed frame. \\
7155 \bitvar{RECCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7156 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7157 array containing the contents of the $C_b$ plane of the reconstructed frame. \\
7158 \bitvar{RECCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7159 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7160 array containing the contents of the $C_r$ plane of the reconstructed frame. \\
7161 \bottomrule\end{tabularx}
7163 \paragraph{Output parameters:}\hfill\\*
7164 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
7165 \multicolumn{1}{c}{Name} &
7166 \multicolumn{1}{c}{Type} &
7167 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
7168 \multicolumn{1}{c}{Signed?} &
7169 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
7170 \bitvar{RECY} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7171 8 & No & A $\bitvar{RPYH}\times\bitvar{RPYW}$
7172 array containing the contents of the $Y'$ plane of the reconstructed frame. \\
7173 \bitvar{RECCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7174 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7175 array containing the contents of the $C_b$ plane of the reconstructed frame. \\
7176 \bitvar{RECCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7177 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7178 array containing the contents of the $C_r$ plane of the reconstructed frame. \\
7179 \bottomrule\end{tabularx}
7181 \paragraph{Variables used:}\hfill\\*
7182 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
7183 \multicolumn{1}{c}{Name} &
7184 \multicolumn{1}{c}{Type} &
7185 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
7186 \multicolumn{1}{c}{Signed?} &
7187 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
7188 \locvar{RPW} & Integer & 20 & No & The width of the current plane of the
7189 reconstructed frame in pixels. \\
7190 \locvar{RPH} & Integer & 20 & No & The height of the current plane of
7191 the reconstructed frame in pixels. \\
7192 \locvar{RECP} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7193 8 & No & A $\bitvar{RPH}\times\bitvar{RPW}$
7194 array containing the contents of the current plane of the reconstructed
7195 frame. \\
7196 \locvar{BX} & Integer & 20 & No & The horizontal pixel index of the
7197 lower-left corner of the current block. \\
7198 \locvar{BY} & Integer & 20 & No & The vertical pixel index of the
7199 lower-left corner of the current block. \\
7200 \locvar{FX} & Integer & 20 & No & The horizontal pixel index of the
7201 lower-left corner of the area to be filtered. \\
7202 \locvar{FY} & Integer & 20 & No & The vertical pixel index of the
7203 lower-left corner of the area to be filtered. \\
7204 \locvar{L} & Integer & 7 & No & The loop filter limit value. \\
7205 \locvar{\bi} & Integer & 36 & No & The index of the current block in
7206 coded order. \\
7207 \locvar{\bj} & Integer & 36 & No & The index of a neighboring block in
7208 coded order. \\
7209 \locvar{\pli} & Integer & 2 & No & The color plane index of the current
7210 block. \\
7211 \bottomrule\end{tabularx}
7212 \medskip
7214 This procedure defines the order in which the various block edges are filtered.
7215 Because each application of one of the two filters above destructively modifies
7216 the contents of the reconstructed image, the precise output obtained differs
7217 depending on the order that horizontal and vertical filters are applied to the
7218 edges of a single block.
7219 The order defined here conforms to that used by VP3.
7221 \begin{enumerate}
7222 \item
7223 Assign \locvar{L} the value $\bitvar{LFLIMS}[\bitvar{QIS}[0]]$.
7224 \item
7225 For each block in {\em raster} order, with coded-order index \locvar{\bi}:
7226 \begin{enumerate}
7227 \item
7228 If $\bitvar{BCODED}[\locvar{\bi}]$ is non-zero:
7229 \begin{enumerate}
7230 \item
7231 Assign \locvar{\pli} the index of the color plane block \locvar{\bi} belongs to.
7233 \item
7234 Assign \locvar{RECP}, \locvar{RPW}, and \locvar{RPH} the values given in
7235 Table~\ref{tab:recp} corresponding to the value of \locvar{\pli}.
7237 \begin{table}[htbp]
7238 \begin{center}
7239 \begin{tabular}{clll}\toprule
7240 \locvar{\pli} & \locvar{RECP} & \locvar{RPW} & \locvar{RPH} \\\midrule
7241 $0$ & \bitvar{RECY} & \bitvar{RPYW} & \bitvar{RPYH} \\
7242 $1$ & \bitvar{RECCB} & \bitvar{RPCW} & \bitvar{RPCH} \\
7243 $2$ & \bitvar{RECCR} & \bitvar{RPCW} & \bitvar{RPCH} \\
7244 \bottomrule\end{tabular}
7245 \end{center}
7246 \caption{Reconstructed Planes and Sizes for Each \locvar{\pli}}
7247 \label{tab:recp}
7248 \end{table}
7250 \item
7251 Assign \locvar{BX} the horizontal pixel index of the lower-left corner of the
7252 block \locvar{\bi}.
7253 \item
7254 Assign \locvar{BY} the vertical pixel index of the lower-left corner of the
7255 block \locvar{\bi}.
7256 \item
7257 If \locvar{BX} is greater than zero:
7258 \begin{enumerate}
7259 \item
7260 Assign \locvar{FX} the value $(\locvar{BX}-2)$.
7261 \item
7262 Assign \locvar{FY} the value \locvar{BY}.
7263 \item
7264 Using \locvar{RECP}, \locvar{FX}, \locvar{FY}, and \locvar{L}, apply the
7265 horizontal block filter to the left edge of block \locvar{\bi} with the
7266 procedure described in Section~\ref{sub:filth}.
7267 \end{enumerate}
7268 \item
7269 If \locvar{BY} is greater than zero:
7270 \begin{enumerate}
7271 \item
7272 Assign \locvar{FX} the value \locvar{BX}.
7273 \item
7274 Assign \locvar{FY} the value $(\locvar{BY}-2)$.
7275 \item
7276 Using \locvar{RECP}, \locvar{FX}, \locvar{FY}, and \locvar{L}, apply the
7277 vertical block filter to the bottom edge of block \locvar{\bi} with the
7278 procedure described in Section~\ref{sub:filtv}.
7279 \end{enumerate}
7280 \item
7281 If $(\locvar{BX}+8)$ is less than \locvar{RPW} and
7282 $\bitvar{BCODED}[\locvar{\bj}]$ is zero, where \locvar{\bj} is the coded-order
7283 index of the block adjacent to \locvar{\bi} on the right:
7284 \begin{enumerate}
7285 \item
7286 Assign \locvar{FX} the value $(\locvar{BX}+6)$.
7287 \item
7288 Assign \locvar{FY} the value \locvar{BY}.
7289 \item
7290 Using \locvar{RECP}, \locvar{FX}, \locvar{FY}, and \locvar{L}, apply the
7291 horizontal block filter to the right edge of block \locvar{\bi} with the
7292 procedure described in Section~\ref{sub:filth}.
7293 \end{enumerate}
7294 \item
7295 If $(\locvar{BY}+8)$ is less than \locvar{RPH} and
7296 $\bitvar{BCODED}[\locvar{\bj}]$ is zero, where \locvar{\bj} is the coded-order
7297 index of the block adjacent to \locvar{\bi} above:
7298 \begin{enumerate}
7299 \item
7300 Assign \locvar{FX} the value \locvar{BX}.
7301 \item
7302 Assign \locvar{FY} the value $(\locvar{BY}+6)$.
7303 \item
7304 Using \locvar{RECP}, \locvar{FX}, \locvar{FY}, and \locvar{L}, apply the
7305 vertical block filter to the top edge of block \locvar{\bi} with the
7306 procedure described in Section~\ref{sub:filtv}.
7307 \end{enumerate}
7308 \end{enumerate}
7309 \end{enumerate}
7310 \end{enumerate}
7312 \paragraph{VP3 Compatibility}
7314 The original VP3 decoder implemented unrestricted motion vectors by enlarging
7315 the reconstructed frame buffers and repeating the pixels on its edges into the
7316 padding region.
7317 However, for the previous reference frame this padding occurred before the loop
7318 filter was applied, but for the golden reference frame it occurred afterwards.
7320 This means that for the previous reference frame, the padding values were
7321 required to be stored separately from the main image values.
7322 Furthermore, even if the previous and golden reference frames were in fact the
7323 same frame, they could have different padding values.
7324 Finally, the encoder did not apply the loop filter at all, which resulted in
7325 artifacts, particularly in near-static scenes, due to prediction-loop
7326 mismatch.
7327 This last can only be considered a bug in the VP3 encoder.
7329 Given all these things, Theora now uniformly applies the loop filter before
7330 the reference frames are padded.
7331 This means it is possible to use the same buffer for the previous and golden
7332 reference frames when they do indeed refer to the same frame.
7333 It also means that on architectures where memory bandwidth is limited, it is
7334 possible to avoid storing padding values, and simply clamp the motion vectors
7335 applied to each pixel as described in Sections~\ref{sub:predfullpel}
7336 and~\ref{sub:predhalfpel}.
7337 This means that the predicted pixel values along the edges of the frame might
7338 differ slightly between VP3 and Theora, but since the VP3 encoder did not
7339 apply the loop filter in the first place, this is not likely to pose any
7340 serious compatibility issues.
7342 \section{Complete Frame Decode}
7344 \paragraph{Input parameters:}\hfill\\*
7345 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
7346 \multicolumn{1}{c}{Name} &
7347 \multicolumn{1}{c}{Type} &
7348 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
7349 \multicolumn{1}{c}{Signed?} &
7350 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
7351 \bitvar{FMBW} & Integer & 16 & No & The width of the frame in macro
7352 blocks. \\
7353 \bitvar{FMBH} & Integer & 16 & No & The height of the frame in macro
7354 blocks. \\
7355 \bitvar{NSBS} & Integer & 32 & No & The total number of super blocks in a
7356 frame. \\
7357 \bitvar{NBS} & Integer & 36 & No & The total number of blocks in a
7358 frame. \\
7359 \bitvar{NMBS} & Integer & 32 & No & The total number of macro blocks in a
7360 frame. \\
7361 \bitvar{FRN} & Integer & 32 & No & The frame-rate numerator. \\
7362 \bitvar{FRD} & Integer & 32 & No & The frame-rate denominator. \\
7363 \bitvar{PARN} & Integer & 24 & No & The pixel aspect-ratio numerator. \\
7364 \bitvar{PARD} & Integer & 24 & No & The pixel aspect-ratio
7365 denominator. \\
7366 \bitvar{CS} & Integer & 8 & No & The color space. \\
7367 \bitvar{PF} & Integer & 2 & No & The pixel format. \\
7368 \bitvar{NOMBR} & Integer & 24 & No & The nominal bitrate of the stream, in
7369 bits per second. \\
7370 \bitvar{QUAL} & Integer & 6 & No & The quality hint. \\
7371 \bitvar{KFGSHIFT} & Integer & 5 & No & The amount to shift the key frame
7372 number by in the granule position. \\
7373 \bitvar{LFLIMS} & \multicolumn{1}{p{40pt}}{Integer array} &
7374 7 & No & A 64-element array of loop filter
7375 limit values. \\
7376 \bitvar{ACSCALE} & \multicolumn{1}{p{40pt}}{Integer array} &
7377 16 & No & A 64-element array of scale values
7378 for AC coefficients for each \qi\ value. \\
7379 \bitvar{DCSCALE} & \multicolumn{1}{p{40pt}}{Integer array} &
7380 16 & No & A 64-element array of scale values
7381 for the DC coefficient for each \qi\ value. \\
7382 \bitvar{NBMS} & Integer & 10 & No & The number of base matrices. \\
7383 \bitvar{BMS} & \multicolumn{1}{p{50pt}}{2D Integer array} &
7384 8 & No & A $\bitvar{NBMS}\times 64$ array
7385 containing the base matrices. \\
7386 \bitvar{NQRS} & \multicolumn{1}{p{50pt}}{2D Integer array} &
7387 6 & No & A $2\times 3$ array containing the
7388 number of quant ranges for a given \qti\ and \pli, respectively.
7389 This is at most $63$. \\
7390 \bitvar{QRSIZES} & \multicolumn{1}{p{50pt}}{3D Integer array} &
7391 6 & No & A $2\times 3\times 63$ array of the
7392 sizes of each quant range for a given \qti\ and \pli, respectively.
7393 Only the first $\bitvar{NQRS}[\qti][\pli]$ values will be used. \\
7394 \bitvar{QRBMIS} & \multicolumn{1}{p{50pt}}{3D Integer array} &
7395 9 & No & A $2\times 3\times 64$ array of the
7396 \bmi's used for each quant range for a given \qti\ and \pli, respectively.
7397 Only the first $(\bitvar{NQRS}[\qti][\pli]+1)$ values will be used. \\
7398 \bitvar{HTS} & \multicolumn{3}{l}{Huffman table array}
7399 & An 80-element array of Huffman tables
7400 with up to 32 entries each. \\
7401 \bitvar{GOLDREFY} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7402 8 & No & A $\bitvar{RPYH}\times\bitvar{RPYW}$
7403 array containing the contents of the $Y'$ plane of the golden reference
7404 frame. \\
7405 \bitvar{GOLDREFCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7406 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7407 array containing the contents of the $C_b$ plane of the golden reference
7408 frame. \\
7409 \bitvar{GOLDREFCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7410 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7411 array containing the contents of the $C_r$ plane of the golden reference
7412 frame. \\
7413 \bitvar{PREVREFY} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7414 8 & No & A $\bitvar{RPYH}\times\bitvar{RPYW}$
7415 array containing the contents of the $Y'$ plane of the previous reference
7416 frame. \\
7417 \bitvar{PREVREFCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7418 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7419 array containing the contents of the $C_b$ plane of the previous reference
7420 frame. \\
7421 \bitvar{PREVREFCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7422 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7423 array containing the contents of the $C_r$ plane of the previous reference
7424 frame. \\
7425 \bottomrule\end{tabularx}
7427 \paragraph{Output parameters:}\hfill\\*
7428 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
7429 \multicolumn{1}{c}{Name} &
7430 \multicolumn{1}{c}{Type} &
7431 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
7432 \multicolumn{1}{c}{Signed?} &
7433 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
7434 \bitvar{RECY} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7435 8 & No & A $\bitvar{RPYH}\times\bitvar{RPYW}$
7436 array containing the contents of the $Y'$ plane of the reconstructed frame. \\
7437 \bitvar{RECCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7438 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7439 array containing the contents of the $C_b$ plane of the reconstructed
7440 frame. \\
7441 \bitvar{RECCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7442 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7443 array containing the contents of the $C_r$ plane of the reconstructed
7444 frame. \\
7445 \bitvar{GOLDREFY} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7446 8 & No & A $\bitvar{RPYH}\times\bitvar{RPYW}$
7447 array containing the contents of the $Y'$ plane of the golden reference
7448 frame. \\
7449 \bitvar{GOLDREFCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7450 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7451 array containing the contents of the $C_b$ plane of the golden reference
7452 frame. \\
7453 \bitvar{GOLDREFCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7454 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7455 array containing the contents of the $C_r$ plane of the golden reference
7456 frame. \\
7457 \bitvar{PREVREFY} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7458 8 & No & A $\bitvar{RPYH}\times\bitvar{RPYW}$
7459 array containing the contents of the $Y'$ plane of the previous reference
7460 frame. \\
7461 \bitvar{PREVREFCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7462 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7463 array containing the contents of the $C_b$ plane of the previous reference
7464 frame. \\
7465 \bitvar{PREVREFCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7466 8 & No & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7467 array containing the contents of the $C_r$ plane of the previous reference
7468 frame. \\
7469 \bottomrule\end{tabularx}
7471 \paragraph{Variables used:}\hfill\\*
7472 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
7473 \multicolumn{1}{c}{Name} &
7474 \multicolumn{1}{c}{Type} &
7475 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
7476 \multicolumn{1}{c}{Signed?} &
7477 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
7478 \locvar{FTYPE} & Integer & 1 & No & The frame type. \\
7479 \locvar{NQIS} & Integer & 2 & No & The number of \qi\ values. \\
7480 \locvar{QIS} & \multicolumn{1}{p{40pt}}{Integer array} &
7481 6 & No & An \locvar{NQIS}-element array of
7482 \qi\ values. \\
7483 \locvar{BCODED} & \multicolumn{1}{p{40pt}}{Integer Array} &
7484 1 & No & An \bitvar{NBS}-element array of flags
7485 indicating which blocks are coded. \\
7486 \locvar{MBMODES} & \multicolumn{1}{p{40pt}}{Integer Array} &
7487 3 & No & An \bitvar{NMBS}-element array of
7488 coding modes for each macro block. \\
7489 \locvar{MVECTS} & \multicolumn{1}{p{50pt}}{Array of 2D Integer Vectors} &
7490 6 & Yes & An \bitvar{NBS}-element array of motion
7491 vectors for each block. \\
7492 \locvar{QIIS} & \multicolumn{1}{p{40pt}}{Integer Array} &
7493 2 & No & An \bitvar{NBS}-element array of
7494 \locvar{\qii} values for each block. \\
7495 \locvar{COEFFS} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7496 16 & Yes & An $\bitvar{NBS}\times 64$ array of
7497 quantized DCT coefficient values for each block in zig-zag order. \\
7498 \locvar{NCOEFFS} & \multicolumn{1}{p{40pt}}{Integer Array} &
7499 7 & No & An \bitvar{NBS}-element array of the
7500 coefficient count for each block. \\
7501 \bitvar{RPYW} & Integer & 20 & No & The width of the $Y'$ plane of the
7502 reference frames in pixels. \\
7503 \bitvar{RPYH} & Integer & 20 & No & The height of the $Y'$ plane of the
7504 reference frames in pixels. \\
7505 \bitvar{RPCW} & Integer & 20 & No & The width of the $C_b$ and $C_r$
7506 planes of the reference frames in pixels. \\
7507 \bitvar{RPCH} & Integer & 20 & No & The height of the $C_b$ and $C_r$
7508 planes of the reference frames in pixels. \\
7509 \locvar{\bi} & Integer & 36 & No & The index of the current block in coded
7510 order. \\
7511 \bottomrule\end{tabularx}
7512 \medskip
7514 This procedure uses all the procedures defined in the previous sections of this
7515 chapter to decode and reconstruct a complete frame.
7516 As a special case, a 0-byte packet is treated exactly like an inter frame with
7517 no coded blocks.
7518 It takes as input values decoded from the headers, as well as the current
7519 reference frames.
7520 As output, it gives the uncropped, reconstructed frame.
7521 This should be cropped to the picture region before display.
7523 \begin{enumerate}
7524 \item
7525 If the size of the data packet is non-zero:
7526 \begin{enumerate}
7527 \item
7528 Decode the frame header values \locvar{FTYPE}, \locvar{NQIS}, and \locvar{QIS}
7529 using the procedure given in Section~\ref{sub:frame-header}.
7530 \item
7531 Using \locvar{FTYPE}, \bitvar{NSBS}, and \bitvar{NBS}, decode the list of coded
7532 block flags into \locvar{BCODED} using the procedure given in
7533 Section~\ref{sub:coded-blocks}.
7534 \item
7535 Using \locvar{FTYPE}, \bitvar{NMBS}, \bitvar{NBS}, and \locvar{BCODED}, decode
7536 the macro block coding modes into \locvar{MBMODES} using the procedure given
7537 in Section~\ref{sub:mb-modes}.
7538 \item
7539 If \locvar{FTYPE} is non-zero (inter frame), using \bitvar{PF}, \bitvar{NMBS},
7540 \locvar{MBMODES}, \bitvar{NBS}, and \locvar{BCODED}, decode the motion vectors
7541 into \locvar{MVECTS} using the procedure given in Section~\ref{sub:mv-decode}.
7542 \item
7543 Using \bitvar{NBS}, \locvar{BCODED}, and \locvar{NQIS}, decode the block-level
7544 \qi\ values into \locvar{QIIS} using the procedure given in
7545 Section~\ref{sub:block-qis}.
7546 \item
7547 Using \bitvar{NBS}, \bitvar{NMBS}, \locvar{BCODED}, and \bitvar{HTS}, decode
7548 the DCT coefficients into \locvar{COEFFS} and \locvar{NCOEFFS} using the
7549 procedure given in Section~\ref{sub:dct-coeffs}.
7550 \item
7551 Using \locvar{BCODED} and \locvar{MBMODES}, undo the DC prediction on the DC
7552 coefficients stored in \locvar{COEFFS} using the procedure given in
7553 Section~\ref{sub:dc-pred-undo}.
7554 \end{enumerate}
7555 \item
7556 Otherwise:
7557 \begin{enumerate}
7558 \item
7559 Assign \locvar{FTYPE} the value 1 (inter frame).
7560 \item
7561 Assign \locvar{NQIS} the value 1.
7562 \item
7563 Assign $\locvar{QIS}[0]$ the value 63.
7564 \item
7565 For each value of \locvar{\bi} from 0 to $(\bitvar{NBS}-1)$, assign
7566 $\locvar{BCODED}[\locvar{\bi}]$ the value zero.
7567 \end{enumerate}
7568 \item
7569 Assign \locvar{RPYW} and \locvar{RPYH} the values $(16*\bitvar{FMBW})$ and
7570 $(16*\bitvar{FMBH})$, respectively.
7571 \item
7572 Assign \locvar{RPCW} and \locvar{RPCH} the values from the row of
7573 Table~\ref{tab:rpcwh-for-pf} corresponding to \bitvar{PF}.
7575 \begin{table}[tb]
7576 \begin{center}
7577 \begin{tabular}{crr}\toprule
7578 \bitvar{PF} & \multicolumn{1}{c}{\locvar{RPCW}}
7579 & \multicolumn{1}{c}{\locvar{RPCH}} \\\midrule
7580 $0$ & $8*\bitvar{FMBW}$ & $8*\bitvar{FMBH}$ \\
7581 $2$ & $8*\bitvar{FMBW}$ & $16*\bitvar{FMBH}$ \\
7582 $3$ & $16*\bitvar{FMBW}$ & $16*\bitvar{FMBH}$ \\
7583 \bottomrule\end{tabular}
7584 \end{center}
7585 \caption{Width and Height of Chroma Planes for each Pixel Format}
7586 \label{tab:rpcwh-for-pf}
7587 \end{table}
7589 \item
7590 Using \bitvar{ACSCALE}, \bitvar{DCSCALE}, \bitvar{BMS}, \bitvar{NQRS},
7591 \bitvar{QRSIZES}, \bitvar{QRBMIS}, \bitvar{NBS}, \locvar{BCODED},
7592 \locvar{MBMODES}, \locvar{MVECTS}, \locvar{COEFFS}, \locvar{NCOEFFS},
7593 \locvar{QIS}, \locvar{QIIS}, \locvar{RPYW}, \locvar{RPYH}, \locvar{RPCW},
7594 \locvar{RPCH}, \bitvar{GOLDREFY}, \bitvar{GOLDREFCB}, \bitvar{GOLDREFCR},
7595 \bitvar{PREVREFY}, \bitvar{PREVREFCB}, and \bitvar{PREVREFCR}, reconstruct the
7596 complete frame into \bitvar{RECY}, \bitvar{RECCB}, and \bitvar{RECCR} using
7597 the procedure given in Section~\ref{sub:recon}.
7598 \item
7599 Using \bitvar{LFLIMS}, \locvar{RPYW}, \locvar{RPYH}, \locvar{RPCW},
7600 \locvar{RPCH}, \bitvar{NBS}, \locvar{BCODED}, and \locvar{QIS}, apply the loop
7601 filter to the reconstructed frame in \bitvar{RECY}, \bitvar{RECCB}, and
7602 \bitvar{RECCR} using the procedure given in Section~\ref{sub:loop-filt}.
7603 \item
7604 If \locvar{FTYPE} is zero (intra frame), assign \bitvar{GOLDREFY},
7605 \bitvar{GOLDREFCB}, and \bitvar{GOLDREFCR} the values \bitvar{RECY},
7606 \bitvar{RECCB}, and \bitvar{RECCR}, respectively.
7607 \item
7608 Assign \bitvar{PREVREFY}, \bitvar{PREVREFCB}, and \bitvar{PREVREFCR} the values
7609 \bitvar{RECY}, \bitvar{RECCB}, and \bitvar{RECCR}, respectively.
7610 \end{enumerate}
7612 %\backmatter
7613 \appendix
7615 \chapter{Ogg Bitstream Encapsulation}
7616 \label{app:oggencapsulation}
7618 \section{Overview}
7620 This document specifies the embedding or encapsulation of Theora packets
7621 in an Ogg transport stream.
7623 Ogg is a stream oriented wrapper for coded, linear time-based data.
7624 It provides synchronization, multiplexing, framing, error detection and
7625 seeking landmarks for the decoder and complements the raw packet format
7626 used by the Theora codec.
7628 This document assumes familiarity with the details of the Ogg standard.
7629 The Xiph.org documentation provides an overview of the Ogg transport stream
7630 format at \url{http://www.xiph.org/ogg/doc/oggstream.html} and a detailed
7631 description at \url{http://www.xiph.org/ogg/doc/framing.html}.
7632 The format is also defined in RFC~3533 \cite{rfc3533}.
7633 While Theora packets can be embedded in a wide variety of media
7634 containers and streaming mechanisms, the Xiph.org Foundation
7635 recommends Ogg as the native format for Theora video in file-oriented
7636 storage and transmission contexts.
7638 \subsection{MIME type}
7640 The correct MIME type of any Ogg file is {\tt application/ogg}.
7641 Outside of an encapsulation, the MIME type {\tt video/x-theora} may
7642 be used to refer specifically to the Theora compressed video stream.
7644 \section{Embedding in a logical bitstream}
7646 Ogg separates a {\em logical bitstream} consisting of the framing of
7647 a particular sequence of packets and complete within itself from
7648 the {\em physical bitstream} which may consist either of a single
7649 logical bitstream or a number of logical bitstreams multiplexed
7650 together.
7651 This section specifies the embedding of Theora packets in a logical Ogg
7652 bitstream.
7653 The mapping of Ogg Theora logical bitstreams into a multiplexed physical Ogg
7654 stream is described in the next section.
7656 \subsection{Headers}
7658 The initial info header packet appears by itself in a single Ogg page.
7659 This page defines the start of the logical stream and MUST have
7660 the `beginning of stream' flag set.
7662 The second and third header packets (metadata comments and decoder
7663 setup data) can together span one or more Ogg pages.
7664 If there are additional non-normative header packets, they MUST be
7665 included in this sequence of pages as well.
7666 The comment header packet MUST begin the second Ogg page in the logical
7667 bitstream, and there MUST be a page break between the last header
7668 packet and the first frame data packet.
7670 These two page break requirements facilitate stream identification and
7671 simplify header acquisition for seeking and live streaming applications.
7673 All header pages MUST have their granule position field set to zero.
7674 %TODO: or -1?
7675 %TBT: What are we doing now?
7677 \subsection{Frame data}
7679 The first frame data packet in a logical bitstream MUST begin a fresh page.
7680 All other data packets are placed one at a time into Ogg pages
7681 until the end of the stream.
7682 Packets can span pages and multiple packets can be placed within any
7683 one page.
7684 The last page in the logical bitstream MUST have its `end of stream'
7685 flag set.
7687 Frame data pages MUST be marked with a granule index corresponding to
7688 the display time of the last frame/packet that finishes in that page.
7690 {\bf Note:}
7691 This scheme is still under discussion.
7692 It has also been proposed that pages be labeled with a granule corresponding to
7693 the first frame that begins on that page.
7694 This simplifies seeking and mux, but is different from the published
7695 definition of the Ogg granule field.
7696 This document will be updated when the issue is settled.
7698 %TODO: \subsection{Granule position}
7700 \section{Multiplexed stream mapping}
7702 Applications supporting Ogg Theora I must support Theora bitstreams
7703 multiplexed with compressed audio data in the Vorbis I and Speex
7704 formats, and should support Ogg-encapsulated MNG graphics for overlays.
7705 % and the Writ format for text-based titling.
7706 %TBT: That's great... do these things have specifications?
7708 Multiple audio and video bitstreams may be multiplexed together.
7709 How playback of multiple/alternate streams is handled is up to the
7710 application.
7711 Some conventions based on included metadata aid interoperability
7712 in this respect.
7713 %TODO: describe multiple vs. alternate streams, language mapping
7714 % and reference metadata descriptions.
7716 \subsection{Chained streams}
7718 Ogg Theora decoders and playback applications MUST support both grouped
7719 streams (multiplexed concurrent logical streams) and chained streams
7720 (sequential concatenation of independent physical bitstreams).
7722 The number and codec data types of multiplexed streams and the decoder
7723 parameters for those stream types that re-occur can all change at a
7724 chaining boundary.
7725 A playback application MUST be prepared to handle such changes and
7726 SHOULD do so smoothly with the minimum possible visible disruption.
7727 The specification of grouped streams below applies independently to each
7728 segment of a chained bitstream.
7730 \subsection{Grouped streams}
7732 At the beginning of a multiplexed stream, the `beginning of stream'
7733 pages for each logical bitstream will be grouped together.
7734 Within these, the first page to occur MUST be the Theora page.
7735 This facilitates identification of Ogg Theora files among other
7736 Ogg-encapsulated content.
7737 A playback application must nevertheless handle streams where this
7738 arrangement is not correct.
7739 %TBT: Then what's the point of requiring it in the spec?
7741 If there is more than one Theora logical stream, the first page should
7742 be from the primary stream.
7743 That is, it should be the stream that a generic player would best begin
7744 displaying without special user direction.
7745 If there is more than one audio stream, or more than one stream of any
7746 other type, the identification page of the primary stream of that type
7747 should be placed before the others.
7748 %TBT: That's all pretty vague.
7750 After the `beginning of stream' pages, the header pages of each of
7751 the logical streams should be grouped together before any data pages
7752 occur.
7753 %TBT: should or must?
7755 After all the header pages have been placed,
7756 the data pages are multiplexed together.
7757 They should be placed in the stream in increasing order by the playback
7758 time equivalents of their granule fields.
7759 This facilitates seeking while limiting the buffering requirements of the
7760 playback demultiplexer.
7761 %TODO: A lot of this language is encoder-oriented.
7762 %TODO: We define a decoder-oriented specification.
7763 %TODO: The language should be changed to match.
7765 \cleardoublepage
7766 \chapter{VP3}
7768 \section{VP3 Compatibility}
7769 \label{app:vp3-compat}
7770 This section lists all of the encoder and decoder issues that may affect VP3
7771 compatibility.
7772 Each is described in more detail in the text itself.
7773 This list is provided merely for reference.
7775 \begin{itemize}
7776 \item
7777 Bitstream headers (Section~\ref{sec:headers}).
7778 \begin{itemize}
7779 \item
7780 Identification header (Section~\ref{sec:idheader}).
7781 \begin{itemize}
7782 \item
7783 Non-multiple of 16 picture sizes.
7784 \item
7785 Standardized color spaces.
7786 \item
7787 Support for $4:4:4$ and $4:2:2$ pixel formats.
7788 \end{itemize}
7789 \item
7790 Setup header
7791 \begin{itemize}
7792 \item
7793 Loop filter limit values (Section~\ref{sub:loop-filter-limits}).
7794 \item
7795 Quantization parameters (Section~\ref{sub:quant-params}).
7796 \item
7797 Huffman tables (Section~\ref{sub:huffman-tables}).
7798 \end{itemize}
7799 \end{itemize}
7800 \item
7801 Frame header format (Section~\ref{sub:frame-header}).
7802 \item
7803 Extended long-run bit strings (Section~\ref{sub:long-run}).
7804 \item
7805 INTER\_MV\_FOUR handling of uncoded blocks (Section~\ref{sub:mb-mv-decode}).
7806 \item
7807 Block-level \qi\ values (Section~\ref{sub:block-qis}).
7808 \item
7809 Zero-length EOB runs (Section~\ref{sub:eob-token}).
7810 \item
7811 Unrestricted motion vector padding and the loop filter
7812 (Section~\ref{sub:loop-filt}).
7813 \end{itemize}
7815 \section{Loop Filter Limit Values}
7816 \label{app:vp3-loop-filter-limits}
7818 The hard-coded loop filter limit values used in VP3 are defined as follows:
7819 \begin{align*}
7820 \bitvar{LFLIMS} = & \begin{array}[t]{r@{}rrrrrrrr@{}l}
7821 \{ & 30, & 25, & 20, & 20, & 15, & 15, & 14, & 14, & \\
7822 & 13, & 13, & 12, & 12, & 11, & 11, & 10, & 10, & \\
7823 & 9, & 9, & 8, & 8, & 7, & 7, & 7, & 7, & \\
7824 & 6, & 6, & 6, & 6, & 5, & 5, & 5, & 5, & \\
7825 & 4, & 4, & 4, & 4, & 3, & 3, & 3, & 3, & \\
7826 & 2, & 2, & 2, & 2, & 2, & 2, & 2, & 2, & \\
7827 & 0, & 0, & 0, & 0, & 0, & 0, & 0, & 0, & \\
7828 & 0, & 0, & 0, & 0, & 0, & 0, & 0, & 0\;\ & \!\} \\
7829 \end{array}
7830 \end{align*}
7832 \section{Quantization Parameters}
7833 \label{app:vp3-quant-params}
7835 The hard-coded quantization parameters used by VP3 are defined as follows:
7837 \begin{align*}
7838 \bitvar{ACSCALE} = & \begin{array}[t]{r@{}rrrrrrrr@{}l}
7839 \{ & 500, & 450, & 400, & 370, & 340, & 310, & 285, & 265, & \\
7840 & 245, & 225, & 210, & 195, & 185, & 180, & 170, & 160, & \\
7841 & 150, & 145, & 135, & 130, & 125, & 115, & 110, & 107, & \\
7842 & 100, & 96, & 93, & 89, & 85, & 82, & 75, & 74, & \\
7843 & 70, & 68, & 64, & 60, & 57, & 56, & 52, & 50, & \\
7844 & 49, & 45, & 44, & 43, & 40, & 38, & 37, & 35, & \\
7845 & 33, & 32, & 30, & 29, & 28, & 25, & 24, & 22, & \\
7846 & 21, & 19, & 18, & 17, & 15, & 13, & 12, & 10\;\ & \!\} \\
7847 \end{array} \\
7848 \bitvar{DCSCALE} = & \begin{array}[t]{r@{}rrrrrrrr@{}l}
7849 \{ & 220, & 200, & 190, & 180, & 170, & 170, & 160, & 160, & \\
7850 & 150, & 150, & 140, & 140, & 130, & 130, & 120, & 120, & \\
7851 & 110, & 110, & 100, & 100, & 90, & 90, & 90, & 80, & \\
7852 & 80, & 80, & 70, & 70, & 70, & 60, & 60, & 60, & \\
7853 & 60, & 50, & 50, & 50, & 50, & 40, & 40, & 40, & \\
7854 & 40, & 40, & 30, & 30, & 30, & 30, & 30, & 30, & \\
7855 & 30, & 20, & 20, & 20, & 20, & 20, & 20, & 20, & \\
7856 & 20, & 10, & 10, & 10, & 10, & 10, & 10, & 10\;\ & \!\} \\
7857 \end{array}
7858 \end{align*}
7860 VP3 defines only a single quantization range for each quantization type and
7861 color plane, and the base matrix used is constant throughout the range.
7862 There are three base matrices defined.
7863 The first is used for the $Y'$ channel of INTRA mode blocks, and the second for
7864 both the $C_b$ and $C_r$ channels of INTRA mode blocks.
7865 The last is used for INTER mode blocks of all channels.
7867 \begin{align*}
7868 \bitvar{BMS} = \{ & \begin{array}[t]{r@{}rrrrrrrr@{}l}
7869 \{ & 16, & 11, & 10, & 16, & 24, & 40, & 51, & 61, & \\
7870 & 12, & 12, & 14, & 19, & 26, & 58, & 60, & 55, & \\
7871 & 14, & 13, & 16, & 24, & 40, & 57, & 69, & 56, & \\
7872 & 14, & 17, & 22, & 29, & 51, & 87, & 80, & 62, & \\
7873 & 18, & 22, & 37, & 58, & 68, & 109, & 103, & 77, & \\
7874 & 24, & 35, & 55, & 64, & 81, & 104, & 113, & 92, & \\
7875 & 49, & 64, & 78, & 87, & 103, & 121, & 120, & 101, & \\
7876 & 72, & 92, & 95, & 98, & 112, & 100, & 103, & 99\;\ & \!\}, \\
7877 %\end{array} \\
7878 %& \begin{array}[t]{r@{}rrrrrrrr@{}l}
7879 \{ & 17, & 18, & 24, & 47, & 99, & 99, & 99, & 99, & \\
7880 & 18, & 21, & 26, & 66, & 99, & 99, & 99, & 99, & \\
7881 & 24, & 26, & 56, & 99, & 99, & 99, & 99, & 99, & \\
7882 & 47, & 66, & 99, & 99, & 99, & 99, & 99, & 99, & \\
7883 & 99, & 99, & 99, & 99, & 99, & 99, & 99, & 99, & \\
7884 & 99, & 99, & 99, & 99, & 99, & 99, & 99, & 99, & \\
7885 & 99, & 99, & 99, & 99, & 99, & 99, & 99, & 99, & \\
7886 & 99, & 99, & 99, & 99, & 99, & 99, & 99, & 99\;\ & \!\}, \\
7887 %\end{array} \\
7888 %& \begin{array}[t]{r@{}rrrrrrrr@{}l}
7889 \{ & 16, & 16, & 16, & 20, & 24, & 28, & 32, & 40, & \\
7890 & 16, & 16, & 20, & 24, & 28, & 32, & 40, & 48, & \\
7891 & 16, & 20, & 24, & 28, & 32, & 40, & 48, & 64, & \\
7892 & 20, & 24, & 28, & 32, & 40, & 48, & 64, & 64, & \\
7893 & 24, & 28, & 32, & 40, & 48, & 64, & 64, & 64, & \\
7894 & 28, & 32, & 40, & 48, & 64, & 64, & 64, & 96, & \\
7895 & 32, & 40, & 48, & 64, & 64, & 64, & 96, & 128, & \\
7896 & 40, & 48, & 64, & 64, & 64, & 96, & 128, & 128\;\ & \!\}\;\;\} \\
7897 \end{array}
7898 \end{align*}
7900 The remaining parameters simply assign these matrices to the proper quant
7901 ranges.
7903 \begin{align*}
7904 \bitvar{NQRS} = & \{ \{1, 1, 1\}, \{1, 1, 1\} \} \\
7905 \bitvar{QRSIZES} = &
7906 \{ \{ \{1\}, \{1\}, \{1\} \}, \{ \{1\}, \{1\}, \{1\} \} \} \\
7907 \bitvar{QRBMIS} = &
7908 \{ \{ \{0, 0\}, \{1, 1\}, \{1, 1\} \}, \{ \{2, 2\}, \{2, 2\}, \{2, 2\} \} \} \\
7909 \end{align*}
7911 \section{Huffman Tables}
7912 \label{app:vp3-huffman-tables}
7914 The following tables contain the hard-coded Huffman codes used by VP3.
7915 There are 80 tables in all, each with a Huffman code for all 32 token values.
7916 The tokens are sorted by the most significant bits of their Huffman code.
7917 This is the same order in which they will be decoded from the setup header.
7919 \include{vp3huff}
7921 \cleardoublepage
7922 \chapter{Colophon}
7924 Ogg is a \href{http://www.xiph.org}{Xiph.org Foundation} effort to protect
7925 essential tenets of Internet multimedia from corporate hostage-taking; Open
7926 Source is the net's greatest tool to keep everyone honest.
7927 See \href{http://www.xiph.org/about.html}{About the Xiph.org Foundation} for
7928 details.
7930 Ogg Theora is the first Ogg video codec.
7931 Anyone may freely use and distribute the Ogg and Theora specifications, whether
7932 in private, public, or corporate capacity.
7933 However, the Xiph.org Foundation and the Ogg project reserve the right to set
7934 the Ogg Theora specification and certify specification compliance.
7936 Xiph.org's Theora software codec implementation is distributed under a BSD-like
7937 license.
7938 This does not restrict third parties from distributing independent
7939 implementations of Theora software under other licenses.
7941 \begin{wrapfigure}{l}{0pt}
7942 \includegraphics[width=2.5cm]{xifish}
7943 \end{wrapfigure}
7945 These pages are copyright \textcopyright{} 2004 Xiph.org Foundation.
7946 All rights reserved.
7947 Ogg, Theora, Vorbis, Xiph.org Foundation and their logos are trademarks
7948 (\texttrademark) of the \href{http://www.xiph.org}{Xiph.org Foundation}.
7950 This document is set in \LaTeX.
7954 \cleardoublepage
7955 \bibliography{spec}
7957 \end{document}