theora/doc/spec/spec.tex

   1 \documentclass[9pt,letterpaper]{book}
   2
   3 \usepackage{latexsym}
   4 \usepackage{amssymb}
   5 \usepackage{amsmath}
   6 \usepackage{bm}
   7 \usepackage{textcomp}
   8 \usepackage{graphicx}
   9 \usepackage{booktabs}
  10 \usepackage{tabularx}
  11 \usepackage{longtable}
  12 \usepackage{ltablex}
  13 \usepackage{wrapfig}
  14 \usepackage[pdfpagemode=None,pdfstartview=FitH,pdfview=FitH,colorlinks=true]%
  15  {hyperref}
  16
  17 \newtheorem{theorem}{Theorem}[section]
  18 \newcommand{\idx}[1]{{\ensuremath{\mathit{#1}}}}
  19 \newcommand{\qti}{\idx{qti}}
  20 \newcommand{\qtj}{\idx{qtj}}
  21 \newcommand{\pli}{\idx{pli}}
  22 \newcommand{\plj}{\idx{plj}}
  23 \newcommand{\qi}{\idx{qi}}
  24 \newcommand{\ci}{\idx{ci}}
  25 \newcommand{\bmi}{\idx{bmi}}
  26 \newcommand{\bmj}{\idx{bmj}}
  27 \newcommand{\qri}{\idx{qri}}
  28 \newcommand{\qrj}{\idx{qrj}}
  29 \newcommand{\hti}{\idx{hti}}
  30 \newcommand{\sbi}{\idx{sbi}}
  31 \newcommand{\bi}{\idx{bi}}
  32 \newcommand{\bj}{\idx{bj}}
  33 \newcommand{\mbi}{\idx{mbi}}
  34 \newcommand{\mbj}{\idx{mbj}}
  35 \newcommand{\mi}{\idx{mi}}
  36 \newcommand{\cbi}{\idx{cbi}}
  37 \newcommand{\qii}{\idx{qii}}
  38 \newcommand{\ti}{\idx{ti}}
  39 \newcommand{\tj}{\idx{tj}}
  40 \newcommand{\rfi}{\idx{rfi}}
  41 \newcommand{\zzi}{\idx{zzi}}
  42 \newcommand{\ri}{\idx{ri}}
  43 %This somewhat odd construct ensures that \bitvar{\qi}, etc., will set the
  44 % qi in bold face, even though it is in a \mathit font, yet \bitvar{VAR} will
  45 % set VAR in a bold, roman font.
  46 \newcommand{\bitvar}[1]{\ensuremath{\mathbf{\bm{#1}}}}
  47 \newcommand{\locvar}[1]{\ensuremath{\mathrm{#1}}}
  48 \newcommand{\term}[1]{{\em #1}}
  49 \newcommand{\bin}[1]{\ensuremath{\mathtt{b#1}}}
  50 \newcommand{\hex}[1]{\ensuremath{\mathtt{0x#1}}}
  51 \newcommand{\ilog}{\ensuremath{\mathop{\mathrm{ilog}}\nolimits}}
  52 \newcommand{\round}{\ensuremath{\mathop{\mathrm{round}}\nolimits}}
  53 \newcommand{\sign}{\ensuremath{\mathop{\mathrm{sign}}\nolimits}}
  54 \newcommand{\lflim}{\ensuremath{\mathop{\mathrm{lflim}}\nolimits}}
  55
  56 %Chapter-based table, figure, and equation numbering.
  57 \numberwithin{equation}{chapter}
  58 \numberwithin{figure}{chapter}
  59 \numberwithin{table}{chapter}
  60
  61 \keepXColumns
  62
  63 \pagestyle{headings}
  64 \bibliographystyle{alpha}
  65
  66 \title{Theora I Specification}
  67 \author{Xiph.org Foundation}
  68 \date{\today}
  69
  70
  71 \begin{document}
  72
  73 \frontmatter
  74
  75 \begin{titlepage}
  76 \maketitle
  77 \end{titlepage}
  78 \thispagestyle{empty}
  79 \cleardoublepage
  80
  81 \pagenumbering{roman}
  82
  83 \thispagestyle{plain}
  84 \tableofcontents
  85 \cleardoublepage
  86
  87 \thispagestyle{plain}
  88 \listoffigures
  89 \cleardoublepage
  90
  91 \thispagestyle{plain}
  92 \listoftables
  93 \cleardoublepage
  94
  95 \thispagestyle{plain}
  96 \markboth{{\sc Notation and Conventions}}{{\sc Notation and Conventions}}
  97 \chapter*{Notation and Conventions}
  98
  99 All parameters either passed in or out of a decoding procedure are given in
 100  \bitvar{bold\ face}.
 101
 102 The prefix \bin{} indicates that the following value is to be interpreted as a
 103  binary number (base 2).
 104 \begin{verse}
 105 {\bf Example:} The value \bin{1110100} is equal to the decimal value 116.
 106 \end{verse}
 107
 108 The prefix \hex{} indicates that the following value is to be interpreted as a
 109  hexadecimal number (base 16).
 110 \begin{verse}
 111 {\bf Example:} The value \hex{74} is equal to the decimal value 116.
 112 \end{verse}
 113
 114 All arithmetic defined by this specification is exact.
 115 However, any real numbers that do arise will always be converted back to
 116  integers again in short order.
 117 The entire specification can be implemented using only normal integer
 118  operations.
 119 All operations are to be implemented with sufficiently large integers so that
 120  overflow cannot occur.
 121 Where the result of a computation is to be truncated to a fixed-sized binary
 122  representation, this will be explicitly noted.
 123 The size given for all variables is the maximum number of bits needed to store
 124  any value in that variable.
 125 Intermediate computations involving that variable may require more bits.
 126
 127 The following operators are defined:
 128
 129 \begin{description}
 130 \item[$|a|$]
 131 The absolute value of a number $a$.
 132 \begin{align*}
 133 |a| & = \left\{\begin{array}{ll}
 134 -a, & a < 0 \\
 135 a, & a \ge 0
 136 \end{array}\right.
 137 \end{align*}
 138
 139 \item[$a*b$]
 140 Multiplication of a number $a$ by a number $b$.
 141 \item[$\frac{a}{b}$]
 142 Exact division of a number $a$ by a number $b$, producing a potentially
 143  non-integer result.
 144
 145 \item[$\left\lfloor a\right\rfloor$]
 146 The largest integer less than or equal to a real number $a$.
 147
 148 \item[$\left\lceil a\right\rceil$]
 149 The smallest integer greater than or equal to a real number $a$.
 150
 151 \item[$a//b$]
 152 Integer division of $a$ by $b$.
 153 \begin{align*}
 154 a//b & = \left\{\begin{array}{ll}
 155 \left\lceil\frac{a}{b}\right\rceil, & a < 0 \\
 156 \left\lfloor\frac{a}{b}\right\rfloor, & a \ge 0
 157 \end{array}\right.
 158 \end{align*}
 159
 160 \item[$a\%b$]
 161 The remainder from the integer division of $a$ by $b$.
 162 \begin{align*}
 163 a\%b & = |a|-|b|*|a//b|
 164 \end{align*}
 165 Note that with this definition, the result is always non-negative and less than
 166  $|b|$.
 167
 168 \item[$a<<b$]
 169 The value obtained by left-shifting the two's complement integer $a$ by $b$
 170  bits.
 171 For purposes of this specification, overflow is ignored, and so this is
 172  equivalent to integer multiplication of $a$ by $2^b$.
 173
 174 \item[$a>>b$]
 175 The value obtained by right-shifting the two's complement integer $a$ by $b$
 176  bits, filling in the leftmost bits of the new value with $0$ if $a$ is
 177  non-negative and $1$ if $a$ is negative.
 178 This is {\em not} equivalent to integer division of $a$ by $2^b$.
 179 Instead,
 180 \begin{align*}
 181 a>>b & = \left\lfloor\frac{a}{2^b}\right\rfloor.
 182 \end{align*}
 183
 184 \item[$\round(a)$]
 185 Rounds a number $a$ to the nearest integer, with ties rounded away from $0$.
 186 \begin{align*}
 187 \round(a) = \left\{\begin{array}{ll}
 188 \lceil a-\frac{1}{2}\rceil   & a \le 0 \\
 189 \lfloor a+\frac{1}{2}\rfloor & a > 0
 190 \end{array}\right.
 191 \end{align*}
 192
 193 \item[$\sign(a)$]
 194 Returns the sign of a given number.
 195 \begin{align*}
 196 \sign(a) = \left\{\begin{array}{ll}
 197 -1 & a < 0 \\
 198 0  & a = 0 \\
 199 1  & a > 0
 200 \end{array}\right.
 201 \end{align*}
 202
 203 \item[$\ilog(a)$]
 204 The minimum number of bits required to store a positive integer $a$ in
 205  two's complement notation, or $0$ for a non-positive integer $a$.
 206 \begin{align*}
 207 \ilog(a) = \left\{\begin{array}{ll}
 208 0, & a \le 0 \\
 209 \left\lfloor\log_2{a}\right\rfloor+1, & a > 0
 210 \end{array}\right.
 211 \end{align*}
 212
 213 \begin{verse}
 214 {\bf Examples:}
 215 \begin{itemize}
 216 \item $\ilog(-1)=0$
 217 \item $\ilog(0)=0$
 218 \item $\ilog(1)=1$
 219 \item $\ilog(2)=2$
 220 \item $\ilog(3)=2$
 221 \item $\ilog(4)=3$
 222 \item $\ilog(7)=3$
 223 \end{itemize}
 224 \end{verse}
 225
 226 \item[$\min(a,b)$]
 227 The minimum of two numbers $a$ and $b$.
 228
 229 \item[$\max(a,b)$]
 230 The maximum of two numbers $a$ and $b$.
 231
 232 \end{description}
 233 \cleardoublepage
 234
 235
 236 \thispagestyle{plain}
 237 \markboth{{\sc Key words}}{{\sc Key words}}
 238 \chapter*{Key words}
 239
 240 %We can't rewrite this, because this is text required by RFC 2119, so we use
 241 % some emergency stretching to get it typeset properly.
 242 \setlength{\emergencystretch}{2em}
 243 The key words ``MUST'', ``MUST NOT'', ``REQUIRED'', ``SHALL'', ``SHALL NOT'',
 244  ``SHOULD'', ``SHOULD NOT'', ``RECOMMENDED'', ``MAY'', and ``OPTIONAL'' in this
 245  document are to be interpreted as described in RFC 2119 \cite{rfc2119}.\par
 246 \setlength{\emergencystretch}{0em}
 247
 248 Where such assertions are placed on the contents of a Theora bitstream itself,
 249  implementations should be prepared to encounter bitstreams that do not follow
 250  these requirements.
 251 An application's behavior in the presence of such non-conforming bitstreams
 252  is not defined by this specification, but any reasonable method of handling
 253  them MAY be used.
 254 By way of example, applications MAY discard the current frame, retain the
 255  current output thus far, or attempt to continue on by assuming some default
 256  values for the erroneous bits.
 257 When such an error occurs in the bitstream headers, an application MAY refuse
 258  to decode the entire stream.
 259 An application SHOULD NOT allow such non-conformant bitstreams to overflow
 260  buffers and potentially execute arbitrary code, as this represents a serious
 261  security risk.
 262
 263 An application MUST, however, ensure any bits marked as reserved have the value
 264  zero, and refuse to decode the stream if they do not.
 265 These are used as place holders for future bitstream features with which the
 266  current bitstream is forward-compatible.
 267 Such features may not increment the bitstream version number, and can only be
 268  recognized by checking the value of these reserved bits.
 269
 270 \cleardoublepage
 271
 272
 273
 274 \mainmatter
 275
 276 \pagenumbering{arabic}
 277 \setcounter{page}{1}
 278
 279 \chapter{Introduction}
 280
 281 Theora is a general purpose, lossy video codec.
 282 It is based on the VP3 video codec produced by On2 Technologies
 283  (\url{http://www.on2.com/}).
 284 On2 donated the VP3.1 source code to the Xiph.org Foundation and released it
 285  under a BSD-like license.
 286 On2 also made an irrevocable, royalty-free license grant for any patent claims
 287  it might have over the software and any derivatives.
 288 No formal specification exists for the VP3 format beyond this source code;
 289  however, Mike Melanson maintains a detailed description \cite{Mel04}.
 290 Portions of this specification were adopted from that text with permission.
 291
 292 \section{VP3 and Theora}
 293
 294 Theora contains a superset of the features that were available in the original
 295  VP3 codec.
 296 Content encoded with VP3.1 can be losslessly transcoded into the Theora format.
 297 Theora content cannot, in general, be losslessly transcoded into the VP3
 298  format.
 299 If a feature is not available in the original VP3 format, this is mentioned
 300  when that feature is defined.
 301 A complete list of these features appears in Appendix~\ref{app:vp3-compat}.
 302 %TODO: VP3 - theora comparison in appendix
 303
 304 \section{Video Formats}
 305
 306 Theora I currently supports progressive video data of arbitrary dimensions at a
 307  constant frame rate in one of several $Y'C_bC_r$ color spaces.
 308 The precise definition of the supported color spaces appears in
 309  Section~\ref{sec:colorspaces}.
 310 Three different chroma subsampling formats are supported: 4:2:0, 4:2:2,
 311  and 4:4:4.
 312 The precise details of each of these formats and their sampling locations are
 313  described in Section~\ref{sec:pixfmts}.
 314
 315 The Theora I format does not support interlaced material, variable frame rates,
 316  bit-depths larger than 8 bits per component, nor alternate color spaces such
 317  as RGB or arbitrary multi-channel spaces.
 318 Black and white content can be efficiently encoded, however, because the
 319  uniform chroma planes compress well.
 320 Support for interlaced material is planned for a future version.
 321 \begin{verse}
 322 {\bf Note:} Infrequently changing frame rates---as when film and video
 323  sequences are cut together---can be supported in the Ogg container format by
 324  chaining several Theora streams together.
 325 \end{verse}
 326 Support for increased bit depths or additional color spaces is not planned.
 327
 328 \section{Classification}
 329
 330 Theora I is a block-based lossy transform codec that utilizes an
 331  $8\times 8$ Type-II Discrete Cosine Transform and block-based motion
 332  compensation.
 333 This places it in the same class of codecs as MPEG-1, -2, -4, and H.263.
 334 The details of how individual blocks are organized and how DCT coefficients are
 335  stored in the bitstream differ substantially from these codecs, however.
 336 Theora supports only intra frames (I frames in MPEG) and inter frames (P frames
 337  in MPEG).
 338 There is no equivalent to the bi-predictive frames (B frames) found in MPEG
 339  codecs.
 340
 341 \section{Assumptions}
 342
 343 The Theora codec design assumes a complex, psychovisually-aware encoder and a
 344  simple, low-complexity decoder.
 345 %TODO: Talk more about implementation complexity.
 346
 347 Theora provides none of its own framing, synchronization, or protection against
 348  transmission errors.
 349 An encoder is solely a method of accepting input video frames and
 350  compressing these frames into raw, unformatted `packets'.
 351 The decoder then accepts these raw packets in sequence, decodes them, and
 352  synthesizes a facsimile of the original video frames.
 353 Theora is a free-form variable bit rate (VBR) codec, and packets have no
 354  minimum size, maximum size, or fixed/expected size.
 355
 356 Theora packets are thus intended to be used with a transport mechanism that
 357  provides free-form framing, synchronization, positioning, and error correction
 358  in accordance with these design assumptions, such as Ogg (for file transport)
 359  or RTP (for network multicast).
 360 For the purposes of a few examples in this document, we will assume that Theora
 361  is embedded in an Ogg stream specifically, although this is by no means a
 362  requirement or fundamental assumption in the Theora design.
 363
 364 The specification for embedding Theora into an Ogg transport stream is given in
 365  Appendix~\ref{app:oggencapsulation}.
 366
 367 \section{Codec Setup and Probability Model}
 368
 369 Theora's heritage is the proprietary commercial codec VP3, and it retains a
 370  fair amount of inflexibility when compared to Vorbis \cite{vorbis}, the first
 371  Xiph.org codec, which began as a research codec.
 372 However, to provide additional scope for encoder improvement, Theora adopts
 373  some of the configurable aspects of decoder setup that are present in Vorbis.
 374 This configuration data is not available in VP3, which uses hardcoded values
 375  instead.
 376
 377 Theora makes the same controversial design decision that Vorbis made to include
 378  the entire probability model for the DCT coefficients and all the quantization
 379  parameters in the bitstream headers.
 380 This is often several hundred fields.
 381 It is therefore impossible to decode any frame in the stream without
 382  having previously fetched the codec info and codec setup headers.
 383
 384 \begin{verse}
 385 {\bf Note:} Theora {\em can} initiate decode at an arbitrary intra-frame packet
 386  within a bitstream so long as the codec has been initialized with the setup
 387  headers.
 388 \end{verse}
 389
 390 Thus, Theora headers are both required for decode to begin and relatively large
 391  as bitstream headers go.
 392 The header size is unbounded, although as a rule-of-thumb less than 16kB is
 393  recommended, and Xiph.org's reference encoder follows this suggestion.
 394 %TODO: Is 8kB enough? My setup header is 7.4kB, that doesn't leave much room
 395 % for comments.
 396 %RG: the lesson from vorbis is that as small as possible is really
 397 % important in some applications. Practically, what's acceptable
 398 % depends a great deal on the target bitrate. I'd leave 16 kB in the
 399 % spec for now. fwiw more than 1k of comments is quite unusual.
 400
 401 Our own design work indicates that the primary liability of the required header
 402  is in mindshare; it is an unusual design and thus causes some amount of
 403  complaint among engineers as this runs against current design trends and
 404  points out limitations in some existing software/interface designs.
 405 However, we find that it does not fundamentally limit Theora's suitable
 406  application space.
 407
 408 %silvia: renamed
 409 %\subsection{Format Specification}
 410 \section{Format Conformance}
 411
 412 The Theora format is well-defined by its decode specification; any encoder that
 413  produces packets that are correctly decoded by an implementation following
 414  this specification may be considered a proper Theora encoder.
 415 A decoder must faithfully and completely implement the specification defined
 416  herein %, except where noted,
 417  to be considered a conformant Theora decoder.
 418 A decoder need not be implemented strictly as described, but the
 419  actual decoder process MUST be {\em entirely mathematically equivalent}
 420  to the described process.
 421 Where appropriate, a non-normative description of encoder processes is
 422  included.
 423 These sections will be marked as such, and a proper Theora encoder is not
 424  bound to follow them.
 425
 426 %TODO: \subsection{Hardware Profile}
 427
 428
 429 \chapter{Coded Video Structure}
 430
 431 Theora's encoding and decoding process is based on $8\times 8$ blocks of
 432  pixels.
 433 This section describes how a video frame is laid out, divided into
 434  blocks, and how those blocks are organized.
 435
 436 \section{Frame Layout}
 437
 438 A video frame in Theora is a two-dimensional array of pixels.
 439 Theora, like VP3, uses a right-handed coordinate system, with the origin in the
 440  lower-left corner of the frame.
 441 This is contrary to many video formats which use a left-handed coordinate
 442  system with the origin in the upper-left corner of the frame.
 443 %INT: This means that for interlaced material, the definition of `even fields'
 444 %INT:  and `odd fields' may be reversed between Theora and other video codecs.
 445 %INT: This document will always refer to them as `top fields' and `bottom
 446 %INT:  fields'.
 447
 448 Theora divides the pixel array up into three separate \term{color planes}, one
 449  for each of the $Y'$, $C_b$, and $C_r$ components of the pixel.
 450 The $Y'$ plane is also called the \term{luma plane}, and the $C_b$ and $C_r$
 451  planes are also called the \term{chroma planes}.
 452 Each plane is assigned a numerical value, as shown in
 453  Table~\ref{tab:color-planes}.
 454
 455 \begin{table}[htbp]
 456 \begin{center}
 457 \begin{tabular}{cl}\toprule
 458 Index & Color Plane \\\midrule
 459 $0$   & $Y'$        \\
 460 $1$   & $C_b$       \\
 461 $2$   & $C_r$       \\
 462 \bottomrule\end{tabular}
 463 \end{center}
 464 \caption{Color Plane Indices}
 465 \label{tab:color-planes}
 466 \end{table}
 467
 468 In some pixel formats, the chroma planes are subsampled by a factor of two
 469  in one or both directions.
 470 This means that the width or height of the chroma planes may be half that of
 471  the total frame width and height.
 472 The luma plane is never subsampled.
 473
 474 \section{Picture Region}
 475
 476 An encoded video frame in Theora is required to have a width and height that
 477  are multiples of sixteen, making an integral number of blocks even when the
 478  chroma planes are subsampled.
 479 However, inside a frame a smaller \term{picture region} may be defined
 480  to present material whose dimensions are not a multiple of sixteen pixels, as
 481  shown in Figure~\ref{fig:pic-frame}.
 482 The picture region can be offset from the lower-left corner of the frame by up
 483  to 255 pixels in each direction, and may have an arbitrary width and height,
 484  provided that it is contained entirely within the coded frame.
 485 It is this picture region that contains the actual video data.
 486 The portions of the frame which lie outside the picture region may contain
 487  arbitrary image data, so the frame must be cropped to the picture region
 488  before display.
 489 The picture region plays no other role in the decode process, which operates on
 490  the entire video frame.
 491
 492 \begin{figure}[htbp]
 493 \begin{center}
 494 \includegraphics{pic-frame}
 495 \end{center}
 496 \caption{Location of frame and picture regions}
 497 \label{fig:pic-frame}
 498 \end{figure}
 499
 500 \section{Blocks and Super Blocks}
 501 \label{sec:blocks-and-sbs}
 502
 503 Each color plane is subdivided into \term{blocks} of $8\times 8$ pixels.
 504 Blocks are grouped into $4\times 4$ arrays called \term{super blocks} as
 505  shown in Figure~\ref{fig:superblock}.
 506 Each color plane has its own set of blocks and super blocks.
 507 If the chroma planes are subsampled, they are still divided into $8\times 8$
 508  blocks of pixels; there are just fewer blocks than in the luma plane.
 509 The boundaries of blocks and super blocks in the luma plane do not necessarily
 510  coincide with those of the chroma planes, if the chroma planes have been
 511  subsampled.
 512
 513 \begin{figure}[htbp]
 514 \begin{center}
 515 \includegraphics{superblock}
 516 \end{center}
 517 \caption{Subdivision of a frame into blocks and super blocks}
 518 \label{fig:superblock}
 519 \end{figure}
 520
 521 Blocks are accessed in two different orders in the various decoder processes.
 522 The first is \term{raster order}, illustrated in Figure~\ref{fig:raster-block}.
 523 This accesses each block in row-major order, starting in the lower left of the
 524  frame and continuing along the bottom row of the entire frame, followed by the
 525  next row up, starting on the left edge of the frame, etc.
 526
 527 \begin{figure}[htbp]
 528 \begin{center}
 529 \includegraphics{raster-block}
 530 \end{center}
 531 \caption{Raster ordering of $n\times m$ blocks}
 532 \label{fig:raster-block}
 533 \end{figure}
 534
 535 The second is \term{coded order}.
 536 In coded order, blocks are accessed by super block.
 537 Within each frame, super blocks are traversed in raster order,
 538  similar to raster order for blocks.
 539 Within each super block, however, blocks are accessed in a Hilbert curve
 540  pattern, illustrated in Figure~\ref{fig:hilbert-block}.
 541 If a color plane does not contain a complete super block on the top or right
 542  sides, the same ordering is still used, simply with any blocks outside the
 543  frame boundary omitted.
 544
 545 \begin{figure}[htbp]
 546 \begin{center}
 547 \includegraphics{hilbert-block}
 548 \end{center}
 549 \caption{Hilbert curve ordering of blocks within a super block}
 550 \label{fig:hilbert-block}
 551 \end{figure}
 552
 553 To illustrate this ordering, consider a frame that is 240 pixels wide and
 554  48 pixels high.
 555 Each row of the luma plane has 30 blocks and 8 super blocks, and there are 6
 556  rows of blocks and two rows of super blocks.
 557
 558 %When accessed in raster order, each block in the luma plane is assigned the
 559 % following indices:
 560
 561 %\vspace{\baselineskip}
 562 %\begin{center}
 563 %\begin{tabular}{|ccccccc|}\hline
 564 %150 & 151 & 152 & 153 & $\ldots$ & 178 & 179 \\
 565 %120 & 121 & 122 & 123 & $\ldots$ & 148 & 149 \\\hline
 566 % 90 &  91 &  92 &  93 & $\ldots$ & 118 & 119 \\
 567 % 60 &  61 &  62 &  63 & $\ldots$ &  88 &  89 \\
 568 % 30 &  31 &  32 &  33 & $\ldots$ &  58 &  59 \\
 569 %  0 &   1 &   2 &   3 & $\ldots$ &  28 &  29 \\\hline
 570 %\end{tabular}
 571 %\end{center}
 572 %\vspace{\baselineskip}
 573
 574 When accessed in coded order, each block in the luma plane is assigned the
 575  following indices:
 576
 577 \vspace{\baselineskip}
 578 \begin{center}
 579 \begin{tabular}{|cccc|c|cc|}\hline
 580 123 & 122 & 125 & 124 & $\ldots$ & 179 & 178 \\
 581 120 & 121 & 126 & 127 & $\ldots$ & 176 & 177 \\\hline
 582   5 &   6 &   9 &  10 & $\ldots$ & 117 & 118 \\
 583   4 &   7 &   8 &  11 & $\ldots$ & 116 & 119 \\
 584   3 &   2 &  13 &  12 & $\ldots$ & 115 & 114 \\
 585   0 &   1 &  14 &  15 & $\ldots$ & 112 & 113 \\\hline
 586 \end{tabular}
 587 \end{center}
 588 \vspace{\baselineskip}
 589
 590 Here the index values specify the order in which the blocks would be accessed.
 591 The indices of the blocks are numbered continuously from one color plane to the
 592  next.
 593 They do not reset to zero at the start of each plane.
 594 Instead, the numbering increases continuously from the $Y'$ plane to the $C_b$
 595  plane to the $C_r$ plane.
 596 The implication is that the blocks from all planes are treated as a unit during
 597  the various processing steps.
 598
 599 Although blocks are sometimes accessed in raster order, in this document the
 600  index associated with a block is {\em always} its index in coded order.
 601
 602 \section{Macro Blocks}
 603 \label{sec:mbs}
 604
 605 A macro block contains a $2\times 2$ array of blocks in the luma plane
 606  {\em and} the co-located blocks in the chroma planes, as shown in
 607  Figure~\ref{fig:macroblock}.
 608 Thus macro blocks can represent anywhere from six to twelve blocks, depending
 609  on how the chroma planes are subsampled.
 610 This is in contrast to super blocks, which only contain blocks from a single
 611  color plane.
 612 % the whole super vs. macro blocks thing is a little confusing, and it can be
 613 % hard to remember which is what initially. A figure would/will help here,
 614 % but I tried to add some text emphasizing the difference in terms of
 615 % functionality.
 616 %TBT: At this point we haven't described any functionality yet.
 617 %TBT: As far as the reader knows, the only purpose of the blocks, macro blocks
 618 %TBT:  and super blocks is for data organization---and for blocks and super
 619 %TBT:  blocks, this is essentially true.
 620 %TBT: So lets restrict the differences we emphasize to those of data
 621 %TBT:  organization, which the sentence I just added above does.
 622 Macro blocks contain information about coding mode and motion vectors for the
 623  corresponding blocks in all color planes.
 624
 625 \begin{figure}[htbp]
 626  \begin{center}
 627  \includegraphics{macroblock}
 628  \end{center}
 629  \caption{Subdivision of a frame into macro blocks}
 630  \label{fig:macroblock}
 631 \end{figure}
 632
 633 Macro blocks are also accessed in a \term{coded order}.
 634 This coded order proceeds by examining each super block in the luma plane in
 635  raster order, and traversing the four macro blocks inside using a smaller
 636  Hilbert curve, as shown in Figure~\ref{fig:hilbert-mb}.
 637 %r: I rearranged the wording to make a more formal idiom here
 638 If the luma plane does not contain a complete super block on the top or right
 639  sides, the same ordering is still used, with any macro blocks outside
 640  the frame boundary simply omitted.
 641 Because the frame size is constrained to be a multiple of 16, there are never
 642  any partial macro blocks.
 643 Unlike blocks, macro blocks need never be accessed in a pure raster order.
 644
 645 \begin{figure}[htbp]
 646 \begin{center}
 647 \includegraphics{hilbert-mb}
 648 \end{center}
 649 \caption{Hilbert curve ordering of macro blocks within a super block}
 650 \label{fig:hilbert-mb}
 651 \end{figure}
 652
 653 Using the same frame size as the example above, there are 15 macro blocks in
 654  each row and 3 rows of macro blocks.
 655 The macro blocks are assigned the following indices:
 656
 657 \vspace{\baselineskip}
 658 \begin{center}
 659 \begin{tabular}{|cc|cc|c|cc|c|}\hline
 660 30 & 31 & 32 & 33 & $\cdots$ & 42 & 43 & 44 \\\hline
 661  1 &  2 &  5 &  6 & $\cdots$ & 25 & 26 & 29 \\
 662  0 &  3 &  4 &  7 & $\cdots$ & 24 & 27 & 28 \\\hline
 663 \end{tabular}
 664 \end{center}
 665 \vspace{\baselineskip}
 666
 667 \section{Coding Modes and Prediction}
 668
 669 Each block is coded using one of a small, fixed set of \term{coding modes} that
 670  define how the block is predicted from previous frames.
 671 A block is predicted using one of two \term{reference frames}, selected
 672  according to the coding mode.
 673 A reference frame is the fully decoded version of a previous frame in the
 674  stream.
 675 The first available reference frame is the previous intra frame, called the
 676  \term{golden frame}.
 677 The second available reference frame is the previous frame, whether it was an
 678  intra frame or an inter frame.
 679 If the previous frame was an intra frame, then both reference frames are the
 680  same.
 681 See Figure~\ref{fig:reference-frames} for an illustration of the reference
 682  frames used for an inter frame that does not follow an intra frame.
 683
 684 \begin{figure}[htbp]
 685 \begin{center}
 686 \includegraphics{reference-frames}
 687 \end{center}
 688 \caption{Example of reference frames for an inter frame}
 689 \label{fig:reference-frames}
 690 \end{figure}
 691
 692 Two coding modes in particular are worth mentioning here.
 693 The INTRA mode is used for blocks that are not predicted from either reference
 694  frame.
 695 This is the only coding mode allowed in intra frames.
 696 The INTER\_NOMV coding mode uses the co-located contents of the block in the
 697  previous frame as the predictor.
 698 This is the default coding mode.
 699
 700 \section{DCT Coefficients}
 701 \label{sec:dct-coeffs}
 702
 703 A \term{residual} is added to the predicted contents of a block to form the
 704  final reconstruction.
 705 The residual is stored as a set of quantized coefficients from an integer
 706  approximation of a two-dimensional Type II Discrete Cosine Transform.
 707 The DCT takes an $8\times 8$ array of pixel values as input and returns an
 708  $8\times 8$ array of coefficient values.
 709 The \term{natural ordering} of these coefficients is defined to be row-major
 710  order, from lowest to highest frequency.
 711 They are also often indexed in \term{zig-zag order}, as shown in
 712  Figure~\ref{tab:zig-zag}.
 713
 714 \begin{figure}[htbp]
 715 \begin{center}
 716 \begin{tabular}[c]{rr|c@{}c@{}c@{}c@{}c@{}c@{}c@{}c@{}c@{}c@{}c@{}c@{}c@{}c@{}c}
 717  &\multicolumn{1}{r}{} & && &&&&&$c$&&& && &&  \\
 718  &\multicolumn{1}{r}{} &0&&1&&2&&3&&4&&5&&6&&7 \\\cline{3-17}
 719  &0 &  0 &$\rightarrow$&  1 &&  5 &$\rightarrow$&  6 && 14 &$\rightarrow$& 15 && 27 &$\rightarrow$& 28            \\[-0.5\defaultaddspace]
 720  &  &    &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$&                  \\
 721  &1 &  2 &             &  4 &&  7 &             & 13 && 16 &             & 26 && 29 &             & 42            \\[-0.5\defaultaddspace]
 722  &  &$\downarrow$&$\nearrow$&&$\swarrow$&&$\nearrow$&&$\swarrow$&&$\nearrow$&&$\swarrow$&&$\nearrow$&$\downarrow$ \\
 723  &2 &  3 &             &  8 && 12 &             & 17 && 25 &             & 30 && 41 &             & 43            \\[-0.5\defaultaddspace]
 724  &  &    &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$&                  \\
 725  &3 &  9 &             & 11 && 18 &             & 24 && 31 &             & 40 && 44 &             & 53            \\[-0.5\defaultaddspace]
 726 $r$&&$\downarrow$&$\nearrow$&&$\swarrow$&&$\nearrow$&&$\swarrow$&&$\nearrow$&&$\swarrow$&&$\nearrow$&$\downarrow$ \\
 727  &4 & 10 &             & 19 && 23 &             & 32 && 39 &             & 45 && 52 &             & 54            \\[-0.5\defaultaddspace]
 728  &  &    &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$&                  \\
 729  &5 & 20 &             & 22 && 33 &             & 38 && 46 &             & 51 && 55 &             & 60            \\[-0.5\defaultaddspace]
 730  &  &$\downarrow$&$\nearrow$&&$\swarrow$&&$\nearrow$&&$\swarrow$&&$\nearrow$&&$\swarrow$&&$\nearrow$&$\downarrow$ \\
 731  &6 & 21 &             & 34 && 37 &             & 47 && 50 &             & 56 && 59 &             & 61            \\[-0.5\defaultaddspace]
 732  &  &    &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$&&$\nearrow$& &$\swarrow$&                  \\
 733  &7 & 35 &$\rightarrow$& 36 && 48 &$\rightarrow$& 49 && 57 &$\rightarrow$& 58 && 62 &$\rightarrow$& 63
 734 \end{tabular}
 735 \end{center}
 736 \caption{Zig-zag order}
 737 \label{tab:zig-zag}
 738 \end{figure}
 739
 740 \begin{verse}
 741 {\bf Note:} the row and column indices refer to {\em frequency number} and not
 742  pixel locations.
 743 The frequency numbers are defined independently of the memory organization of
 744  the pixels.
 745 They have been written from top to bottom here to follow conventional notation,
 746  despite the right-handed coordinate system Theora uses for pixel locations.
 747 %RG: I'd rather we were internally consistent and put dc at the lower left.
 748 Many implementations of the DCT operate `in-place'.
 749 That is, they return DCT coefficients in the same memory buffer that the
 750  initial pixel values were stored in.
 751 Due to the right-handed coordinate system used for pixel locations in Theora,
 752  one must note carefully how both pixel values and DCT coefficients are
 753  organized in memory in such a system.
 754 \end{verse}
 755
 756 DCT coefficient $(0,0)$ is called the \term{DC coefficient}.
 757 All the other coefficients are called \term{AC coefficients}.
 758
 759
 760 \chapter{Decoding Overview}
 761
 762 This section provides a high level description of the Theora codec's
 763  construction.
 764 A bit-by-bit specification appears beginning in Section~\ref{sec:bitpacking}.
 765 The later sections assume a high-level understanding of the Theora decode
 766  process, which is provided below.
 767
 768 \section{Decoder Configuration}
 769
 770 Decoder setup consists of configuration of the quantization matrices and the
 771  Huffman codebooks for the DCT coefficients, and a table of limit values for
 772  the deblocking filter.
 773 The remainder of the decoding pipeline is not configurable.
 774
 775 \subsection{Global Configuration}
 776
 777 The global codec configuration consists of a few video related fields, such as
 778  frame rate, frame size, picture size and offset, aspect ratio, color space,
 779  pixel format, and a version number.
 780 The version number is divided into a major version, a minor version, and a
 781  minor revision number.
 782 %r: afaik the released vp3 codec called itself 3.1 and is compatible w/ theora
 783 %r: even though we received the in-progress 3.2 codebase
 784 For the format defined in this specification, these are `3', `2', and
 785  `0', respectively, in reference to Theora's origin as a successor to the VP3.1
 786  format.
 787
 788 \subsection{Quantization Matrices}
 789
 790 Theora allows up to 384 different quantization matrices to be defined, one for
 791  each \term{quantization type}, \term{color plane} ($Y'$, $C_b$, or $C_r$), and
 792  \term{quantization index}, \qi, which ranges from zero to 63, inclusive.
 793 There are currently two quantization types defined, which depend on the coding
 794  mode of the block being dequantized, as shown in Table~\ref{tab:quant-types}.
 795
 796 \begin{table}[htbp]
 797 \begin{center}
 798 \begin{tabular}{cl}\toprule
 799 Quantization Type & Usage                     \\\midrule
 800 $0$               & INTRA-mode blocks         \\
 801 $1$               & Blocks in any other mode. \\
 802 \bottomrule\end{tabular}
 803 \end{center}
 804 \caption{Quantization Type Indices}
 805 \label{tab:quant-types}
 806 \end{table}
 807
 808 %r: I think 'nominally' is more specific than 'generally' here
 809 The quantization index, on the other hand, nominally represents a progressive
 810  range of quality levels, from low quality near zero to high quality near 63.
 811 However, the interpretation is arbitrary, and it is possible, for example, to
 812  partition the scale into two completely separate ranges with 32 levels each
 813  that are meant to represent different classes of source material, or any
 814  other arrangement that suits the encoder's requirements.
 815
 816 Each quantization matrix is an $8\times 8$ matrix of 16-bit values, which is
 817  used to quantize the output of the $8\times 8$ DCT\@.
 818 Quantization matrices are specified using three components: a
 819  \term{base matrix} and two \term{scale values}.
 820 The first scale value is the \term{DC scale}, which is applied to the DC
 821  component of the base matrix.
 822 The second scale value is the \term{AC scale}, which is applied to all the
 823  other components of the base matrix.
 824 There are 64 DC scale values and 64 AC scale values, one for each \qi\ value.
 825
 826 There are 64 elements in each base matrix, one for each DCT coefficient.
 827 They are stored in natural order (cf.\ Section~\ref{sec:dct-coeffs}).
 828 There is a separate set of base matrices for each quantization type and each
 829  color plane, with up to 64 possible base matrices in each set, one for each
 830  \qi\ value.
 831 %r: we will mention that the given matrices must bound the \qi range
 832 %r: in the detailed section. it's not important at this level.
 833 Typically the bitstream contains matrices for only a sparse subset of the
 834  possible \qi\ values.
 835 The base matrices for the remainder of the \qi\ values are computed using
 836  linear interpolation.
 837 This configuration allows the encoder to adjust the quantization matrices to
 838  approximate the complex, non-linear response of the human visual system to
 839  different quantization errors.
 840
 841 Finally, because the in-loop deblocking filter strength depends on the strength
 842  of the quantization matrices defined in this header, a table of 64 \term{loop
 843  filter limit values} is defined, one for each \qi\ value.
 844
 845 The precise specification of how all of this information is decoded appears in
 846  Section~\ref{sub:loop-filter-limits} and Section~\ref{sub:quant-params}.
 847
 848 \subsection{Huffman Codebooks}
 849
 850 Theora uses 80 configurable binary Huffman codes to represent the 32 tokens
 851  used to encode DCT coefficients.
 852 Each of the 32 token values has a different semantic meaning and is used to
 853  represent single coefficient values, zero runs, combinations of the two, and
 854  \term{End-Of-Block markers}.
 855
 856 The 80 codes are divided up into five groups of 16, with each group
 857  corresponding to a set of DCT coefficient indices.
 858 The first group corresponds to the DC coefficient, while the remaining four
 859  groups correspond to different subsets of the AC coefficients.
 860 Within each frame, two pairs of 4-bit codebook indices are stored.
 861 The first pair selects which codebooks to use from the DC coefficient group for
 862  the $Y'$ coefficients and the $C_b$ and $C_r$ coefficients.
 863 The second pair selects which codebooks to use from {\em all four} of the AC
 864  coefficient groups for the $Y'$ coefficients and the $C_b$ and $C_r$
 865  coefficients.
 866
 867 The precise specification of how the codebooks are decoded appears in
 868  Section~\ref{sub:huffman-tables}.
 869
 870 \section{High-Level Decode Process}
 871
 872 \subsection{Decoder Setup}
 873
 874 Before decoding can begin, a decoder MUST be initialized using the bitstream
 875  headers corresponding to the stream to be decoded.
 876 Theora uses three header packets; all are required, in order, by this
 877  specification.
 878 Once set up, decode may begin at any intra-frame packet---or even inter-frame
 879  packets, provided the appropriate reference frames have already been
 880  decoded and cached---belonging to the Theora stream.
 881 In Theora I, all packets after the three initial headers are intra-frame or
 882  inter-frame packets.
 883
 884 The header packets are, in order, the identification header, the comment
 885  header, and the setup header.
 886
 887 \paragraph{Identification Header}
 888
 889 The identification header identifies the stream as Theora, provides a version
 890  number, and defines the characteristics of the video stream such as frame
 891  size.
 892 A complete description of the identification header appears in
 893  Section~\ref{sec:idheader}.
 894
 895 \paragraph{Comment Header}
 896
 897 The comment header includes user text comments (`tags') and a vendor string
 898  for the application/library that produced the stream.
 899 The format of the comment header is the same as that used in the Vorbis I and
 900  Speex codecs, with slight modifications due to the use of a different bit
 901  packing mechanism.
 902 A complete description of how the comment header is coded appears in
 903  Section~\ref{sec:commentheader}, along with a suggested set of tags.
 904
 905 \paragraph{Setup Header}
 906
 907 The setup header includes extensive codec setup information, including the
 908  complete set of quantization matrices and Huffman codebooks needed to decode
 909  the DCT coefficients.
 910 A complete description of the setup header appears in
 911  Section~\ref{sec:setupheader}.
 912
 913 \subsection{Decode Procedure}
 914
 915 The decoding and synthesis procedure for all video packets is fundamentally the
 916  same, with some steps omitted for intra frames.
 917 \begin{itemize}
 918 \item
 919 Decode packet type flag.
 920 \item
 921 Decode frame header.
 922 \item
 923 Decode coded block information (inter frames only).
 924 \item
 925 Decode macro block mode information (inter frames only).
 926 \item
 927 Decode motion vectors (inter frames only).
 928 \item
 929 Decode block-level \qi\ information.
 930 \item
 931 Decode DC coefficient for each coded block.
 932 \item
 933 Decode 1st AC coefficient for each coded block.
 934 \item
 935 Decode 2nd AC coefficient for each coded block.
 936 \item
 937 $\ldots$
 938 \item
 939 Decode 63rd AC coefficient for each coded block.
 940 \item Perform DC coefficient prediction.
 941 \item Reconstruct coded blocks.
 942 \item Copy uncoded blocks.
 943 \item Perform loop filtering.
 944 \end{itemize}
 945
 946 \begin{verse}
 947 {\bf Note:} clever rearrangement of the steps in this process is possible.
 948 As an example, in a memory-constrained environment, one can make multiple
 949  passes through the DCT coefficients to avoid buffering them all in memory.
 950 On the first pass, the starting location of each coefficient is identified, and
 951  then 64 separate get pointers are used to read in the 64 DCT coefficients
 952  required to reconstruct each coded block in sequence.
 953 This operation produces entirely equivalent output and is naturally perfectly
 954  legal.
 955 It may even be a benefit in non-memory-constrained environments due to a
 956  reduced cache footprint.
 957 \end{verse}
 958
 959 Theora makes equivalence easy to check by defining all decoding operations in
 960  terms of exact integer operations.
 961 No floating-point math is required, and in particular, the implementation of
 962  the iDCT transform MUST be followed precisely.
 963 This prevents the decoder mismatch problem commonly associated with codecs that
 964  provide a less rigorous transform specification.
 965 Such a mismatch problem would be devastating to Theora, since a single rounding
 966  error in one frame could propagate throughout the entire succeeding frame due
 967  to DC prediction.
 968
 969 \paragraph{Packet Type Decode}
 970
 971 Theora I uses four packet types.
 972 The first three packet types mark each of the three Theora headers described
 973  above.
 974 The fourth packet type marks a video packet.
 975 All other packet types are reserved; packets marked with a reserved type should
 976  be ignored.
 977
 978 \paragraph{Frame Header Decode}
 979
 980 The frame header contains some global information about the current frame.
 981 The first is the frame type field, which specifies if this is an intra frame or
 982  an inter frame.
 983 Inter frames predict their contents from previously decoded reference frames.
 984 Intra frames can be independently decoded with no established reference frames.
 985
 986 The next piece of information in the frame header is the list of \qi\ values
 987  allowed in the frame.
 988 Theora allows from one to three different \qi\ values to be used in a single
 989  frame, each of which selects a set of six quantization matrices, one for each
 990  quantization type (inter or intra), and one for each color plane.
 991 The first \qi\ value is {\em always} used when dequantizing DC coefficients.
 992 The \qi\ value used when dequantizing AC coefficients, however, can vary from
 993  block to block.
 994 VP3, in contrast, only allows a single \qi\ value per frame for both the DC and
 995  AC coefficients.
 996
 997 \paragraph{Coded Block Information}
 998
 999 This stage determines which blocks in the frame are coded and which are
1000  uncoded.
1001 A \term{coded block list} is constructed which lists all the coded blocks in
1002  coded order.
1003 For intra frames, every block is coded, and so no data needs to be read from
1004  the packet.
1005
1006 \paragraph{Macro Block Mode Information}
1007
1008 For intra frames, every block is coded in INTRA mode, and this stage is
1009  skipped.
1010 In inter frames a \term{coded macro block list} is constructed from the coded
1011  block list.
1012 Any macro block which has at least one of its luma blocks coded is considered
1013  coded; all other macro blocks are uncoded, even if they contain coded chroma
1014  blocks.
1015 A coding mode is decoded for each coded macro block, and assigned to all its
1016  constituent coded blocks.
1017 All coded chroma blocks in uncoded macro blocks are assigned the INTER\_NOMV
1018  coding mode.
1019
1020 \paragraph{Motion Vectors}
1021
1022 Intra frames are coded entirely in INTRA mode, and so this stage is skipped.
1023 Some inter coding modes, however, require one or more motion vectors to be
1024  specified for each macro block.
1025 These are decoded in this stage, and an appropriate motion vector is assigned
1026  to each coded block in the macro block.
1027
1028 \paragraph{Block-Level \qi\ Information}
1029
1030 If a frame allows multiple \qi\ values, the \qi\ value assigned to each block
1031  is decoded here.
1032 Frames that use only a single \qi\ value have nothing to decode.
1033
1034 \paragraph{DCT Coefficients}
1035
1036 Finally, the quantized DCT coefficients are decoded.
1037 A list of DCT coefficients in zig-zag order for a single block is represented
1038  by a list of tokens.
1039 A token can take on one of 32 different values, each with a different semantic
1040  meaning.
1041 A single token can represent a single DCT coefficient, a run of zero
1042  coefficients within a single block, a combination of a run of zero
1043  coefficients followed by a single non-zero coefficient, an
1044  \term{End-Of-Block marker}, or a run of EOB markers.
1045 EOB markers signify that the remainder of the block is one long zero run.
1046 Unlike JPEG and MPEG, there is no requirement for each block to end with
1047  a special marker.
1048 If non-EOB tokens yield values for all 64 of the coefficients in a block, then
1049  no EOB marker occurs.
1050
1051 Each token is associated with a specific \term{token index} in a block.
1052 For single-coefficient tokens, this index is the zig-zag index of the token in
1053  the block.
1054 For zero-run tokens, this index is the zig-zag index of the {\em first}
1055  coefficient in the run.
1056 For combination tokens, the index is again the zig-zag index of the first
1057  coefficient in the zero run.
1058 For EOB markers, which signify that the remainder of the block is one long zero
1059  run, the index is the zig-zag index of the first zero coefficient in that run.
1060 For EOB runs, the token index is that of the first EOB marker in the run.
1061 Due to zero runs and EOB markers, a block does not have to have a token for
1062  every zig-zag index.
1063
1064 Tokens are grouped in the stream by token index, not by the block they
1065  originate from.
1066 This means that for each zig-zag index in turn, the tokens with that index from
1067  {\em all} the coded blocks are coded in coded block order.
1068 When decoding, a current token index is maintained for each coded block.
1069 This index is advanced by the number of coefficients that are added to the
1070  block as each token is decoded.
1071 After fully decoding all the tokens with token index \ti, the current token
1072  index of every coded block will be \ti\ or greater.
1073
1074 If an EOB run of $n$ blocks is decoded at token index \ti, then it ends the
1075  next $n$ blocks in coded block order whose current token index is equal to
1076  \ti, but not greater.
1077 If there are fewer than $n$ blocks with a current token index of \ti, then the
1078  decoder goes through the coded block list again from the start, ending blocks
1079  with a current token index of $\ti+1$, and so on, until $n$ blocks have been
1080  ended.
1081
1082 Tokens are read by parsing a Huffman code that depends on \ti\ and the color
1083  plane of the next coded block whose current token index is equal to \ti, but
1084  not greater.
1085 The Huffman codebooks are selected on a per-frame basis from the 80 codebooks
1086  defined in the setup header.
1087 Many tokens have a fixed number of \term{extra bits} associated with them.
1088 These bits are read from the packet immediately after the token is decoded.
1089 These are used to define things such as coefficient magnitude, sign, and the
1090  length of runs.
1091
1092 \paragraph{DC Prediction}
1093
1094 After the coefficients for each block are decoded, the quantized DC value of
1095  each block is adjusted based on the DC values of its neighbors.
1096 This adjustment is performed by scanning the blocks in raster order, not coded
1097  block order.
1098
1099 \paragraph{Reconstruction}
1100
1101 Finally, using the coding mode, motion vector (if applicable), quantized
1102  coefficient list, and \qi\ value defined for each block, all the coded blocks
1103  are reconstructed.
1104 The DCT coefficients are dequantized, an inverse DCT transform is applied, and
1105  the predictor is formed from the coding mode and motion vector and added to
1106  the result.
1107
1108 \paragraph{Loop Filtering}
1109
1110 To complete the reconstructed frame, an ``in-loop'' deblocking filter is
1111  applied to the edges of all coded blocks.
1112
1113
1114 \chapter{Video Formats}
1115
1116 This section gives a precise description of the video formats that Theora is
1117  capable of storing.
1118 The Theora bitstream is capable of handling video at any arbitrary resolution
1119  up to $1048560\times 1048560$.
1120 Such video would require almost three terabytes of storage per frame for
1121  uncompressed data, so compliant decoders MAY refuse to decode images with
1122  sizes beyond their capabilities.
1123 %TODO: What MUST a "compliant" decoder accept?
1124 %TODO: What SHOULD a decoder use for an upper bound? (derive from total amount
1125 %TODO:  of memory and memory bandwidth)
1126 %TODO: Any lower limits?
1127 %TODO: We really need hardware device profiles, but such things should be
1128 %TODO:  developed with input from the hardware community.
1129 %TODO: And even then sometimes they're useless
1130
1131 The remainder of this section talks about two specific aspects of the video
1132  format: the color space and the pixel format.
1133 The first describes how color is represented and how to transform that color
1134  representation into a device independent color space such as CIE $XYZ$ (1931).
1135 The second describes the various schemes for sampling the color values in time
1136  and space.
1137
1138 \section{Color Space Conventions}
1139
1140 There are a large number of different color standards used in digital video.
1141 Since Theora is a lossy codec, it restricts itself to only a few of them to
1142  simplify playback.
1143 Unlike the alternate method of describing all the parameters of the color
1144  model, this allows a few dedicated routines for color conversion to be written
1145  and heavily optimized in a decoder.
1146 More flexible conversion functions should instead be specified in an encoder,
1147  where additional computational complexity is more easily tolerated.
1148 The color spaces were selected to give a fair representation of color standards
1149  in use around the world today.
1150 Most of the standards that do not exactly match one of these can be converted
1151  to one fairly easily.
1152
1153 All Theora color spaces are $Y'C_bC_r$ color spaces with one luma channel and
1154  two chroma channels.
1155 Each channel contains 8-bit discrete values in the range $0\ldots255$, which
1156  represent non-linear gamma pre-corrected signals.
1157 The Theora identification header contains an 8-bit value that describes the
1158  color space.
1159 This merely selects one of the color spaces available from an enumerated list.
1160 Currently, only two color spaces are defined, with a third possibility that
1161  indicates the color space is ``unknown''.
1162
1163 \section{Color Space Conversions and Parameters}
1164 \label{sec:color-xforms}
1165
1166 The parameters which describe the conversions between each color space are
1167  listed below.
1168 These are the parameters needed to map colors from the encoded $Y'C_bC_r$
1169  representation to the device-independent color space CIE $XYZ$ (1931).
1170 These parameters define abstract mathematical conversion functions which are
1171  infinitely precise.
1172 The accuracy and precision with which the conversions are performed in a real
1173  system is determined by the quality of output desired and the available
1174  processing power.
1175 Exact decoder output is defined by this specification only in the original
1176  $Y'C_bC_r$ space.
1177
1178 \begin{description}
1179 \item[$Y'C_bC_r$ to $Y'P_bP_r$:]
1180 \vspace{\baselineskip}\hfill
1181
1182 This conversion takes 8-bit discrete values in the range $[0\ldots255]$ and
1183  maps them to real values in the range $[0\ldots1]$ for $Y'$ and
1184  $[-\frac{1}{2}\ldots\frac{1}{2}]$ for $P_b$ and $P_r$.
1185 Because some values may fall outside the offset and excursion defined for each
1186  channel in the $Y'C_bC_r$ space, the results may fall outside these ranges in
1187  $Y'P_bP_r$ space.
1188 No clamping should be done at this stage.
1189
1190 \begin{align}
1191 Y'_\mathrm{out} & =
1192  \frac{Y'_\mathrm{in}-\mathrm{Offset}_Y}{\mathrm{Excursion}_Y} \\
1193 P_b             & =
1194  \frac{C_b-\mathrm{Offset}_{C_b}}{\mathrm{Excursion}_{C_b}} \\
1195 P_r             & =
1196  \frac{C_r-\mathrm{Offset}_{C_r}}{\mathrm{Excursion}_{C_r}}
1197 \end{align}
1198
1199 Parameters: $\mathrm{Offset}_{Y,C_b,C_r}$, $\mathrm{Excursion}_{Y,C_b,C_r}$.
1200
1201 \item[$Y'P_bP_r$ to $R'G'B'$:]
1202 \vspace{\baselineskip}\hfill
1203
1204 This conversion takes the one luma and two chroma channel representation and
1205  maps it to the non-linear $R'G'B'$ space used to drive actual output devices.
1206 Values should be clamped into the range $[0\ldots1]$ after this stage.
1207
1208 \begin{align}
1209 R' & = Y'+2(1-K_r)P_r \\
1210 G' & = Y'-2\frac{(1-K_b)K_b}{1-K_b-K_r}P_b-2\frac{(1-K_r)K_r}{1-K_b-K_r}P_r\\
1211 B' & = Y'+2(1-K_b)P_b
1212 \end{align}
1213
1214 Parameters: $K_b,K_r$.
1215
1216 \item[$R'G'B'$ to $RGB$ (Output device gamma correction):]
1217 \vspace{\baselineskip}\hfill
1218
1219 This conversion takes the non-linear $R'G'B'$ voltage levels and maps them to
1220  linear light levels produced by the actual output device.
1221 Note that this conversion is only that of the output device, and its inverse is
1222  {\em not} that used by the input device.
1223 Because a dim viewing environment is assumed in most television standards, the
1224  overall gamma between the input and output devices is usually around $1.1$ to
1225  $1.2$, and not a strict $1.0$.
1226
1227 For calibration with actual output devices, the model
1228 \begin{align}
1229 L & =(E'+\Delta)^\gamma
1230 \end{align}
1231  should be used, with $\Delta$ the free parameter and $\gamma$ held fixed to
1232  the value specified in this document.
1233 The conversion function presented here is an idealized version with $\Delta=0$.
1234
1235 \begin{align}
1236 R & = R'^\gamma \\
1237 G & = G'^\gamma \\
1238 B & = B'^\gamma
1239 \end{align}
1240
1241 Parameters: $\gamma$.
1242
1243 \item[$RGB$ to $R'G'B'$ (Input device gamma correction):]
1244 \vspace{\baselineskip}\hfill
1245
1246 %TODO: Tag section as non-normative
1247
1248 This conversion takes linear light levels and maps them to the non-linear
1249  voltage levels produced in the actual input device.
1250 This information is merely informative.
1251 It is not required for building a decoder or for converting between the various
1252  formats and the actual output capabilities of a particular device.
1253
1254 A linear segment is introduced on the low end to reduce noise in dark areas of
1255  the image.
1256 The rest of the scale is adjusted so that the power segment of the curve
1257  intersects the linear segment with the proper slope, and so that it still maps
1258  0 to 0 and 1 to 1.
1259
1260 \begin{align}
1261 R' & = \left\{
1262 \begin{array}{ll}
1263 \alpha R,                     & 0\le R<\delta   \\
1264 (1+\epsilon)R^\beta-\epsilon, & \delta\le R\le1
1265 \end{array}\right. \\
1266 G' & = \left\{
1267 \begin{array}{ll}
1268 \alpha G,                     & 0\le G<\delta   \\
1269 (1+\epsilon)G^\beta-\epsilon, & \delta\le G\le1
1270 \end{array}\right. \\
1271 B' & = \left\{
1272 \begin{array}{ll}
1273 \alpha B,                     & 0\le B<\delta   \\
1274 (1+\epsilon)B^\beta-\epsilon, & \delta\le B\le1
1275 \end{array}\right.
1276 \end{align}
1277
1278 Parameters: $\beta$, $\alpha$, $\delta$, $\epsilon$.
1279
1280 \item[$RGB$ to CIE $XYZ$ (1931):]
1281 \vspace{\baselineskip}\hfill
1282
1283 This conversion maps a device-dependent linear RGB space to the
1284  device-independent linear CIE $XYZ$ space.
1285 The parameters are the CIE chromaticity coordinates of the three
1286  primaries---red, green, and blue---as well as the chromaticity coordinates
1287  of the white point of the device.
1288 This is how hardware manufacturers and standards typically describe a
1289  particular $RGB$ space.
1290 The math required to convert these parameters into a useful transformation
1291  matrix is reproduced below.
1292
1293 \begin{align}
1294 F                  & =
1295 \left[\begin{array}{ccc}
1296 \frac{x_r}{y_r}       & \frac{x_g}{y_g}       & \frac{x_b}{y_b}       \\
1297 1                     & 1                     & 1                     \\
1298 \frac{1-x_r-y_r}{y_r} & \frac{1-x_g-y_g}{y_g} & \frac{1-x_b-y_b}{y_b}
1299 \end{array}\right] \\
1300 \left[\begin{array}{c}
1301 s_r \\
1302 s_g \\
1303 s_b
1304 \end{array}\right] & =
1305 F^{-1}\left[\begin{array}{c}
1306 \frac{x_w}{y_w} \\
1307 1 \\
1308 \frac{1-x_w-y_w}{y_w}
1309 \end{array}\right] \\
1310 \left[\begin{array}{c}
1311 X \\
1312 Y \\
1313 Z
1314 \end{array}\right] & =
1315 F\left[\begin{array}{c}
1316 s_rR \\
1317 s_gG \\
1318 s_bB
1319 \end{array}\right]
1320 \end{align}
1321 Parameters: $x_r,x_g,x_b,x_w, y_r,y_g,y_b,y_w$.
1322
1323 \end{description}
1324
1325 \section{Available Color Spaces}
1326 \label{sec:colorspaces}
1327
1328 These are the color spaces currently defined for use by Theora video.
1329 Each one has a short name, with which it is referred to in this document, and
1330  a more detailed specification of the standards from which its parameters are
1331  derived.
1332 Some standards do not specify all the parameters necessary.
1333 For these unspecified parameters, this document serves as the definition of
1334  what should be used when encoding or decoding Theora video.
1335
1336 \subsection{Rec.~470M (Rec.~ITU-R~BT.470-6 System M/NTSC with
1337  Rec.~ITU-R~BT.601-5)}
1338 \label{sec:470m}
1339
1340 This color space is used by broadcast television and DVDs in much of the
1341  Americas, Japan, Korea, and the Union of Myanmar \cite{rec470}.
1342 This color space may also be used for System M/PAL (Brazil), with an
1343  appropriate conversion supplied by the encoder to compensate for the
1344  different gamma value.
1345 See Section~\ref{sec:470bg} for an appropriate gamma value to assume for M/PAL
1346  input.
1347
1348 In the US, studio monitors are adjusted to a D65 white point
1349  ($x_w,y_w=0.313,0.329$).
1350 In Japan, studio monitors are adjusted to a D white of 9300K
1351  ($x_w,y_w=0.285,0.293$).
1352
1353 Rec.~470 does not specify a digital encoding of the color signals.
1354 For Theora, Rec.~ITU-R~BT.601-5 \cite{rec601} is used, starting from the
1355  $R'G'B'$ signals specified by Rec.~470.
1356
1357 Rec.~470 does not specify an input gamma function.
1358 For Theora, the Rec.~709 \cite{rec709} input function is assumed.
1359 This is the same as that specified by SMPTE 170M \cite{smpte170m}, which claims
1360  to reflect modern practice in the creation of NTSC signals circa 1994.
1361
1362 The parameters for all the color transformations defined in
1363  Section~\ref{sec:color-xforms} are given in Table~\ref{tab:470m}.
1364
1365 \begin{table}[htb]
1366 \begin{align*}
1367 \mathrm{Offset}_{Y,C_b,C_r}    & = (16, 128, 128)  \\
1368 \mathrm{Excursion}_{Y,C_b,C_r} & = (219, 224, 224) \\
1369 K_r                            & = 0.299           \\
1370 K_b                            & = 0.114           \\
1371 \gamma                         & = 2.2             \\
1372 \beta                          & = 0.45            \\
1373 \alpha                         & = 4.5             \\
1374 \delta                         & = 0.018           \\
1375 \epsilon                       & = 0.099           \\
1376 x_r,y_r                        & = 0.67, 0.33      \\
1377 x_g,y_g                        & = 0.21, 0.71      \\
1378 x_b,y_b                        & = 0.14, 0.08      \\
1379 \text{(Illuminant C) } x_w,y_w & = 0.310, 0.316    \\
1380 \end{align*}
1381 \caption{Rec.~470M Parameters}
1382 \label{tab:470m}
1383 \end{table}
1384
1385 \subsection{Rec.~470BG (Rec.~ITU-R~BT.470-6 Systems B and G with
1386  Rec.~ITU-R~BT.601-5)}
1387 \label{sec:470bg}
1388
1389 This color space is used by the PAL and SECAM systems in much of the rest of
1390  the world \cite{rec470}.
1391 This can be used directly by systems (B, B1, D, D1, G, H, I, K, N)/PAL and (B,
1392  D, G, H, K, K1, L)/SECAM\@.
1393
1394 \begin{verse}
1395 {\bf Note:} the Rec.~470BG chromaticity values are different from those
1396  specified in Rec.~470M\@.
1397 When PAL and SECAM systems were first designed, they were based upon the same
1398  primaries as NTSC\@.
1399 However, as methods of making color picture tubes have changed, the primaries
1400  used have changed as well.
1401 The U.S. recommends using correction circuitry to approximate the existing,
1402  standard NTSC primaries.
1403 Current PAL and SECAM systems have standardized on primaries in accord with
1404  more recent technology.
1405 \end{verse}
1406
1407 Rec.~470 provisionally permits the use of the NTSC chromaticity values (given
1408  in Section~\ref{sec:470m}) with legacy PAL and SECAM equipment.
1409 In Theora, material must be decoded assuming the new PAL and SECAM primaries.
1410 Material intended for display on old legacy devices should be converted by the
1411  decoder.
1412
1413 The official Rec.~470BG specifies a gamma value of $\gamma=2.8$.
1414 However, in practice this value is unrealistically high \cite{Poyn97}.
1415 Rec.~470BG states that the overall system gamma should be approximately
1416  $\gamma\beta=1.2$.
1417 Since most cameras pre-correct with a gamma value of $\beta=0.45$,
1418  this suggests an output device gamma of approximately $\gamma=2.67$.
1419 This is the value recommended for use with PAL systems in Theora.
1420
1421 Rec.~470 does not specify a digital encoding of the color signals.
1422 For Theora, Rec.~ITU-R~BT.601-5 \cite{rec601} is used, starting from the
1423  $R'G'B'$ signals specified by Rec.~470.
1424
1425 Rec.~470 does not specify an input gamma function.
1426 For Theora, the Rec.~709 \cite{rec709} input function is assumed.
1427
1428 The parameters for all the color transformations defined in
1429  Section~\ref{sec:color-xforms} are given in Table~\ref{tab:470bg}.
1430
1431 \begin{table}[htb]
1432 \begin{align*}
1433 \mathrm{Offset}_{Y,C_b,C_r}    & = (16, 128, 128)  \\
1434 \mathrm{Excursion}_{Y,C_b,C_r} & = (219, 224, 224) \\
1435 K_r                            & = 0.299           \\
1436 K_b                            & = 0.114           \\
1437 \gamma                         & = 2.67            \\
1438 \beta                          & = 0.45            \\
1439 \alpha                         & = 4.5             \\
1440 \delta                         & = 0.018           \\
1441 \epsilon                       & = 0.099           \\
1442 x_r,y_r                        & = 0.64, 0.33      \\
1443 x_g,y_g                        & = 0.29, 0.60      \\
1444 x_b,y_b                        & = 0.15, 0.06      \\
1445 \text{(D65) } x_w,y_w          & = 0.313, 0.329    \\
1446 \end{align*}
1447 \caption{Rec.~470BG Parameters}
1448 \label{tab:470bg}
1449 \end{table}
1450
1451 \section{Pixel Formats}
1452 \label{sec:pixfmts}
1453
1454 Theora supports several different pixel formats, each of which uses different
1455  subsampling for the chroma planes relative to the luma plane.
1456
1457 \subsection{4:4:4 Subsampling}
1458 \label{sec:444}
1459
1460 All three color planes are stored at full resolution---each pixel has a $Y'$,
1461  a $C_b$ and a $C_r$ value (see Figure~\ref{fig:pixel444}).
1462 The samples in the different planes are all at co-located sites.
1463
1464 \begin{figure}[htbp]
1465 \begin{center}
1466 \includegraphics{pixel444}
1467 \end{center}
1468 \caption{Pixels encoded 4:4:4}
1469 \label{fig:pixel444}
1470 \end{figure}
1471
1472 % Figure.
1473 %YRB         YRB
1474 %
1475 %
1476 %
1477 %YRB         YRB
1478 %
1479 %
1480 %
1481
1482
1483 \subsection{4:2:2 Subsampling}
1484 \label{sec:422}
1485
1486 The $C_b$ and $C_r$ planes are stored with half the horizontal resolution of
1487  the $Y'$ plane.
1488 Thus, each of these planes has half the number of horizontal blocks as the luma
1489  plane (see Figure~\ref{fig:pixel422}).
1490 Similarly, they have half the number of horizontal super blocks, rounded up.
1491 Macro blocks are defined across color planes, and so their number does not
1492  change, but each macro block contains half as many chroma blocks.
1493
1494 The chroma samples are vertically aligned with the luma samples, but
1495  horizontally centered between two luma samples.
1496 Thus, each luma sample has a unique closest chroma sample.
1497 A horizontal phase shift may be required to produce signals which use different
1498  horizontal chroma sampling locations for compatibility with different systems.
1499
1500 \begin{figure}[htbp]
1501 \begin{center}
1502 \includegraphics{pixel422}
1503 \end{center}
1504 \caption{Pixels encoded 4:2:2}
1505 \label{fig:pixel422}
1506 \end{figure}
1507
1508 % Figure.
1509 %Y     RB    Y           Y     RB    Y
1510 %
1511 %
1512 %
1513 %Y     RB    Y           Y     RB    Y
1514 %
1515 %
1516 %
1517
1518 \subsection{4:2:0 Subsampling}
1519 \label{sec:420}
1520
1521 The $C_b$ and $C_r$ planes are stored with half the horizontal and half the
1522  vertical resolution of the $Y'$ plane.
1523 Thus, each of these planes has half the number of horizontal blocks and half
1524  the number of vertical blocks as the luma plane, for a total of one quarter
1525  the number of blocks (see Figure~\ref{fig:pixel420}).
1526 Similarly, they have half the number of horizontal super blocks and half the
1527  number of vertical super blocks, rounded up.
1528 Macro blocks are defined across color planes, and so their number does not
1529  change, but each macro block contains within it one quarter as many
1530  chroma blocks.
1531
1532 The chroma samples are vertically and horizontally centered between four luma
1533  samples.
1534 Thus, each luma sample has a unique closest chroma sample.
1535 This is the same sub-sampling pattern used with JPEG, MJPEG, and MPEG-1, and
1536  was inherited from VP3.
1537 A horizontal or vertical phase shift may be required to produce signals which
1538  use different chroma sampling locations for compatibility with different
1539  systems.
1540
1541 \begin{figure}[htbp]
1542 \begin{center}
1543 \includegraphics{pixel420}
1544 \end{center}
1545 \caption{Pixels encoded 4:2:0}
1546 \label{fig:pixel420}
1547 \end{figure}
1548
1549 % Figure.
1550 %Y           Y           Y           Y
1551 %
1552 %      RB                      RB
1553 %
1554 %Y           Y           Y           Y
1555 %
1556 %
1557 %
1558 %Y           Y           Y           Y
1559 %
1560 %      RB                      RB
1561 %
1562 %Y           Y           Y           Y
1563 %
1564 %
1565 %
1566
1567 \subsection{Subsampling and the Picture Region}
1568
1569 Although the frame size must be an integral number of macro blocks, and thus
1570  both the number of pixels and the number of blocks in each direction must be
1571  even, no such requirement is made of the picture region.
1572 Thus, when using subsampled pixel formats, careful attention must be paid to
1573  which chroma samples correspond to which luma samples.
1574
1575 As mentioned above, for each pixel format, there is a unique chroma sample that
1576  is the closest to each luma sample.
1577 When cropping the chroma planes to the picture region, all the chroma samples
1578  corresponding to a luma sample in the cropped picture region must be included.
1579 Thus, when dividing the width or height of the picture region by two to obtain
1580  the size of the subsampled chroma planes, they must be rounded up.
1581
1582 Furthermore, the sampling locations are defined relative to the frame,
1583  {\em not} the picture region.
1584 When using the 4:2:2 and 4:2:0 formats, the locations of chroma samples
1585  relative to the luma samples depend on whether or not the X offset of the
1586  picture region is odd.
1587 If the offset is even, each column of chroma samples corresponds to two columns
1588  of luma samples (see Figure~\ref{fig:pic_even} for an example).
1589 The only exception is if the width is odd, in which case the last column
1590  corresponds to only one column of luma samples (see Figure~\ref{fig:pic_even_odd}).
1591 If the offset is odd, then the first column of chroma samples corresponds to
1592  only one column of luma samples, while the remaining columns each correspond
1593  to two (see Figure~\ref{fig:pic_odd}).
1594 In this case, if the width is even, the last column again corresponds to only
1595  one column of luma samples (see Figure~\ref{fig:pic_odd_even}).
1596
1597 A similar process is followed with the rows of a picture region of odd height
1598  encoded in the 4:2:0 format.
1599 If the Y offset is even, each row of chroma samples corresponds to two rows of
1600  luma samples (see Figure~\ref{fig:pic_even}), except with an odd height, where
1601  the last row corresponds to one row of luma samples only (see
1602  Figure~\ref{fig:pic_even_odd}).
1603 If the offset is odd, then it is the first row of chroma samples which
1604  corresponds to only one row of luma samples, while the remaining rows each
1605  correspond to two (Figure~\ref{fig:pic_odd}), except with an even height,
1606  where the last row also corresponds to one (Figure~\ref{fig:pic_odd_even}).
1607
1608 Encoders should be aware of these differences in the subsampling when using an
1609  even or odd offset.
1610 In the typical case, with an even width and height, where one expects two rows
1611  or columns of luma samples for every row or column of chroma samples, the
1612  encoder must take care to ensure that the offsets used are both even.
1613
1614 \begin{figure}[htbp]
1615 \begin{center}
1616 \includegraphics[width=\textwidth]{pic_even}
1617 \end{center}
1618 \caption{Pixel correspondence between color planes with even picture
1619  offset and even picture size}
1620 \label{fig:pic_even}
1621 \end{figure}
1622
1623 \begin{figure}[htbp]
1624 \begin{center}
1625 \includegraphics[width=\textwidth]{pic_even_odd}
1626 \end{center}
1627 \caption{Pixel correspondence with even picture offset and
1628  odd picture size}
1629 \label{fig:pic_even_odd}
1630 \end{figure}
1631
1632 \begin{figure}[htbp]
1633 \begin{center}
1634 \includegraphics[width=\textwidth]{pic_odd}
1635 \end{center}
1636 \caption{Pixel correspondence with odd picture offset and
1637  odd picture size}
1638 \label{fig:pic_odd}
1639 \end{figure}
1640
1641 \begin{figure}[htbp]
1642 \begin{center}
1643 \includegraphics[width=\textwidth]{pic_odd_even}
1644 \end{center}
1645 \caption{Pixel correspondence with odd picture offset and
1646  even picture size}
1647 \label{fig:pic_odd_even}
1648 \end{figure}
1649
1650
1651 \chapter{Bitpacking Convention}
1652 \label{sec:bitpacking}
1653
1654 \section{Overview}
1655
1656 The Theora codec uses relatively unstructured raw packets containing
1657  binary integer fields of arbitrary width.
1658 Logically, each packet is a bitstream in which bits are written one-by-one by
1659  the encoder and then read one-by-one in the same order by the decoder.
1660 Most current binary storage arrangements group bits into a native storage unit
1661  of eight bits (octets), sixteen bits, thirty-two bits, or less commonly other
1662  fixed sizes.
1663 The Theora bitpacking convention specifies the correct mapping of the logical
1664  packet bitstream into an actual representation in fixed-width units.
1665
1666 \subsection{Octets and Bytes}
1667
1668 In most contemporary architectures, a `byte' is synonymous with an `octet',
1669  that is, eight bits.
1670 For purposes of the bitpacking convention, a byte implies the smallest native
1671  integer storage representation offered by a platform.
1672 Modern file systems invariably offer bytes as the fundamental atom of storage.
1673
1674 The most ubiquitous architectures today consider a `byte' to be an octet.
1675 Note, however, that the Theora bitpacking convention is still well defined for
1676  any native byte size; an implementation can use the native bit-width of a
1677  given storage system.
1678 This document assumes that a byte is one octet for purposes of example only.
1679
1680 \subsection{Words and Byte Order}
1681
1682 A `word' is an integer size that is a grouped multiple of the byte size.
1683 Most architectures consider a word to be a group of two, four, or eight bytes.
1684 Each byte in the word can be ranked by order of `significance', i.e.\ the
1685  significance of the bits in each byte when storing a binary integer in the
1686  word.
1687 Several byte orderings are possible in a word.
1688 The common ones are
1689 \begin{itemize}
1690 \item{Big-endian:}
1691 in which the most significant byte comes first, e.g.\ 3-2-1-0,
1692 \item{Little-endian:}
1693 in which the least significant byte comes first, e.g.\ 0-1-2-3, and
1694 \item{Mixed-endian:}
1695 one of the less-common orderings that cannot be put into the above two
1696  categories, e.g.\ 3-1-2-0 or 0-2-1-3.
1697 \end{itemize}
1698
1699 The Theora bitpacking convention specifies storage and bitstream manipulation
1700  at the byte, not word, level.
1701 Thus host word ordering is of concern only during optimization, when writing
1702  code that operates on a word of storage at a time rather than a byte.
1703 Logically, bytes are always encoded and decoded in order from byte zero through
1704  byte $n$.
1705
1706 \subsection{Bit Order}
1707
1708 A byte has a well-defined `least significant' bit (LSb), which is the only bit
1709  set when the byte is storing the two's complement integer value $+1$.
1710 A byte's `most significant' bit (MSb) is at the opposite end.
1711 Bits in a byte are numbered from zero at the LSb to $n$ for the MSb, where
1712  $n=7$ in an octet.
1713
1714 \section{Coding Bits into Bytes}
1715
1716 The Theora codec needs to encode arbitrary bit-width integers from zero to 32
1717  bits wide into packets.
1718 These integer fields are not aligned to the boundaries of the byte
1719  representation; the next field is read at the bit position immediately
1720  after the end of the previous field.
1721
1722 The decoder logically unpacks integers by first reading the MSb of a binary
1723  integer from the logical bitstream, followed by the next most significant
1724  bit, etc., until the required number of bits have been read.
1725 When unpacking the bytes into bits, the decoder begins by reading the MSb of
1726  the integer to be read from the most significant unread bit position of the
1727  source byte, followed by the next-most significant bit position of the
1728  destination integer, and so on up to the requested number of bits.
1729 Note that this differs from the Vorbis I codec, which
1730  begins decoding with the LSb of the source integer, reading it from the
1731  LSb of the source byte.
1732 When all the bits of the current source byte are read, decoding continues with
1733  the MSb of the next byte.
1734 Any unfilled bits in the last byte of the packet MUST be cleared to zero by the
1735  encoder.
1736
1737 \subsection{Signedness}
1738
1739 The binary integers decoded by the above process may be either signed or
1740  unsigned.
1741 This varies from integer to integer, and this specification
1742  indicates how each value should be interpreted as it is read.
1743 That is, depending on context, the three bit binary pattern \bin{111} can be
1744  taken to represent either `$7$' as an unsigned integer or `$-1$' as a signed,
1745  two's complement integer.
1746
1747 \subsection{Encoding Example}
1748
1749 The following example shows the state of an (8-bit) byte stream after several
1750  binary integers are encoded, including the location of the put pointer for the
1751  next bit to write to and the total length of the stream in bytes.
1752
1753 Encode the 4 bit unsigned integer value `12' (\bin{1100}) into an empty byte
1754  stream.
1755
1756 \begin{tabular}{r|ccccccccl}
1757 \multicolumn{1}{r}{}& &&&&$\downarrow$&&&& \\
1758          & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & \\\cline{1-9}
1759 byte 0   & \textbf{1} & \textbf{1} & \textbf{0} & \textbf{0} &
1760                            0 & 0 & 0 & 0 & $\leftarrow$     \\
1761 byte 1   & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 &                  \\
1762 byte 2   & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 &                  \\
1763 byte 3   & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 &                  \\
1764 \multicolumn{1}{c|}{$\vdots$}&\multicolumn{8}{c}{$\vdots$}& \\
1765 byte $n$ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 &
1766 byte stream length: 1 byte
1767 \end{tabular}
1768 \vspace{\baselineskip}
1769
1770 Continue by encoding the 3 bit signed integer value `$-1$' (\bin{111}).
1771
1772 \begin{tabular}{r|ccccccccl}
1773 \multicolumn{1}{r}{} &&&&&&&&$\downarrow$& \\
1774          & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & \\\cline{1-9}
1775 byte 0   & \textbf{1} & \textbf{1} & \textbf{0} & \textbf{0} &
1776            \textbf{1} & \textbf{1} & \textbf{1} & 0 & $\leftarrow$ \\
1777 byte 1   & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 &                         \\
1778 byte 2   & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 &                         \\
1779 byte 3   & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 &                         \\
1780 \multicolumn{1}{c|}{$\vdots$}&\multicolumn{8}{c}{$\vdots$}&        \\
1781 byte $n$ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 &
1782 byte stream length: 1 byte
1783 \end{tabular}
1784 \vspace{\baselineskip}
1785
1786 Continue by encoding the 7 bit integer value `17' (\bin{0010001}).
1787
1788 \begin{tabular}{r|ccccccccl}
1789 \multicolumn{1}{r}{} &&&&&&&$\downarrow$&& \\
1790          & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & \\\cline{1-9}
1791 byte 0   & \textbf{1} & \textbf{1} & \textbf{0} & \textbf{0} &
1792            \textbf{1} & \textbf{1} & \textbf{1} & \textbf{0} & \\
1793 byte 1   & \textbf{0} & \textbf{1} & \textbf{0} & \textbf{0} &
1794            \textbf{0} & \textbf{1} & 0 & 0 & $\leftarrow$      \\
1795 byte 2   & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 &                     \\
1796 byte 3   & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 &                     \\
1797 \multicolumn{1}{c|}{$\vdots$}&\multicolumn{8}{c}{$\vdots$}&    \\
1798 byte $n$ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 &
1799 byte stream length: 2 bytes
1800 \end{tabular}
1801 \vspace{\baselineskip}
1802
1803 Continue by encoding the 13 bit integer value `6969' (\bin{11011\ 00111001}).
1804
1805 \begin{tabular}{r|ccccccccl}
1806 \multicolumn{1}{r}{} &&&&$\downarrow$&&&&& \\
1807          & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 &            \\\cline{1-9}
1808 byte 0   & \textbf{1} & \textbf{1} & \textbf{0} & \textbf{0} &
1809            \textbf{1} & \textbf{1} & \textbf{1} & \textbf{0} & \\
1810 byte 1   & \textbf{0} & \textbf{1} & \textbf{0} & \textbf{0} &
1811            \textbf{0} & \textbf{1} & \textbf{1} & \textbf{1} & \\
1812 byte 2   & \textbf{0} & \textbf{1} & \textbf{1} & \textbf{0} &
1813            \textbf{0} & \textbf{1} & \textbf{1} & \textbf{1} & \\
1814 byte 3   & \textbf{0} & \textbf{0} & \textbf{1} &
1815                        0 & 0 & 0 & 0 & 0 & $\leftarrow$        \\
1816 \multicolumn{1}{c|}{$\vdots$}&\multicolumn{8}{c}{$\vdots$}&    \\
1817 byte $n$ & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 &
1818 byte stream length: 4 bytes
1819 \end{tabular}
1820 \vspace{\baselineskip}
1821
1822 \subsection{Decoding Example}
1823
1824 The following example shows the state of the (8-bit) byte stream encoded in the
1825  previous example after several binary integers are decoded, including the
1826  location of the get pointer for the next bit to read.
1827
1828 Read a two bit unsigned integer from the example encoded above.
1829
1830 \begin{tabular}{r|ccccccccl}
1831 \multicolumn{1}{r}{} &&&$\downarrow$&&&&&&              \\
1832          & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 &              \\\cline{1-9}
1833 byte 0   & \textbf{1} & \textbf{1} & 0 & 0 & 1 & 1 & 1 & 0 & $\leftarrow$ \\
1834 byte 1   & 0 & 1 & 0 & 0 & 0 & 1 & 1 & 1 &              \\
1835 byte 2   & 0 & 1 & 1 & 0 & 0 & 1 & 1 & 1 &              \\
1836 byte 3   & 0 & 0 & 1 & 0 & 0 & 0 & 0 & 0 &
1837 byte stream length: 4 bytes
1838 \end{tabular}
1839 \vspace{\baselineskip}
1840
1841 Value read: 3 (\bin{11}).
1842
1843 Read another two bit unsigned integer from the example encoded above.
1844
1845 \begin{tabular}{r|ccccccccl}
1846 \multicolumn{1}{r}{} &&&&&$\downarrow$&&&&              \\
1847          & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 &              \\\cline{1-9}
1848 byte 0   & \textbf{1} & \textbf{1} & \textbf{0} & \textbf{0} &
1849                            1 & 1 & 1 & 0 & $\leftarrow$ \\
1850 byte 1   & 0 & 1 & 0 & 0 & 0 & 1 & 1 & 1 &              \\
1851 byte 2   & 0 & 1 & 1 & 0 & 0 & 1 & 1 & 1 &              \\
1852 byte 3   & 0 & 0 & 1 & 0 & 0 & 0 & 0 & 0 &
1853 byte stream length: 4 bytes
1854 \end{tabular}
1855 \vspace{\baselineskip}
1856
1857 Value read: 0 (\bin{00}).
1858
1859 Two things are worth noting here.
1860 \begin{itemize}
1861 \item
1862 Although these four bits were originally written as a single four-bit integer,
1863  reading some other combination of bit-widths from the bitstream is well
1864  defined.
1865 No artificial alignment boundaries are maintained in the bitstream.
1866 \item
1867 The first value is the integer `$3$' only because the context stated we were
1868  reading an unsigned integer.
1869 Had the context stated we were reading a signed integer, the returned value
1870  would have been the integer `$-1$'.
1871 \end{itemize}
1872
1873 \subsection{End-of-Packet Alignment}
1874
1875 The typical use of bitpacking is to produce many independent byte-aligned
1876  packets which are embedded into a larger byte-aligned container structure,
1877  such as an Ogg transport bitstream.
1878 Externally, each bitstream encoded as a byte stream MUST begin and end on a
1879  byte boundary.
1880 Often, the encoded packet bitstream is not an integer number of bytes, and so
1881  there is unused space in the last byte of a packet.
1882
1883 %r: I think the generality here is necessary to be consistent with our assertions
1884 %r: elsewhere about being independent of transport and byte width
1885 When a Theora encoder produces packets for embedding in a byte-aligned
1886  container, unused space in the last byte of a packet is always zeroed during
1887  the encoding process.
1888 Thus, should this unused space be read, it will return binary zeroes.
1889 There is no marker pattern or stuffing bits that will allow the decoder to
1890  obtain the exact size, in bits, of the original bitstream.
1891 This knowledge is not required for decoding.
1892
1893 Attempting to read past the end of an encoded packet results in an
1894  `end-of-packet' condition.
1895 Any further read operations after an `end-of-packet' condition shall also
1896  return `end-of-packet'.
1897 Unlike Vorbis, Theora does not use truncated packets as a normal mode of
1898  operation.
1899 Therefore if a decoder encounters the `end-of-packet' condition during normal
1900  decoding, it may attempt to use the bits that were read to recover as much of
1901  the encoded data as possible, signal a warning or error, or both.
1902
1903 \subsection{Reading Zero Bit Integers}
1904
1905 Reading a zero bit integer returns the value `$0$' and does not increment
1906  the stream pointer.
1907 Reading to the end of the packet, but not past the end, so that an
1908  `end-of-packet' condition is not triggered, and then reading a zero bit
1909  integer shall succeed, returning `$0$', and not trigger an `end-of-packet'
1910  condition.
1911 Reading a zero bit integer after a previous read has set the `end-of-packet'
1912  condition shall fail, also returning `end-of-packet'.
1913
1914 \chapter{Bitstream Headers}
1915 \label{sec:headers}
1916
1917 A Theora bitstream begins with three header packets.
1918 The header packets are, in order, the identification header, the comment
1919  header, and the setup header.
1920 All are required for decode compliance.
1921 An end-of-packet condition encountered while decoding the identification or
1922  setup header packets renders the stream undecodable.
1923 An end-of-packet condition encountered while decoding the comment header is a
1924  non-fatal error condition, and MAY be ignored by a decoder.
1925
1926 \paragraph{VP3 Compatibility}
1927
1928 VP3 relies on the headers provided by its container, usually either AVI or
1929  Quicktime.
1930 As such, several parameters available in these headers are not available to VP3
1931  streams.
1932 These are indicated as they appear in the sections below.
1933
1934 \section{Common Header Decode}
1935 \label{sub:common-header}
1936
1937 \paragraph{Input parameters:} None.
1938
1939 \paragraph{Output parameters:}\hfill\\*
1940 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
1941 \multicolumn{1}{c}{Name} &
1942 \multicolumn{1}{c}{Type} &
1943 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
1944 \multicolumn{1}{c}{Signed?} &
1945 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
1946 \bitvar{HEADERTYPE} & Integer & 8 & No & The type of the header being
1947  decoded. \\
1948 \bottomrule\end{tabularx}
1949
1950 \paragraph{Variables used:} None.
1951 \medskip
1952
1953 Each header packet begins with the same header fields, which are decoded as
1954  follows:
1955
1956 \begin{enumerate}
1957 \item
1958 Read an 8-bit unsigned integer as \bitvar{HEADERTYPE}.
1959 If the most significant bit of this integer is not set, then stop.
1960 This is not a header packet.
1961 \item
1962 Read 6 8-bit unsigned integers.
1963 If these do not have the values \hex{74}, \hex{68}, \hex{65}, \hex{6F},
1964  \hex{72}, and \hex{61}, respectively, then stop.
1965 This stream is not decodable by this specification.
1966 These values correspond to the ASCII values of the characters `t', `h', `e',
1967  `o', `r', and `a'.
1968 \end{enumerate}
1969
1970 Decode continues according to \bitvar{HEADERTYPE}.
1971 The identification header is type \hex{80}, the comment header is type
1972  \hex{81}, and the setup header is type \hex{82}.
1973 These packets must occur in the order: identification, comment, setup.
1974 %r: I clarified the initial-bit scheme here
1975 %TBT: Dashes let the reader know they'll have to pick up the rest of the
1976 %TBT:  sentence after the explanatory phrase.
1977 %TBT: Otherwise it just sounds like the bit must exist.
1978 All header packets have the most significant bit of the type
1979  field---which is the initial bit in the packet---set.
1980 This distinguishes them from video data packets in which the first bit
1981  is unset.
1982 % extra header packets are a feature Dan argued for way back when for
1983 % backward-compatible extensions (and icc colourspace for example)
1984 % I think it's reasonable
1985 %TBT: You can always just stick more stuff in the setup header.
1986 Packets with other header types (\hex{83}--\hex{FF}) are reserved and MUST be
1987  ignored.
1988
1989 \section{Identification Header Decode}
1990 \label{sec:idheader}
1991
1992 \paragraph{Input parameters:} None.
1993
1994 \paragraph{Output parameters:}\hfill\\*
1995 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
1996 \multicolumn{1}{c}{Name} &
1997 \multicolumn{1}{c}{Type} &
1998 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
1999 \multicolumn{1}{c}{Signed?} &
2000 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2001 \bitvar{VMAJ}     & Integer &  8 & No & The major version number. \\
2002 \bitvar{VMIN}     & Integer &  8 & No & The minor version number. \\
2003 \bitvar{VREV}     & Integer &  8 & No & The version revision number. \\
2004 \bitvar{FMBW}     & Integer & 16 & No & The width of the frame in macro
2005  blocks. \\
2006 \bitvar{FMBH}     & Integer & 16 & No & The height of the frame in macro
2007  blocks. \\
2008 \bitvar{NSBS}     & Integer & 32 & No & The total number of super blocks in a
2009  frame. \\
2010 \bitvar{NBS}      & Integer & 36 & No & The total number of blocks in a
2011  frame. \\
2012 \bitvar{NMBS}     & Integer & 32 & No & The total number of macro blocks in a
2013  frame. \\
2014 \bitvar{PICW}     & Integer & 20 & No & The width of the picture region in
2015  pixels. \\
2016 \bitvar{PICH}     & Integer & 20 & No & The height of the picture region in
2017  pixels. \\
2018 \bitvar{PICX}     & Integer &  8 & No & The X offset of the picture region in
2019  pixels. \\
2020 \bitvar{PICY}     & Integer &  8 & No & The Y offset of the picture region in
2021  pixels. \\
2022 \bitvar{FRN}      & Integer & 32 & No & The frame-rate numerator. \\
2023 \bitvar{FRD}      & Integer & 32 & No & The frame-rate denominator. \\
2024 \bitvar{PARN}     & Integer & 24 & No & The pixel aspect-ratio numerator. \\
2025 \bitvar{PARD}     & Integer & 24 & No & The pixel aspect-ratio denominator. \\
2026 \bitvar{CS}       & Integer &  8 & No & The color space. \\
2027 \bitvar{PF}       & Integer &  2 & No & The pixel format. \\
2028 \bitvar{NOMBR}    & Integer & 24 & No & The nominal bitrate of the stream, in
2029  bits per second. \\
2030 \bitvar{QUAL}     & Integer &  6 & No & The quality hint. \\
2031 \bitvar{KFGSHIFT} & Integer &  5 & No & The amount to shift the key frame
2032  number by in the granule position. \\
2033 \bottomrule\end{tabularx}
2034
2035 \paragraph{Variables used:} None.
2036 \medskip
2037
2038 The identification header is a short header with only a few fields used to
2039  declare the stream definitively as Theora and provide detailed information
2040  about the format of the fully decoded video data.
2041 The identification header is decoded as follows:
2042
2043 \begin{enumerate}
2044 \item
2045 Decode the common header fields according to the procedure described in
2046  Section~\ref{sub:common-header}.
2047 If \bitvar{HEADERTYPE} returned by this procedure is not \hex{80}, then stop.
2048 This packet is not the identification header.
2049 \item
2050 Read an 8-bit unsigned integer as \bitvar{VMAJ}.
2051 If \bitvar{VMAJ} is not $3$, then stop.
2052 This stream is not decodable according to this specification.
2053 \item
2054 Read an 8-bit unsigned integer as \bitvar{VMIN}.
2055 If \bitvar{VMIN} is not $2$, then stop.
2056 This stream is not decodable according to this specification.
2057 \item
2058 Read an 8-bit unsigned integer as \bitvar{VREV}.
2059 If \bitvar{VREV} is not $0$, then stop.
2060 This stream is not decodable according to this specification.
2061 \item
2062 Read a 16-bit unsigned integer as \bitvar{FMBW}.
2063 This MUST be greater than zero.
2064 This specifies the width of the coded frame in macro blocks.
2065 The actual width of the frame in pixels is $\bitvar{FMBW}*16$.
2066 \item
2067 Read a 16-bit unsigned integer as \bitvar{FMBH}.
2068 This MUST be greater than zero.
2069 This specifies the height of the coded frame in macro blocks.
2070 The actual height of the frame in pixels is $\bitvar{FMBH}*16$.
2071 \item
2072 Read a 24-bit unsigned integer as \bitvar{PICW}.
2073 This MUST be no greater than $(\bitvar{FMBW}*16)$.
2074 Note that 24 bits are read, even though only 20 bits are sufficient to specify
2075  any value of the picture width.
2076 This is done to preserve octet alignment in this header, to allow for a
2077  simplified parser implementation.
2078 \item
2079 Read a 24-bit unsigned integer as \bitvar{PICH}.
2080 This MUST be no greater than $(\bitvar{FMBH}*16)$.
2081 Together with \bitvar{PICW}, this specifies the size of the displayable picture
2082  region within the coded frame.
2083 See Figure~\ref{fig:pic-frame}.
2084 Again, 24 bits are read instead of 20.
2085 \item
2086 Read an 8-bit unsigned integer as \bitvar{PICX}.
2087 This MUST be no greater than $(\bitvar{FMBW}*16-\bitvar{PICW})$.
2088 \item
2089 Read an 8-bit unsigned integer as \bitvar{PICY}.
2090 This MUST be no greater than $(\bitvar{FMBH}*16-\bitvar{PICH})$.
2091 Together with \bitvar{PICX}, this specifies the location of the lower-left
2092  corner of the displayable picture region.
2093 See Figure~\ref{fig:pic-frame}.
2094 \item
2095 Read a 32-bit unsigned integer as \bitvar{FRN}.
2096 This MUST be greater than zero.
2097 \item
2098 Read a 32-bit unsigned integer as \bitvar{FRD}.
2099 This MUST be greater than zero.
2100 Theora is a fixed-frame-rate video codec.
2101 Frames are sampled at the constant rate of $\frac{\bitvar{FRN}}{\bitvar{FRD}}$
2102  frames per second.
2103 The presentation time of the first frame is at zero seconds.
2104 No mechanism is provided to specify a non-zero offset for the initial
2105  frame.
2106 \item
2107 Read a 24-bit unsigned integer as \bitvar{PARN}.
2108 \item
2109 Read a 24-bit unsigned integer as \bitvar{PARD}.
2110 Together with \bitvar{PARN}, this specifies the aspect ratio of the pixels
2111  within a frame, defined as the ratio of the physical width of a pixel to its
2112  physical height.
2113 This is given by the ratio $\bitvar{PARN}:\bitvar{PARD}$.
2114 If either of these fields is zero, this indicates that pixel aspect ratio
2115  information was not available to the encoder.
2116 In this case it MAY be specified by the application via an external means, or
2117  a default value of $1:1$ MAY be used.
2118 \item
2119 Read an 8-bit unsigned integer as \bitvar{CS}.
2120 This is a value from an enumerated list of the available color spaces, given in
2121  Table~\ref{tab:colorspaces}.
2122 The `Undefined' value indicates that color space information was not available
2123  to the encoder.
2124 It MAY be specified by the application via an external means.
2125 If a reserved value is given, a decoder MAY refuse to decode the stream.
2126 \begin{table}[htbp]
2127 \begin{center}
2128 \begin{tabular*}{215pt}{cl@{\extracolsep{\fill}}c}\toprule
2129 Value    & Color Space                               \\\midrule
2130 $0$      & Undefined.                                \\
2131 $1$      & Rec.~470M (see Section~\ref{sec:470m}).   \\
2132 $2$      & Rec.~470BG (see Section~\ref{sec:470bg}). \\
2133 $3$      & Reserved.                                 \\
2134 $\vdots$ &                                           \\
2135 $255$    &                                           \\
2136 \bottomrule\end{tabular*}
2137 \end{center}
2138 \caption{Enumerated List of Color Spaces}
2139 \label{tab:colorspaces}
2140 \end{table}
2141 \item
2142 Read a 24-bit unsigned integer as \bitvar{NOMBR}.
2143 The \bitvar{NOMBR} field is used only as a hint.
2144 For pure VBR streams, this value may be considerably off.
2145 The field MAY be set to zero to indicate that the encoder did not care to
2146  speculate.
2147  %TODO: units?
2148 \item
2149 Read a 6-bit unsigned integer as \bitvar{QUAL}.
2150 This value is used to provide a hint as to the relative quality of the stream
2151  when compared to others produced by the same encoder.
2152 Larger values indicate higher quality.
2153 This can be used, for example, to select among several streams containing the
2154  same material encoded with different settings.
2155 \item
2156 Read a 5-bit unsigned integer as \bitvar{KFGSHIFT}.
2157 The \bitvar{KFGSHIFT} is used to partition the granule position associated with
2158  each packet into two different parts.
2159 The frame number of the last key frame, starting from zero, is stored in the
2160  upper $64-\bitvar{KFGSHIFT}$ bits, while the lower \bitvar{KFGSHIFT} bits
2161  contain the number of frames since the last key frame.
2162 Complete details on the granule position mapping are specified in Section~REF.
2163 \item
2164 Read a 2-bit unsigned integer as \bitvar{PF}.
2165 The \bitvar{PF} field contains a value from an enumerated list of the available
2166  pixel formats, given in Table~\ref{tab:pixel-formats}.
2167 If the reserved value $1$ is given, stop.
2168 This stream is not decodable according to this specification.
2169
2170 \begin{table}[htbp]
2171 \begin{center}
2172 \begin{tabular*}{215pt}{cl@{\extracolsep{\fill}}c}\toprule
2173 Value & Pixel Format             \\\midrule
2174 $0$   & 4:2:0 (see Section~\ref{sec:420}). \\
2175 $1$   & Reserved.                \\
2176 $2$   & 4:2:2 (see Section~\ref{sec:422}). \\
2177 $3$   & 4:4:4 (see Section~\ref{sec:444}). \\
2178 \bottomrule\end{tabular*}
2179 \end{center}
2180 \caption{Enumerated List of Pixel Formats}
2181 \label{tab:pixel-formats}
2182 \end{table}
2183
2184 \item
2185 Read a 3-bit unsigned integer.
2186 These bits are reserved.
2187 If this value is not zero, then stop.
2188 This stream is not decodable according to this specification.
2189 \item
2190 Assign \bitvar{NSBS} a value according to \bitvar{PF}, as given by
2191  Table~\ref{tab:nsbs-for-pf}.
2192
2193 \begin{table}[bt]
2194 \begin{center}
2195 \begin{tabular}{cc}\toprule
2196 \bitvar{PF} & \bitvar{NSBS}                                     \\\midrule
2197 $0$         & $\begin{aligned}
2198 &((\bitvar{FMBW}+1)//2)*((\bitvar{FMBH}+1)//2)\\
2199 & +2*((\bitvar{FMBW}+3)//4)*((\bitvar{FMBH}+3)//4)
2200 \end{aligned}$                                                  \\\midrule
2201 $2$         & $\begin{aligned}
2202 &((\bitvar{FMBW}+1)//2)*((\bitvar{FMBH}+1)//2)\\
2203 & +2*((\bitvar{FMBW}+3)//4)*((\bitvar{FMBH}+1)//2)
2204 \end{aligned}$                                                  \\\midrule
2205 $3$         & $3*((\bitvar{FMBW}+1)//2)*((\bitvar{FMBH}+1)//2)$ \\
2206 \bottomrule\end{tabular}
2207 \end{center}
2208 \caption{Number of Super Blocks for each Pixel Format}
2209 \label{tab:nsbs-for-pf}
2210 \end{table}
2211
2212 \item
2213 Assign \bitvar{NBS} a value according to \bitvar{PF}, as given by
2214  Table~\ref{tab:nbs-for-pf}.
2215
2216 \begin{table}[tb]
2217 \begin{center}
2218 \begin{tabular}{cc}\toprule
2219 \bitvar{PF} & \bitvar{NBS}                     \\\midrule
2220 $0$         & $6*\bitvar{FMBW}*\bitvar{FMBH}$  \\\midrule
2221 $2$         & $8*\bitvar{FMBW}*\bitvar{FMBH}$  \\\midrule
2222 $3$         & $12*\bitvar{FMBW}*\bitvar{FMBH}$ \\
2223 \bottomrule\end{tabular}
2224 \end{center}
2225 \caption{Number of Blocks for each Pixel Format}
2226 \label{tab:nbs-for-pf}
2227 \end{table}
2228
2229 \item
2230 Assign \bitvar{NMBS} the value $(\bitvar{FMBW}*\bitvar{FMBH})$.
2231
2232 \end{enumerate}
2233
2234 \paragraph{VP3 Compatibility}
2235
2236 VP3 does not correctly handle frame sizes that are not a multiple of 16.
2237 Thus, \bitvar{PICW} and \bitvar{PICH} should be set to the frame width and
2238  height in pixels, respectively, and \bitvar{PICX} and \bitvar{PICY} should be
2239  set to zero.
2240 VP3 headers do not specify a color space.
2241 VP3 only supports the 4:2:0 pixel format.
2242
2243 \section{Comment Header}
2244 \label{sec:commentheader}
2245
2246 The Theora comment header is the second of three header packets that begin a
2247  Theora stream.
2248 It is meant for short text comments, not arbitrary metadata; arbitrary metadata
2249  belongs in a separate logical stream that provides greater structure and
2250  machine parseability.
2251
2252 %r: I tried to morph this a little more in the direction of our application space
2253 The comment field is meant to be used much like someone jotting a quick note on
2254  the label of a video.
2255 It should be a little information to remember the disc or tape by and explain it to
2256  others; a short, to-the-point text note that can be more than a couple words,
2257  but isn't going to be more than a short paragraph.
2258 The essentials, in other words, whatever they turn out to be, e.g.:
2259
2260 %TODO: Example
2261
2262 The comment header is stored as a logical list of eight-bit clean vectors; the
2263  number of vectors is bounded at $2^{32}-1$ and the length of each vector is
2264  limited to $2^{32}-1$ bytes.
2265 The vector length is encoded; the vector contents themselves are not null
2266  terminated.
2267 In addition to the vector list, there is a single vector for a vendor name,
2268  also eight-bit clean with a length encoded in 32 bits.
2269 %TODO: The 1.0 release of libtheora sets the vendor string to ...
2270
2271 \subsection{Comment Length Decode}
2272 \label{sub:comment-len}
2273
2274 \paragraph{Input parameters:} None.
2275
2276 \paragraph{Output parameters:}\hfill\\*
2277 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2278 \multicolumn{1}{c}{Name} &
2279 \multicolumn{1}{c}{Type} &
2280 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2281 \multicolumn{1}{c}{Signed?} &
2282 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2283 \bitvar{LEN}  & Integer & 32 & No & A single 32-bit length value. \\
2284 \bottomrule\end{tabularx}
2285
2286 \paragraph{Variables used:}\hfill\\*
2287 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2288 \multicolumn{1}{c}{Name} &
2289 \multicolumn{1}{c}{Type} &
2290 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2291 \multicolumn{1}{c}{Signed?} &
2292 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2293 \locvar{LEN0} & Integer &  8 & No & The first octet of the string length. \\
2294 \locvar{LEN1} & Integer &  8 & No & The second octet of the string length. \\
2295 \locvar{LEN2} & Integer &  8 & No & The third octet of the string length. \\
2296 \locvar{LEN3} & Integer &  8 & No & The fourth octet of the string
2297  length. \\
2298 \bottomrule\end{tabularx}
2299 \medskip
2300
2301 A single comment length is decoded as follows:
2302
2303 \begin{enumerate}
2304 \item
2305 Read an 8-bit unsigned integer as \locvar{LEN0}.
2306 \item
2307 Read an 8-bit unsigned integer as \locvar{LEN1}.
2308 \item
2309 Read an 8-bit unsigned integer as \locvar{LEN2}.
2310 \item
2311 Read an 8-bit unsigned integer as \locvar{LEN3}.
2312 \item
2313 Assign \bitvar{LEN} the value $(\locvar{LEN0}+(\locvar{LEN1}<<8)+
2314  (\locvar{LEN2}<<16)+(\locvar{LEN3}<<24))$.
2315 This construction is used so that on platforms with 8-bit bytes, the memory
2316  organization of the comment header is identical with that of Vorbis I,
2317  allowing for common parsing code despite the different bit packing
2318  conventions.
2319 \end{enumerate}
2320
2321 \subsection{Comment Header Decode}
2322
2323 \paragraph{Input parameters:} None.
2324
2325 \paragraph{Output parameters:}\hfill\\*
2326 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2327 \multicolumn{1}{c}{Name} &
2328 \multicolumn{1}{c}{Type} &
2329 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2330 \multicolumn{1}{c}{Signed?} &
2331 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2332 \bitvar{VENDOR}    & \multicolumn{3}{l}{String}       & The vendor string. \\
2333 \bitvar{NCOMMENTS} & Integer                & 32 & No & The number of user
2334  comments. \\
2335 \bitvar{COMMENTS}  & \multicolumn{3}{l}{String Array} & A list of
2336  \bitvar{NCOMMENTS} user comment values. \\
2337 \bottomrule\end{tabularx}
2338
2339 \paragraph{Variables used:}\hfill\\*
2340 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2341 \multicolumn{1}{c}{Name} &
2342 \multicolumn{1}{c}{Type} &
2343 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2344 \multicolumn{1}{c}{Signed?} &
2345 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2346 \locvar{\ci} & Integer & 32 & No & The index of the current user
2347  comment. \\
2348 \bottomrule\end{tabularx}
2349 \medskip
2350
2351 The complete comment header is decoded as follows:
2352
2353 \begin{enumerate}
2354 \item
2355 Decode the common header fields according to the procedure described in
2356  Section~\ref{sub:common-header}.
2357 If \bitvar{HEADERTYPE} returned by this procedure is not \hex{81}, then stop.
2358 This packet is not the comment header.
2359 \item
2360 Decode the length of the vendor string using the procedure given in
2361  Section~\ref{sub:comment-len} into \bitvar{LEN}.
2362 \item
2363 Read \bitvar{LEN} 8-bit unsigned integers.
2364 \item
2365 Set the string \bitvar{VENDOR} to the contents of these octets.
2366 \item
2367 Decode the number of user comments using the procedure given in
2368  Section~\ref{sub:comment-len} into \bitvar{LEN}.
2369 \item
2370 Assign \bitvar{NCOMMENTS} the value stored in \bitvar{LEN}.
2371 \item
2372 For each consecutive value of \locvar{\ci} from $0$ to
2373  $(\bitvar{NCOMMENTS}-1)$, inclusive:
2374 \begin{enumerate}
2375 \item
2376 Decode the length of the current user comment using the procedure given in
2377  Section~\ref{sub:comment-len} into \bitvar{LEN}.
2378 \item
2379 Read \bitvar{LEN} 8-bit unsigned integers.
2380 \item
2381 Set the string $\bitvar{COMMENTS}[\locvar{\ci}]$ to the contents of these
2382  octets.
2383 \end{enumerate}
2384 \end{enumerate}
2385
2386 The comment header comprises the entirety of the second header packet.
2387 Unlike the first header packet, it is not generally the only packet on the
2388  second page and may span multiple pages.
2389 The length of the comment header packet is (practically) unbounded.
2390 The comment header packet is not optional; it must be present in the stream
2391  even if it is logically empty.
2392
2393 %TODO: \paragraph{VP3 Compatibility}
2394
2395 \subsection{User Comment Format}
2396
2397 The user comment vectors are structured similarly to a UNIX environment
2398  variable.
2399 That is, comment fields consist of a field name and a corresponding value and
2400  look like:
2401 \begin{center}
2402 \begin{tabular}{rcl}
2403 $\bitvar{COMMENTS}[0]$ & = & ``TITLE=the look of Theora'' \\
2404 $\bitvar{COMMENTS}[1]$ & = & ``DIRECTOR=me''
2405 \end{tabular}
2406 \end{center}
2407
2408 The field name is case-insensitive and MUST consist of ASCII characters
2409  \hex{20} through \hex{7D}, \hex{3D} (`=') excluded.
2410 ASCII \hex{41} through \hex{5A} inclusive (characters `A'--`Z') are to be
2411  considered equivalent to ASCII \hex{61} through \hex{7A} inclusive
2412  (characters `a'--`z').
2413 An entirely empty field name---one that is zero characters long---is not
2414  disallowed.
2415
2416 The field name is immediately followed by ASCII \hex{3D} (`='); this equals
2417  sign is used to terminate the field name.
2418
2419 The data immediately after \hex{3D} until the end of the vector is the eight-bit
2420  clean value of the field contents encoded as a UTF-8 string~\cite{rfc2044}.
2421
2422 Field names MUST NOT be `internationalized'; this is a concession to
2423  simplicity, not an attempt to exclude the majority of the world that doesn't
2424  speak English.
2425 Applications MAY wish to present internationalized versions of the standard
2426  field names listed below to the user, but they are not to be stored in the
2427  bitstream.
2428 Field {\em contents}, however, use the UTF-8 character encoding to allow easy
2429  representation of any language.
2430
2431 Individual `vendors' MAY use non-standard field names within reason.
2432 The proper use of comment fields as human-readable notes has already been
2433  explained.
2434 Abuse will be discouraged.
2435
2436 There is no vendor-specific prefix to `non-standard' field names.
2437 Vendors SHOULD make some effort to avoid arbitrarily polluting the common
2438  namespace.
2439 %"and other bodies"?
2440 %If you're going to be that vague, you might as well not say anything at all.
2441 Xiph.org and other bodies will generally collect and rationalize the more
2442  useful tags to help with standardization.
2443
2444 Field names are not restricted to occur only once within a comment header.
2445 %TODO: Example
2446
2447 \paragraph{Field Names}
2448
2449 Below is a proposed, minimal list of standard field names with a description of
2450  their intended use.
2451 No field names are mandatory; a comment header may contain one or more, all, or
2452  none of the names in this list.
2453
2454 \begin{description}
2455 \item[TITLE:] Video name.
2456 %TODO: Complete list
2457 \end{description}
2458
2459 \section{Setup Header}
2460 \label{sec:setupheader}
2461
2462 The Theora setup header contains the limit values used to drive the loop
2463  filter, the base matrices and scale values used to build the dequantization
2464  tables, and the Huffman tables used to unpack the DCT tokens.
2465 Because the contents of this header are specific to Theora, no concessions have
2466  been made to keep the fields octet-aligned for easy parsing.
2467
2468 \subsection{Loop Filter Limit Table Decode}
2469 \label{sub:loop-filter-limits}
2470
2471 \paragraph{Input parameters:} None.
2472
2473 \paragraph{Output parameters:}\hfill\\*
2474 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2475 \multicolumn{1}{c}{Name} &
2476 \multicolumn{1}{c}{Type} &
2477 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2478 \multicolumn{1}{c}{Signed?} &
2479 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2480 \bitvar{LFLIMS}    & \multicolumn{1}{p{40pt}}{Integer array} &
2481                               7 & No & A 64-element array of loop filter limit
2482  values. \\
2483 \bottomrule\end{tabularx}
2484
2485 \paragraph{Variables used:}\hfill\\*
2486 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2487 \multicolumn{1}{c}{Name} &
2488 \multicolumn{1}{c}{Type} &
2489 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2490 \multicolumn{1}{c}{Signed?} &
2491 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2492 \locvar{\qi}    & Integer &  6 & No & The quantization index. \\
2493 \locvar{NBITS}  & Integer &  3 & No & The size of values being read in the
2494  current table. \\
2495 \bottomrule\end{tabularx}
2496 \medskip
2497
2498 This procedure decodes the table of loop filter limit values used to drive the
2499  loop filter, which is described in Section~\ref{sub:loop-filter-limits}.
2500 It is decoded as follows:
2501
2502 \begin{enumerate}
2503 \item
2504 Read a 3-bit unsigned integer as \locvar{NBITS}.
2505 \item
2506 For each consecutive value of \locvar{\qi} from $0$ to $63$, inclusive:
2507 \begin{enumerate}
2508 \item
2509 Read an \locvar{NBITS}-bit unsigned integer as $\bitvar{LFLIMS}[\locvar{\qi}]$.
2510 \end{enumerate}
2511 \end{enumerate}
2512
2513 \paragraph{VP3 Compatibility}
2514
2515 The loop filter limit values are hardcoded in VP3.
2516 The values used are given in Appendix~\ref{app:vp3-loop-filter-limits}.
2517
2518 \subsection{Quantization Parameters Decode}
2519 \label{sub:quant-params}
2520
2521 \paragraph{Input parameters:} None.
2522
2523 \paragraph{Output parameters:}\hfill\\*
2524 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2525 \multicolumn{1}{c}{Name} &
2526 \multicolumn{1}{c}{Type} &
2527 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2528 \multicolumn{1}{c}{Signed?} &
2529 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2530 \bitvar{ACSCALE} & \multicolumn{1}{p{40pt}}{Integer array} &
2531                              16 & No & A 64-element array of scale values for
2532  AC coefficients for each \qi\ value. \\
2533 \bitvar{DCSCALE} & \multicolumn{1}{p{40pt}}{Integer array} &
2534                              16 & No & A 64-element array of scale values for
2535  the DC coefficient for each \qi\ value. \\
2536 \bitvar{NBMS}    & Integer & 10 & No & The number of base matrices. \\
2537 \bitvar{BMS}     & \multicolumn{1}{p{50pt}}{2D Integer array} &
2538                               8 & No & A $\bitvar{NBMS}\times 64$ array
2539  containing the base matrices. \\
2540 \bitvar{NQRS}    & \multicolumn{1}{p{50pt}}{2D Integer array} &
2541                               6 & No & A $2\times 3$ array containing the
2542  number of quant ranges for a given \qti\ and \pli, respectively.
2543 This is at most $63$. \\
2544 \bitvar{QRSIZES} & \multicolumn{1}{p{50pt}}{3D Integer array} &
2545                               6 & No & A $2\times 3\times 63$ array of the
2546  sizes of each quant range for a given \qti\ and \pli, respectively.
2547 Only the first $\bitvar{NQRS}[\qti][\pli]$ values are used. \\
2548 \bitvar{QRBMIS}  & \multicolumn{1}{p{50pt}}{3D Integer array} &
2549                               9 & No & A $2\times 3\times 64$ array of the
2550  \bmi's used for each quant range for a given \qti\ and \pli, respectively.
2551 Only the first $(\bitvar{NQRS}[\qti][\pli]+1)$ values are used. \\
2552 \bottomrule\end{tabularx}
2553
2554 \paragraph{Variables used:}\hfill\\*
2555 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2556 \multicolumn{1}{c}{Name} &
2557 \multicolumn{1}{c}{Type} &
2558 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2559 \multicolumn{1}{c}{Signed?} &
2560 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2561 \locvar{\qti}    & Integer &  1 & No & A quantization type index.
2562 See Table~\ref{tab:quant-types}.\\
2563 \locvar{\qtj}    & Integer &  1 & No & A quantization type index. \\
2564 \locvar{\pli}    & Integer &  2 & No & A color plane index.
2565 See Table~\ref{tab:color-planes}.\\
2566 \locvar{\plj}    & Integer &  2 & No & A color plane index. \\
2567 \locvar{\qi}     & Integer &  6 & No & The quantization index. \\
2568 \locvar{\ci}     & Integer &  6 & No & The DCT coefficient index. \\
2569 \locvar{\bmi}    & Integer &  9 & No & The base matrix index. \\
2570 \locvar{\qri}    & Integer &  6 & No & The quant range index. \\
2571 \locvar{NBITS}   & Integer &  5 & No & The size of fields to read. \\
2572 \locvar{NEWQR}   & Integer &  1 & No & Flag that indicates a new set of quant
2573  ranges will be defined. \\
2574 \locvar{RPQR}    & Integer &  1 & No & Flag that indicates the quant ranges to
2575  copy will come from the same color plane. \\
2576 \bottomrule\end{tabularx}
2577 \medskip
2578
2579 The AC scale and DC scale values are defined in two simple tables with 64
2580  values each, one for each \qi\ value.
2581 The same scale values are used for every quantization type and color plane.
2582
2583 The base matrices for all quantization types and color planes are stored in a
2584  single table.
2585 These are then referenced by index in several sets of \term{quant ranges}.
2586 The purpose of the quant ranges is to specify which base matrices are used for
2587  which \qi\ values.
2588
2589 A set of quant ranges is defined for each quantization type and color plane.
2590 To save space in the header, bit flags allow a set of quant ranges to be copied
2591  from a previously defined set instead of being specified explicitly.
2592 Every set except the first one can be copied from the immediately preceding
2593  set.
2594 Similarly, if the quantization type is not $0$, the set can be copied from the
2595  set defined for the same color plane for the preceding quantization type.
2596 This formulation allows compact representation of, for example, the same
2597  set of quant ranges in both chroma channels, as is done in the original VP3,
2598  or the same set of quant ranges in INTRA and INTER modes.
2599
2600 Each quant range is defined by a size and two base matrix indices, one for each
2601  end of the range.
2602 The base matrix for the end of one range is used as the start of the next
2603  range, so that for $n$ ranges, $n+1$ base matrices are specified.
2604 The base matrices for the \qi\ values between the two endpoints of the range
2605  are generated by linear interpolation.
2606
2607 %TODO: figure
2608
2609 The location of the endpoints of each range is encoded by their size.
2610 The \qi\ value for the left end-point is the sum of the sizes of all preceding
2611  ranges, and the \qi\ value for the right end-point adds the size of the
2612  current range.
2613 Thus the sum of the sizes of all the ranges MUST be 63, so that the last range
2614  falls on the last possible \qi\ value.
2615
2616 The complete set of quantization parameters is decoded as follows:
2617
2618 \begin{enumerate}
2619 \item
2620 Read a 4-bit unsigned integer.
2621 Assign \locvar{NBITS} the value read, plus one.
2622 \item
2623 For each consecutive value of \locvar{\qi} from $0$ to $63$, inclusive:
2624 \begin{enumerate}
2625 \item
2626 Read an \locvar{NBITS}-bit unsigned integer as
2627  $\bitvar{ACSCALE}[\locvar{\qi}]$.
2628 \end{enumerate}
2629 \item
2630 Read a 4-bit unsigned integer.
2631 Assign \locvar{NBITS} the value read, plus one.
2632 \item
2633 For each consecutive value of \locvar{\qi} from $0$ to $63$, inclusive:
2634 \begin{enumerate}
2635 \item
2636 Read an \locvar{NBITS}-bit unsigned integer as
2637  $\bitvar{DCSCALE}[\locvar{\qi}]$.
2638 \end{enumerate}
2639 \item
2640 Read a 9-bit unsigned integer.
2641 Assign \bitvar{NBMS} the value decoded, plus one.
2642 \bitvar{NBMS} MUST be no greater than 384.
2643 \item
2644 For each consecutive value of \locvar{\bmi} from $0$ to $(\bitvar{NBMS}-1)$,
2645  inclusive:
2646 \begin{enumerate}
2647 \item
2648 For each consecutive value of \locvar{\ci} from $0$ to $63$, inclusive:
2649 \begin{enumerate}
2650 \item
2651 Read an 8-bit unsigned integer as $\bitvar{BMS}[\locvar{\bmi}][\locvar{\ci}]$.
2652 \end{enumerate}
2653 \end{enumerate}
2654 \item
2655 For each consecutive value of \locvar{\qti} from $0$ to $1$, inclusive:
2656 \begin{enumerate}
2657 \item
2658 For each consecutive value of \locvar{\pli} from $0$ to $2$, inclusive:
2659 \begin{enumerate}
2660 \item
2661 If $\locvar{\qti}>0$ or $\locvar{\pli}>0$, read a 1-bit unsigned integer as
2662  \locvar{NEWQR}.
2663 \item
2664 Else, assign \locvar{NEWQR} the value one.
2665 \item
2666 If \locvar{NEWQR} is zero, then we are copying a previously defined set of
2667  quant ranges.
2668 In that case:
2669 \begin{enumerate}
2670 \item
2671 If $\locvar{\qti}>0$, read a 1-bit unsigned integer as \locvar{RPQR}.
2672 \item
2673 Else, assign \locvar{RPQR} the value zero.
2674 \item
2675 If \locvar{RPQR} is one, assign \locvar{\qtj} the value $(\locvar{\qti}-1)$
2676  and assign \locvar{\plj} the value \locvar{\pli}.
2677 This selects the set of quant ranges defined for the same color plane as this
2678  one, but for the previous quantization type.
2679 \item
2680 Else assign \locvar{\qtj} the value $(3*\locvar{\qti}+\locvar{\pli}-1)//3$ and
2681  assign \locvar{\plj} the value $(\locvar{\pli}+2)\%3$.
2682 This selects the most recent set of quant ranges defined.
2683 \item
2684 Assign $\bitvar{NQRS}[\locvar{\qti}][\locvar{\pli}]$ the value
2685  $\bitvar{NQRS}[\locvar{\qtj}][\locvar{\plj}]$.
2686 \item
2687 Assign $\bitvar{QRSIZES}[\locvar{\qti}][\locvar{\pli}]$ the values in
2688  $\bitvar{QRSIZES}[\locvar{\qtj}][\locvar{\plj}]$.
2689 \item
2690 Assign $\bitvar{QRBMIS}[\locvar{\qti}][\locvar{\pli}]$ the values in
2691  $\bitvar{QRBMIS}[\locvar{\qtj}][\locvar{\plj}]$.
2692 \end{enumerate}
2693 \item
2694 Else, \locvar{NEWQR} is one, which indicates that we are defining a new set of
2695  quant ranges.
2696 In that case:
2697 \begin{enumerate}
2698 \item
2699 Assign $\locvar{\qri}$ the value zero.
2700 \item
2701 Assign $\locvar{\qi}$ the value zero.
2702 \item
2703 Read an $\ilog(\bitvar{NBMS}-1)$-bit unsigned integer as\\
2704  $\bitvar{QRBMIS}[\locvar{\qti}][\locvar{\pli}][\locvar{\qri}]$.
2705 If this is greater than or equal to \bitvar{NBMS}, stop.
2706 The stream is undecodable.
2707 \item
2708 \label{step:qr-loop}
2709 Read an $\ilog(63-\locvar{\qi})$-bit unsigned integer.
2710 Assign\\ $\bitvar{QRSIZES}[\locvar{\qti}][\locvar{\pli}][\locvar{\qri}]$ the value
2711  read, plus one.
2712 \item
2713 Assign \locvar{\qi} the value $\locvar{\qi}+
2714  \bitvar{QRSIZES}[\locvar{\qti}][\locvar{\pli}][\locvar{\qri}]$.
2715 \item
2716 Assign \locvar{\qri} the value $\locvar{\qri}+1$.
2717 \item
2718 Read an $\ilog(\bitvar{NBMS}-1)$-bit unsigned integer as\\
2719  $\bitvar{QRBMIS}[\locvar{\qti}][\locvar{\pli}][\locvar{\qri}]$. If this is greater than or equal to \bitvar{NBMS}, stop. The stream is undecodable.
2720 \item
2721 If \locvar{\qi} is less than 63, go back to step~\ref{step:qr-loop}.
2722 \item
2723 If \locvar{\qi} is greater than 63, stop.
2724 The stream is undecodable.
2725 \item
2726 Assign $\bitvar{NQRS}[\locvar{\qti}][\locvar{\pli}]$ the value \locvar{\qri}.
2727 \end{enumerate}
2728 \end{enumerate}
2729 \end{enumerate}
2730 \end{enumerate}
2731
2732 \paragraph{VP3 Compatibility}
2733
2734 The quantization parameters are hardcoded in VP3.
2735 The values used are given in Appendix~\ref{app:vp3-quant-params}.
2736
2737 \subsection{Computing a Quantization Matrix}
2738 \label{sub:quant-mat}
2739
2740 \paragraph{Input parameters:}\hfill\\*
2741 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2742 \multicolumn{1}{c}{Name} &
2743 \multicolumn{1}{c}{Type} &
2744 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2745 \multicolumn{1}{c}{Signed?} &
2746 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2747 \bitvar{ACSCALE} & \multicolumn{1}{p{40pt}}{Integer array} &
2748                              16 & No & A 64-element array of scale values for
2749  AC coefficients for each \qi\ value. \\
2750 \bitvar{DCSCALE} & \multicolumn{1}{p{40pt}}{Integer array} &
2751                              16 & No & A 64-element array of scale values for
2752  the DC coefficient for each \qi\ value. \\
2753 \bitvar{BMS}     & \multicolumn{1}{p{50pt}}{2D Integer array} &
2754                               8 & No & A $\bitvar{NBMS}\times 64$ array
2755  containing the base matrices. \\
2756 \bitvar{NQRS}    & \multicolumn{1}{p{50pt}}{2D Integer array} &
2757                               6 & No & A $2\times 3$ array containing the
2758  number of quant ranges for a given \qti\ and \pli, respectively.
2759 This is at most $63$. \\
2760 \bitvar{QRSIZES} & \multicolumn{1}{p{50pt}}{3D Integer array} &
2761                               6 & No & A $2\times 3\times 63$ array of the
2762  sizes of each quant range for a given \qti\ and \pli, respectively.
2763 Only the first $\bitvar{NQRS}[\qti][\pli]$ values are used. \\
2764 \bitvar{QRBMIS}  & \multicolumn{1}{p{50pt}}{3D Integer array} &
2765                               9 & No & A $2\times 3\times 64$ array of the
2766  \bmi's used for each quant range for a given \qti\ and \pli, respectively.
2767 Only the first $(\bitvar{NQRS}[\qti][\pli]+1)$ values are used. \\
2768 \bitvar{\qti}    & Integer &  1 & No & A quantization type index.
2769 See Table~\ref{tab:quant-types}.\\
2770 \bitvar{\pli}    & Integer &  2 & No & A color plane index.
2771 See Table~\ref{tab:color-planes}.\\
2772 \bitvar{\qi}     & Integer &  6 & No & The quantization index. \\
2773 \bottomrule\end{tabularx}
2774
2775 \paragraph{Output parameters:}\hfill\\*
2776 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2777 \multicolumn{1}{c}{Name} &
2778 \multicolumn{1}{c}{Type} &
2779 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2780 \multicolumn{1}{c}{Signed?} &
2781 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2782 \bitvar{QMAT} & \multicolumn{1}{p{40pt}}{Integer array} &
2783                              16 & No & A 64-element array of quantization
2784  values for each DCT coefficient in natural order. \\
2785 \bottomrule\end{tabularx}
2786
2787 \paragraph{Variables used:}\hfill\\*
2788 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2789 \multicolumn{1}{c}{Name} &
2790 \multicolumn{1}{c}{Type} &
2791 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2792 \multicolumn{1}{c}{Signed?} &
2793 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2794 \locvar{\ci}     & Integer &  6 & No & The DCT coefficient index. \\
2795 \locvar{\bmi}    & Integer &  9 & No & The base matrix index. \\
2796 \locvar{\bmj}    & Integer &  9 & No & The base matrix index. \\
2797 \locvar{\qri}    & Integer &  6 & No & The quant range index. \\
2798 \locvar{QISTART} & Integer &  6 & No & The left end-point of the \qi\ range. \\
2799 \locvar{QIEND}   & Integer &  6 & No & The right end-point of the \qi\ range. \\
2800 \locvar{BM}      & \multicolumn{1}{p{40pt}}{Integer array} &
2801                               8 & No & A 64-element array containing the
2802  interpolated base matrix. \\
2803 \locvar{QMIN}    & Integer & 16 & No & The minimum quantization value allowed
2804  for the current coefficient. \\
2805 \locvar{QSCALE}  & Integer & 16 & No & The current scale value. \\
2806 \bottomrule\end{tabularx}
2807 \medskip
2808
2809 The following procedure can be used to generate a single quantization matrix
2810  for a given quantization type, color plane, and \qi\ value, given the
2811  quantization parameters decoded in Section~\ref{sub:quant-params}.
2812
2813 Note that the product of the scale value and the base matrix value is in units
2814  of $100$ths of a pixel value, and thus is divided by $100$ to return it to
2815  units of a single pixel value.
2816 This value is then scaled by four, to match the scaling of the DCT output,
2817  which is also a factor of four larger than the orthonormal version of the
2818  transform.
2819
2820 \begin{enumerate}
2821 \item
2822 Assign \locvar{\qri} the index of a quant range such that
2823 \begin{displaymath}
2824 \bitvar{\qi} \ge \sum_{\qrj=0}^{\locvar{\qri}-1}
2825  \bitvar{QRSIZES}[\bitvar{\qti}][\bitvar{\pli}][\qrj],
2826 \end{displaymath}
2827  and
2828 \begin{displaymath}
2829 \bitvar{\qi} \le \sum_{\qrj=0}^{\locvar{\qri}}
2830  \bitvar{QRSIZES}[\bitvar{\qti}][\bitvar{\pli}][\qrj],
2831 \end{displaymath}
2832  where summation from $0$ to $-1$ is defined to be zero.
2833 If there is more than one such value of $\locvar{\qri}$, i.e., if \bitvar{\qi}
2834  lies on the boundary between two quant ranges, then the output will be the
2835  same regardless of which one is chosen.
2836 \item
2837 Assign \locvar{QISTART} the value
2838 \begin{displaymath}
2839 \sum_{\qrj=0}^{\qri-1} \bitvar{QRSIZES}[\bitvar{\qti}][\bitvar{\pli}][\qrj].
2840 \end{displaymath}
2841 \item
2842 Assign \locvar{QIEND} the value
2843 \begin{displaymath}
2844 \sum_{\qrj=0}^{\qri} \bitvar{QRSIZES}[\bitvar{\qti}][\bitvar{\pli}][\qrj].
2845 \end{displaymath}
2846 \item
2847 Assign \locvar{\bmi} the value
2848  $\bitvar{QRBMIS}[\bitvar{\qti}][\bitvar{\pli}][\qri]$.
2849 \item
2850 Assign \locvar{\bmj} the value
2851  $\bitvar{QRBMIS}[\bitvar{\qti}][\bitvar{\pli}][\qri+1]$.
2852 \item
2853 For each consecutive value of \locvar{\ci} from $0$ to $63$, inclusive:
2854 \begin{enumerate}
2855 \item
2856 Assign $\locvar{BM}[\locvar{\ci}]$ the value
2857 \begin{displaymath}
2858 \begin{split}
2859 (&2*(\locvar{QIEND}-\bitvar{\qi})*\bitvar{BMS}[\locvar{\bmi}][\locvar{\ci}]\\
2860  &+2*(\bitvar{\qi}-
2861    \locvar{QISTART})*\bitvar{BMS}[\locvar{\bmj}][\locvar{\ci}]\\
2862  &+\bitvar{QRSIZES}[\bitvar{\qti}][\bitvar{\pli}][\locvar{\qri}])//
2863  (2*\bitvar{QRSIZES}[\bitvar{\qti}][\bitvar{\pli}][\locvar{\qri}])
2864 \end{split}
2865 \end{displaymath}
2866 \item
2867 Assign \locvar{QMIN} the value given by Table~\ref{tab:qmin} according to
2868  \bitvar{\qti} and \locvar{\ci}.
2869
2870 \begin{table}[htbp]
2871 \begin{center}
2872 \begin{tabular}{clr}\toprule
2873 Coefficient      & \multicolumn{1}{c}{\bitvar{\qti}}
2874                                & \locvar{QMIN} \\\midrule
2875 $\locvar{\ci}=0$ & $0$ (Intra) & $16$          \\
2876 $\locvar{\ci}>0$ & $0$ (Intra) & $8$           \\
2877 $\locvar{\ci}=0$ & $1$ (Inter) & $32$          \\
2878 $\locvar{\ci}>0$ & $1$ (Inter) & $16$          \\
2879 \bottomrule\end{tabular}
2880 \end{center}
2881 \caption{Minimum Quantization Values}
2882 \label{tab:qmin}
2883 \end{table}
2884
2885 \item
2886 If \locvar{\ci} equals zero, assign $\locvar{QSCALE}$ the value
2887  $\bitvar{DCSCALE}[\bitvar{\qi}]$.
2888 \item
2889 Else, assign $\locvar{QSCALE}$ the value
2890  $\bitvar{ACSCALE}[\bitvar{\qi}]$.
2891 \item
2892 Assign $\bitvar{QMAT}[\locvar{\ci}]$ the value
2893 \begin{displaymath}
2894 \max(\locvar{QMIN},
2895  \min((\locvar{QSCALE}*\locvar{BM}[\locvar{\ci}]//100)*4,4096)).
2896 \end{displaymath}
2897 \end{enumerate}
2898 \end{enumerate}
2899
2900 \subsection{DCT Token Huffman Tables}
2901 \label{sub:huffman-tables}
2902
2903 \paragraph{Input parameters:} None.
2904
2905 \paragraph{Output parameters:}\hfill\\*
2906 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2907 \multicolumn{1}{c}{Name} &
2908 \multicolumn{1}{c}{Type} &
2909 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2910 \multicolumn{1}{c}{Signed?} &
2911 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2912 \bitvar{HTS} & \multicolumn{3}{l}{Huffman table array}
2913                                      & An 80-element array of Huffman tables
2914  with up to 32 entries each. \\
2915 \bottomrule\end{tabularx}
2916
2917 \paragraph{Variables used:}\hfill\\*
2918 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
2919 \multicolumn{1}{c}{Name} &
2920 \multicolumn{1}{c}{Type} &
2921 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
2922 \multicolumn{1}{c}{Signed?} &
2923 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
2924 \locvar{HBITS}   & Bit string & 32 & No & A string of up to 32 bits. \\
2925 \locvar{TOKEN}   & Integer    &  5 & No & A single DCT token value. \\
2926 \locvar{ISLEAF}  & Integer    &  1 & No & Flag that indicates if the current
2927  node of the tree being decoded is a leaf node. \\
2928 \bottomrule\end{tabularx}
2929 \medskip
2930
2931 The Huffman tables used to decode DCT tokens are stored in the setup header in
2932  the form of a binary tree.
2933 This enforces the requirements that the code be full---so that any sequence of
2934  bits will produce a valid sequence of tokens---and that the code be
2935  prefix-free so that there is no ambiguity when decoding.
2936
2937 One more restriction is placed on the tables that is not explicitly enforced by
2938  the bitstream syntax, but nevertheless must be obeyed by compliant encoders.
2939 There must be no more than 32 entries in a single table.
2940 Note that this restriction along with the fullness requirement limits the
2941  maximum size of a single Huffman code to 32 bits.
2942 It is probably a good idea to enforce this latter consequence explicitly when
2943  implementing the decoding procedure as a recursive algorithm, so as to prevent
2944  a possible stack overflow given an invalid bitstream.
2945
2946 Although there are 32 different DCT tokens, and thus a normal table will have
2947  exactly 32 entries, this is not explicitly required.
2948 It is allowable to use a Huffman code that omits some---but not all---of the
2949  possible token values.
2950 It is also allowable, if not particularly useful, to specify multiple codes for
2951  the same token value in a single table.
2952 Note also that token values may appear in the tree in any order.
2953 In particular, it is not safe to assume that token value zero (which ends a
2954  single block), has a Huffman code of all zeros.
2955
2956 The tree is decoded as follows:
2957
2958 \begin{enumerate}
2959 \item
2960 For each consecutive value of \locvar{\hti} from $0$ to $79$, inclusive:
2961 \begin{enumerate}
2962 \item
2963 Set \locvar{HBITS} to the empty string.
2964 \item
2965 \label{step:huff-tree-loop}
2966 If \locvar{HBITS} is longer than 32 bits in length, stop.
2967 The stream is undecodable.
2968 \item
2969 Read a 1-bit unsigned integer as \locvar{ISLEAF}.
2970 \item
2971 If \locvar{ISLEAF} is one:
2972 \begin{enumerate}
2973 \item
2974 If the number of entries in table $\bitvar{HTS}[\locvar{\hti}]$ is already 32,
2975  stop.
2976 The stream is undecodable.
2977 \item
2978 Read a 5-bit unsigned integer as \locvar{TOKEN}.
2979 \item
2980 Add the pair $(\locvar{HBITS},\locvar{TOKEN})$ to Huffman table
2981  $\bitvar{HTS}[\locvar{\hti}]$.
2982 \end{enumerate}
2983 \item
2984 Otherwise:
2985 \begin{enumerate}
2986 \item
2987 Add a `0' to the end of \locvar{HBITS}.
2988 \item
2989 Decode the `0' sub-tree using this procedure, starting from
2990  step~\ref{step:huff-tree-loop}.
2991 \item
2992 Remove the `0' from the end of \locvar{HBITS} and add a `1' to the end of
2993  \locvar{HBITS}.
2994 \item
2995 Decode the `1' sub-tree using this procedure, starting from
2996  step~\ref{step:huff-tree-loop}.
2997 \item
2998 Remove the `1' from the end of \locvar{HBITS}.
2999 \end{enumerate}
3000 \end{enumerate}
3001 \end{enumerate}
3002
3003 \paragraph{VP3 Compatibility}
3004
3005 The DCT token Huffman tables are hardcoded in VP3.
3006 The values used are given in Appendix~\ref{app:vp3-huffman-tables}.
3007
3008 \subsection{Setup Header Decode}
3009
3010 \paragraph{Input parameters:} None.
3011
3012 \paragraph{Output parameters:}\hfill\\*
3013 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3014 \multicolumn{1}{c}{Name} &
3015 \multicolumn{1}{c}{Type} &
3016 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3017 \multicolumn{1}{c}{Signed?} &
3018 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3019 \bitvar{LFLIMS}  & \multicolumn{1}{p{40pt}}{Integer array} &
3020                               7 & No & A 64-element array of loop filter limit
3021  values. \\
3022 \bitvar{ACSCALE} & \multicolumn{1}{p{40pt}}{Integer array} &
3023                              16 & No & A 64-element array of scale values for
3024  AC coefficients for each \qi\ value. \\
3025 \bitvar{DCSCALE} & \multicolumn{1}{p{40pt}}{Integer array} &
3026                              16 & No & A 64-element array of scale values for
3027  the DC coefficient for each \qi\ value. \\
3028 \bitvar{NBMS}    & Integer & 10 & No & The number of base matrices. \\
3029 \bitvar{BMS}     & \multicolumn{1}{p{50pt}}{2D Integer array} &
3030                               8 & No & A $\bitvar{NBMS}\times 64$ array
3031  containing the base matrices. \\
3032 \bitvar{NQRS}    & \multicolumn{1}{p{50pt}}{2D Integer array} &
3033                               6 & No & A $2\times 3$ array containing the
3034  number of quant ranges for a given \qti\ and \pli, respectively.
3035 This is at most $63$. \\
3036 \bitvar{QRSIZES} & \multicolumn{1}{p{50pt}}{3D Integer array} &
3037                               6 & No & A $2\times 3\times 63$ array of the
3038  sizes of each quant range for a given \qti\ and \pli, respectively.
3039 Only the first $\bitvar{NQRS}[\qti][\pli]$ values will be used. \\
3040 \bitvar{QRBMIS}  & \multicolumn{1}{p{50pt}}{3D Integer array} &
3041                               9 & No & A $2\times 3\times 64$ array of the
3042  \bmi's used for each quant range for a given \qti\ and \pli, respectively.
3043 Only the first $(\bitvar{NQRS}[\qti][\pli]+1)$ values will be used. \\
3044 \bitvar{HTS} & \multicolumn{3}{l}{Huffman table array}
3045                                      & An 80-element array of Huffman tables
3046  with up to 32 entries each. \\
3047 \bottomrule\end{tabularx}
3048
3049 \paragraph{Variables used:} None.
3050 \medskip
3051
3052 The complete setup header is decoded as follows:
3053
3054 \begin{enumerate}
3055 \item
3056 Decode the common header fields according to the procedure described in
3057  Section~\ref{sub:common-header}.
3058 If \bitvar{HEADERTYPE} returned by this procedure is not \hex{82}, then stop.
3059 This packet is not the setup header.
3060 \item
3061 Decode the loop filter limit value table using the procedure given in
3062  Section~\ref{sub:loop-filter-limits} into \bitvar{LFLIMS}.
3063 \item
3064 Decode the quantization parameters using the procedure given in
3065  Section~\ref{sub:quant-params}.
3066 The results are stored in \bitvar{ACSCALE}, \bitvar{DCSCALE}, \bitvar{NBMS},
3067  \bitvar{BMS}, \bitvar{NQRS}, \bitvar{QRSIZES}, and \bitvar{QRBMIS}.
3068 \item
3069 Decode the DCT token Huffman tables using the procedure given in
3070  Section~\ref{sub:huffman-tables} into \bitvar{HTS}.
3071 \end{enumerate}
3072
3073 \chapter{Frame Decode}
3074
3075 This chapter describes the complete procedure necessary to decode a single
3076  frame.
3077 This begins with the frame header, followed by coded block flags, macro block
3078  modes, motion vectors, block-level \qi\ values, and finally the DCT residual
3079  tokens, which are used to reconstruct the frame.
3080
3081 \section{Frame Header Decode}
3082 \label{sub:frame-header}
3083
3084 \paragraph{Input parameters:} None.
3085
3086 \paragraph{Output parameters:}\hfill\\*
3087 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3088 \multicolumn{1}{c}{Name} &
3089 \multicolumn{1}{c}{Type} &
3090 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3091 \multicolumn{1}{c}{Signed?} &
3092 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3093 \bitvar{FTYPE}   & Integer &  1 & No & The frame type. \\
3094 \bitvar{NQIS}    & Integer &  2 & No & The number of \qi\ values. \\
3095 \bitvar{QIS}     & \multicolumn{1}{p{40pt}}{Integer array} &
3096                              6 & No & An \bitvar{NQIS}-element array of
3097  \qi\ values. \\
3098 \bottomrule\end{tabularx}
3099
3100 \paragraph{Variables used:}\hfill\\*
3101 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3102 \multicolumn{1}{c}{Name} &
3103 \multicolumn{1}{c}{Type} &
3104 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3105 \multicolumn{1}{c}{Signed?} &
3106 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3107 \locvar{MOREQIS} & Integer &  1 & No & A flag indicating there are more
3108  \qi\ values to be decoded. \\
3109 \bottomrule\end{tabularx}
3110 \medskip
3111
3112 The frame header selects which type of frame is being decoded, intra or inter,
3113  and contains the list of \qi\ values that will be used in this frame.
3114 The first \qi\ value will be used for {\em all} DC coefficients in all blocks.
3115 This is done to ensure that DC prediction, which is done in the quantized
3116  domain, works as expected.
3117 The AC coefficients, however, can be dequantized using any \qi\ value on the
3118  list, selected on a block-by-block basis.
3119
3120 \begin{enumerate}
3121 \item
3122 Read a 1-bit unsigned integer.
3123 If the value read is not zero, stop.
3124 This is not a data packet.
3125 \item
3126 Read a 1-bit unsigned integer as \bitvar{FTYPE}.
3127 This is the type of frame being decoded, as given in
3128  Table~\ref{tab:frame-type}.
3129 If this is the first frame being decoded, this MUST be zero.
3130
3131 \begin{table}[htbp]
3132 \begin{center}
3133 \begin{tabular}{cl}\toprule
3134 \bitvar{FTYPE} & Frame Type  \\\midrule
3135 $0$            & Intra frame \\
3136 $1$            & Inter frame \\
3137 \bottomrule\end{tabular}
3138 \end{center}
3139 \caption{Frame Type Values}
3140 \label{tab:frame-type}
3141 \end{table}
3142
3143 \item
3144 Read in a 6-bit unsigned integer as $\bitvar{QIS}[0]$.
3145 \item
3146 Read a 1-bit unsigned integer as \locvar{MOREQIS}.
3147 \item
3148 If \locvar{MOREQIS} is zero, set \bitvar{NQIS} to 1.
3149 \item
3150 Otherwise:
3151 \begin{enumerate}
3152 \item
3153 Read in a 6-bit unsigned integer as $\bitvar{QIS}[1]$.
3154 \item
3155 Read a 1-bit unsigned integer as \locvar{MOREQIS}.
3156 \item
3157 If \locvar{MOREQIS} is zero, set \bitvar{NQIS} to 2.
3158 \item
3159 Otherwise:
3160 \begin{enumerate}
3161 \item
3162 Read in a 6-bit unsigned integer as $\bitvar{QIS}[2]$.
3163 \item
3164 Set \bitvar{NQIS} to 3.
3165 \end{enumerate}
3166 \end{enumerate}
3167 \item
3168 If \bitvar{FTYPE} is 0, read a 3-bit unsigned integer.
3169 These bits are reserved.
3170 If this value is not zero, stop.
3171 This frame is not decodable according to this specification.
3172 \end{enumerate}
3173
3174 \paragraph{VP3 Compatibility}
3175
3176 The precise format of the frame header is substantially different in Theora
3177  than in VP3.
3178 The original VP3 format includes a larger number of unused, reserved bits that
3179  are required to be zero.
3180 The original VP3 frame header also can contain only a single \qi\ value,
3181  because VP3 does not support block-level \qi\ values and uses the same
3182  \qi\ value for all the coefficients in a frame.
3183
3184 \section{Run-Length Encoded Bit Strings}
3185
3186 Two variations of run-length encoding are used to store sequences of bits for
3187  the block coded flags and the block-level \qi\ values.
3188 The procedures to decode these bit sequences are specified in the following two
3189  sections.
3190
3191 \subsection{Long-Run Bit String Decode}
3192 \label{sub:long-run}
3193
3194 \paragraph{Input parameters:}\hfill\\*
3195 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3196 \multicolumn{1}{c}{Name} &
3197 \multicolumn{1}{c}{Type} &
3198 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3199 \multicolumn{1}{c}{Signed?} &
3200 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3201 \bitvar{NBITS}   & Integer & 36 & No & The number of bits to decode. \\
3202 \bottomrule\end{tabularx}
3203
3204 \paragraph{Output parameters:}\hfill\\*
3205 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3206 \multicolumn{1}{c}{Name} &
3207 \multicolumn{1}{c}{Type} &
3208 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3209 \multicolumn{1}{c}{Signed?} &
3210 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3211 \bitvar{BITS}    & Bit string &    &    & The decoded bits. \\
3212 \bottomrule\end{tabularx}
3213
3214 \paragraph{Variables used:}\hfill\\*
3215 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3216 \multicolumn{1}{c}{Name} &
3217 \multicolumn{1}{c}{Type} &
3218 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3219 \multicolumn{1}{c}{Signed?} &
3220 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3221 \locvar{LEN}    & Integer & 36 & No & The number of bits decoded so far. \\
3222 \locvar{BIT}    & Integer &  1 & No & The value associated with the current
3223  run. \\
3224 \locvar{RLEN}   & Integer & 13 & No & The length of the current run. \\
3225 \locvar{RBITS}  & Integer &  4 & No & The number of extra bits needed to
3226  decode the run length. \\
3227 \locvar{RSTART} & Integer &  6 & No & The start of the possible run-length
3228  values for a given Huffman code. \\
3229 \locvar{ROFFS}  & Integer & 12 & No & The offset from \locvar{RSTART} of the
3230  run-length. \\
3231 \bottomrule\end{tabularx}
3232 \medskip
3233
3234 There is no practical limit to the number of consecutive 0's and 1's that can
3235  be decoded with this procedure.
3236 In reality, the run length is limited by the number of blocks in a single
3237  frame, because more will never be requested.
3238 A separate procedure described in Section~\ref{sub:short-run} is used when
3239  there is a known limit on the maximum size of the runs.
3240
3241 For the first run, a single bit value is read, and then a Huffman-coded
3242  representation of a run length is decoded, and that many copies of the bit
3243  value are appended to the bit string.
3244 For each consecutive run, the value of the bit is toggled instead of being read
3245  from the bitstream.
3246
3247 The only exception is if the length of the previous run was 4129, the maximum
3248  possible length encodable by the Huffman-coded representation.
3249 In this case another bit value is read from the stream, to allow for
3250  consecutive runs of 0's or 1's longer than this maximum.
3251
3252 Note that in both cases---for the first run and after a run of length 4129---if
3253  no more bits are needed, then no bit value is read.
3254
3255 The complete decoding procedure is as follows:
3256
3257 \begin{enumerate}
3258 \item
3259 Assign \locvar{LEN} the value 0.
3260 \item
3261 Assign \bitvar{BITS} the empty string.
3262 \item
3263 If \locvar{LEN} equals \bitvar{NBITS}, return the completely decoded string
3264  \bitvar{BITS}.
3265 \item
3266 Read a 1-bit unsigned integer as \locvar{BIT}.
3267 \item
3268 \label{step:long-run-loop}
3269 Read a bit at a time until one of the Huffman codes given in
3270  Table~\ref{tab:long-run} is recognized.
3271
3272 \begin{table}[htbp]
3273 \begin{center}
3274 \begin{tabular}{lrrl}\toprule
3275 Huffman Code & \locvar{RSTART} & \locvar{RBITS} & Run Lengths     \\\midrule
3276 \bin{0}      & $1$             & $0$            & $1$             \\
3277 \bin{10}     & $2$             & $1$            & $2\ldots 3$     \\
3278 \bin{110}    & $4$             & $1$            & $4\ldots 5$     \\
3279 \bin{1110}   & $6$             & $2$            & $6\ldots 9$     \\
3280 \bin{11110}  & $10$            & $3$            & $10\ldots 17$   \\
3281 \bin{111110} & $18$            & $4$            & $18\ldots 33$   \\
3282 \bin{111111} & $34$            & $12$           & $34\ldots 4129$ \\
3283 \bottomrule\end{tabular}
3284 \end{center}
3285 \caption{Huffman Codes for Long Run Lengths}
3286 \label{tab:long-run}
3287 \end{table}
3288
3289 \item
3290 Assign \locvar{RSTART} and \locvar{RBITS} the values given in
3291  Table~\ref{tab:long-run} according to the Huffman code read.
3292 \item
3293 Read an \locvar{RBITS}-bit unsigned integer as \locvar{ROFFS}.
3294 \item
3295 Assign \locvar{RLEN} the value $(\locvar{RSTART}+\locvar{ROFFS})$.
3296 \item
3297 Append \locvar{RLEN} copies of \locvar{BIT} to \bitvar{BITS}.
3298 \item
3299 Add \locvar{RLEN} to the value \locvar{LEN}.
3300 \locvar{LEN} MUST be less than or equal to \bitvar{NBITS}.
3301 \item
3302 If \locvar{LEN} equals \bitvar{NBITS}, return the completely decoded string
3303  \bitvar{BITS}.
3304 \item
3305 If \locvar{RLEN} equals 4129, read a 1-bit unsigned integer as \locvar{BIT}.
3306 \item
3307 Otherwise, assign \locvar{BIT} the value $(1-\locvar{BIT})$.
3308 \item
3309 Continue decoding runs from step~\ref{step:long-run-loop}.
3310 \end{enumerate}
3311
3312 \paragraph{VP3 Compatibility}
3313
3314 VP3 does not read a new bit value after decoding a run length of 4129.
3315 This limits the maximum number of consecutive 0's or 1's to 4129 in
3316  VP3-compatible streams.
3317 For reasonable video sizes of $1920\times 1080$ or less in 4:2:0 format---the
3318  only pixel format VP3 supports---this does not pose any problems because runs
3319  longer than 4129 are not needed.
3320
3321 \subsection{Short-Run Bit String Decode}
3322 \label{sub:short-run}
3323
3324 \paragraph{Input parameters:}\hfill\\*
3325 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3326 \multicolumn{1}{c}{Name} &
3327 \multicolumn{1}{c}{Type} &
3328 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3329 \multicolumn{1}{c}{Signed?} &
3330 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3331 \bitvar{NBITS}   & Integer & 36 & No & The number of bits to decode. \\
3332 \bottomrule\end{tabularx}
3333
3334 \paragraph{Output parameters:}\hfill\\*
3335 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3336 \multicolumn{1}{c}{Name} &
3337 \multicolumn{1}{c}{Type} &
3338 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3339 \multicolumn{1}{c}{Signed?} &
3340 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3341 \bitvar{BITS}    & Bit string &    &    & The decoded bits. \\
3342 \bottomrule\end{tabularx}
3343
3344 \paragraph{Variables used:}\hfill\\*
3345 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3346 \multicolumn{1}{c}{Name} &
3347 \multicolumn{1}{c}{Type} &
3348 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3349 \multicolumn{1}{c}{Signed?} &
3350 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3351 \locvar{LEN}    & Integer & 36 & No & The number of bits decoded so far. \\
3352 \locvar{BIT}    & Integer &  1 & No & The value associated with the current
3353  run. \\
3354 \locvar{RLEN}   & Integer & 13 & No & The length of the current run. \\
3355 \locvar{RBITS}  & Integer &  4 & No & The number of extra bits needed to
3356  decode the run length. \\
3357 \locvar{RSTART} & Integer &  6 & No & The start of the possible run-length
3358  values for a given Huffman code. \\
3359 \locvar{ROFFS}  & Integer & 12 & No & The offset from \locvar{RSTART} of the
3360  run-length. \\
3361 \bottomrule\end{tabularx}
3362 \medskip
3363
3364 This procedure is similar to the procedure outlined in
3365  Section~\ref{sub:long-run}, except that the maximum number of consecutive 0's
3366  or 1's is limited to 30.
3367 This is the maximum run length needed when encoding a bit for each of the 16
3368  blocks in a super block when it is known that not all the bits in a super
3369  block are the same.
3370
3371 The complete decoding procedure is as follows:
3372
3373 \begin{enumerate}
3374 \item
3375 Assign \locvar{LEN} the value 0.
3376 \item
3377 Assign \bitvar{BITS} the empty string.
3378 \item
3379 If \locvar{LEN} equals \bitvar{NBITS}, return the completely decoded string
3380  \bitvar{BITS}.
3381 \item
3382 Read a 1-bit unsigned integer as \locvar{BIT}.
3383 \item
3384 \label{step:short-run-loop}
3385 Read a bit at a time until one of the Huffman codes given in
3386  Table~\ref{tab:short-run} is recognized.
3387
3388 \begin{table}[htbp]
3389 \begin{center}
3390 \begin{tabular}{lrrl}\toprule
3391 Huffman Code & \locvar{RSTART} & \locvar{RBITS} & Run Lengths   \\\midrule
3392 \bin{0}      & $1$             & $1$            & $1\ldots 2$   \\
3393 \bin{10}     & $3$             & $1$            & $3\ldots 4$   \\
3394 \bin{110}    & $5$             & $1$            & $5\ldots 6$   \\
3395 \bin{1110}   & $7$             & $2$            & $7\ldots 10$  \\
3396 \bin{11110}  & $11$            & $2$            & $11\ldots 14$ \\
3397 \bin{11111}  & $15$            & $4$            & $15\ldots 30$ \\
3398 \bottomrule\end{tabular}
3399 \end{center}
3400 \caption{Huffman Codes for Short Run Lengths}
3401 \label{tab:short-run}
3402 \end{table}
3403
3404 \item
3405 Assign \locvar{RSTART} and \locvar{RBITS} the values given in
3406  Table~\ref{tab:short-run} according to the Huffman code read.
3407 \item
3408 Read an \locvar{RBITS}-bit unsigned integer as \locvar{ROFFS}.
3409 \item
3410 Assign \locvar{RLEN} the value $(\locvar{RSTART}+\locvar{ROFFS})$.
3411 \item
3412 Append \locvar{RLEN} copies of \locvar{BIT} to \bitvar{BITS}.
3413 \item
3414 Add \locvar{RLEN} to the value \locvar{LEN}.
3415 \locvar{LEN} MUST be less than or equal to \bitvar{NBITS}.
3416 \item
3417 If \locvar{LEN} equals \bitvar{NBITS}, return the completely decoded string
3418  \bitvar{BITS}.
3419 \item
3420 Assign \locvar{BIT} the value $(1-\locvar{BIT})$.
3421 \item
3422 Continue decoding runs from step~\ref{step:short-run-loop}.
3423 \end{enumerate}
3424
3425 \section{Coded Block Flags Decode}
3426 \label{sub:coded-blocks}
3427
3428 \paragraph{Input parameters:}\hfill\\*
3429 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3430 \multicolumn{1}{c}{Name} &
3431 \multicolumn{1}{c}{Type} &
3432 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3433 \multicolumn{1}{c}{Signed?} &
3434 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3435 \bitvar{FTYPE}   & Integer &  1 & No & The frame type. \\
3436 \bitvar{NSBS}    & Integer & 32 & No & The total number of super blocks in a
3437  frame. \\
3438 \bitvar{NBS}     & Integer & 36 & No & The total number of blocks in a
3439  frame. \\
3440 \bottomrule\end{tabularx}
3441
3442 \paragraph{Output parameters:}\hfill\\*
3443 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3444 \multicolumn{1}{c}{Name} &
3445 \multicolumn{1}{c}{Type} &
3446 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3447 \multicolumn{1}{c}{Signed?} &
3448 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3449 \bitvar{BCODED}   & \multicolumn{1}{p{40pt}}{Integer Array} &
3450                                1 & No & An \bitvar{NBS}-element array of flags
3451  indicating which blocks are coded. \\
3452 \bottomrule\end{tabularx}
3453
3454 \paragraph{Variables used:}\hfill\\*
3455 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3456 \multicolumn{1}{c}{Name} &
3457 \multicolumn{1}{c}{Type} &
3458 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3459 \multicolumn{1}{c}{Signed?} &
3460 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3461 \locvar{NBITS}    & Integer & 36 & No & The length of a bit string to decode. \\
3462 \locvar{BITS}     & Bit string & &    & A decoded set of flags. \\
3463 \locvar{SBPCODED} & \multicolumn{1}{p{40pt}}{Integer Array} &
3464                                1 & No & An \bitvar{NSBS}-element array of flags
3465  indicating whether or not each super block is partially coded. \\
3466 \locvar{SBFCODED} & \multicolumn{1}{p{40pt}}{Integer Array} &
3467                                1 & No & An \bitvar{NSBS}-element array of flags
3468  indicating whether or not each non-partially coded super block is fully
3469  coded. \\
3470 \locvar{\sbi}     & Integer & 32 & No & The index of the current super
3471  block. \\
3472 \locvar{\bi}      & Integer & 36 & No & The index of the current block in coded
3473  order. \\
3474 \bottomrule\end{tabularx}
3475 \medskip
3476
3477 This procedure determines which blocks are coded in a given frame.
3478 In an intra frame, it marks all blocks coded.
3479 In an inter frame, however, any or all of the blocks may remain uncoded.
3480 The output is a list of bit flags, one for each block, marking it coded or not
3481  coded.
3482
3483 It is important to note that flags are still decoded for any blocks which lie
3484  entirely outside the picture region, even though they are not displayed.
3485 Encoders MAY choose to code such blocks.
3486 Decoders MUST faithfully reconstruct such blocks, because their contents can be
3487  used for predictors in future frames.
3488 Flags are \textit{not} decoded for portions of a super block which lie outside
3489  the full frame, as there are no blocks in those regions.
3490
3491 The complete procedure is as follows:
3492
3493 \begin{enumerate}
3494 \item
3495 If \bitvar{FTYPE} is zero (intra frame):
3496 \begin{enumerate}
3497 \item
3498 For each consecutive value of \locvar{\bi} from 0 to $(\bitvar{NBS}-1)$, assign
3499  $\bitvar{BCODED}[\locvar{\bi}]$ the value one.
3500 \end{enumerate}
3501 \item
3502 Otherwise (inter frame):
3503 \begin{enumerate}
3504 \item
3505 Assign \locvar{NBITS} the value \bitvar{NSBS}.
3506 \item
3507 Read an \locvar{NBITS}-bit bit string into \locvar{BITS}, using the procedure
3508  described in Section~\ref{sub:long-run}.
3509 This represents the list of partially coded super blocks.
3510 \item
3511 For each consecutive value of \locvar{\sbi} from 0 to $(\bitvar{NSBS}-1)$,
3512  remove the bit at the head of the string \locvar{BITS} and assign it to
3513  $\locvar{SBPCODED}[\locvar{\sbi}]$.
3514 \item
3515 Assign \locvar{NBITS} the total number of super blocks such that \\
3516  $\locvar{SBPCODED}[\locvar{\sbi}]$ equals zero.
3517 \item
3518 Read an \locvar{NBITS}-bit bit string into \locvar{BITS}, using the procedure
3519  described in Section~\ref{sub:long-run}.
3520 This represents the list of fully coded super blocks.
3521 \item
3522 For each consecutive value of \locvar{\sbi} from 0 to $(\bitvar{NSBS}-1)$ such
3523  that $\locvar{SBPCODED}[\locvar{\sbi}]$ equals zero, remove the bit at the
3524  head of the string \locvar{BITS} and assign it to
3525  $\locvar{SBFCODED}[\locvar{\sbi}]$.
3526 \item
3527 Assign \locvar{NBITS} the number of blocks contained in super blocks where
3528  $\locvar{SBPCODED}[\locvar{\sbi}]$ equals one.
3529 Note that this might {\em not} be equal to 16 times the number of partially
3530  coded super blocks, since super blocks which overlap the edge of the frame
3531  will have fewer than 16 blocks in them.
3532 \item
3533 Read an \locvar{NBITS}-bit bit string into \locvar{BITS}, using the procedure
3534  described in Section~\ref{sub:short-run}.
3535 \item
3536 For each block in coded order---indexed by \locvar{\bi}:
3537 \begin{enumerate}
3538 \item
3539 Assign \locvar{\sbi} the index of the super block containing block
3540  \locvar{\bi}.
3541 \item
3542 If $\locvar{SBPCODED}[\locvar{\sbi}]$ is zero, assign
3543  $\bitvar{BCODED}[\locvar{\bi}]$ the value $\locvar{SBFCODED}[\locvar{\sbi}]$.
3544 \item
3545 Otherwise, remove the bit at the head of the string \locvar{BITS} and assign it
3546  to $\bitvar{BCODED}[\locvar{\bi}]$.
3547 \end{enumerate}
3548 \end{enumerate}
3549 \end{enumerate}
3550
3551 \section{Macro Block Coding Modes}
3552 \label{sub:mb-modes}
3553
3554 \paragraph{Input parameters:}\hfill\\*
3555 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3556 \multicolumn{1}{c}{Name} &
3557 \multicolumn{1}{c}{Type} &
3558 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3559 \multicolumn{1}{c}{Signed?} &
3560 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3561 \bitvar{FTYPE}    & Integer &  1 & No & The frame type. \\
3562 \bitvar{NMBS}     & Integer & 32 & No & The total number of macro blocks in a
3563  frame. \\
3564 \bitvar{NBS}      & Integer & 36 & No & The total number of blocks in a
3565  frame. \\
3566 \bitvar{BCODED}   & \multicolumn{1}{p{40pt}}{Integer Array} &
3567                               1 & No & An \bitvar{NBS}-element array of flags
3568  indicating which blocks are coded. \\
3569 \bottomrule\end{tabularx}
3570
3571 \paragraph{Output parameters:}\hfill\\*
3572 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3573 \multicolumn{1}{c}{Name} &
3574 \multicolumn{1}{c}{Type} &
3575 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3576 \multicolumn{1}{c}{Signed?} &
3577 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3578 \bitvar{MBMODES} & \multicolumn{1}{p{40pt}}{Integer Array} &
3579                               3 & No & An \bitvar{NMBS}-element array of coding
3580  modes for each macro block. \\
3581 \bottomrule\end{tabularx}
3582
3583 \paragraph{Variables used:}\hfill\\*
3584 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3585 \multicolumn{1}{c}{Name} &
3586 \multicolumn{1}{c}{Type} &
3587 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3588 \multicolumn{1}{c}{Signed?} &
3589 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3590 \locvar{MSCHEME}   & Integer &  3 & No & The mode coding scheme. \\
3591 \locvar{MALPHABET} & \multicolumn{1}{p{40pt}}{Integer array}
3592                              &  3 & No & The list of modes corresponding to each
3593  Huffman code. \\
3594 \locvar{\mbi}      & Integer & 32 & No & The index of the current macro
3595  block. \\
3596 \locvar{\bi}       & Integer & 36 & No & The index of the current block in
3597  coded order. \\
3598 \locvar{\mi}       & Integer & 32 & No & The index of a Huffman code from
3599  Table~\ref{tab:mode-codes}, starting from $0$. \\
3600 \bottomrule\end{tabularx}
3601 \medskip
3602
3603 In an intra frame, every macro block is coded in INTRA mode.
3604 In an inter frame, however, a macro block can be coded in one of eight coding
3605  modes, given in Table~\ref{tab:coding-modes}.
3606 All of the blocks in all color planes contained in a macro block will be
3607  assigned the coding mode of that macro block.
3608
3609 \begin{table}[htbp]
3610 \begin{center}
3611 \begin{tabular}{cl}\toprule
3612 Index & Coding Mode \\\midrule
3613 $0$   & INTER\_NOMV         \\
3614 $1$   & INTRA               \\
3615 $2$   & INTER\_MV           \\
3616 $3$   & INTER\_MV\_LAST     \\
3617 $4$   & INTER\_MV\_LAST2    \\
3618 $5$   & INTER\_GOLDEN\_NOMV \\
3619 $6$   & INTER\_GOLDEN\_MV   \\
3620 $7$   & INTER\_MV\_FOUR     \\
3621 \bottomrule\end{tabular}
3622 \end{center}
3623 \caption{Coding Modes}
3624 \label{tab:coding-modes}
3625 \end{table}
3626
3627 An important thing to note is that a coding mode is only stored in the
3628  bitstream for a macro block if it has at least one {\em luma} block coded.
3629 A macro block that contains coded blocks in the chroma planes, but not in the
3630  luma plane, MUST be coded in INTER\_NOMV mode.
3631 Thus, no coding mode needs to be decoded for such a macro block.
3632
3633 Coding modes are encoded using one of eight different schemes.
3634 Schemes 0 through 6 use the same simple Huffman code to represent the mode
3635  numbers, as given in Table~\ref{tab:mode-codes}.
3636 The difference in the schemes is the mode number assigned to each code.
3637 Scheme 0 uses an assignment specified in the bitstream, while schemes 1--6 use
3638  a fixed assignment, also given in Table~\ref{tab:mode-codes}.
3639 Scheme 7 simply codes each mode directly in the bitstream using three bits.
3640
3641 \begin{table}[htbp]
3642 \begin{center}
3643 \begin{tabular}{lcccccc}\toprule
3644 Scheme        & $1$ & $2$ & $3$ & $4$ & $5$ & $6$ \\\cmidrule{2-7}
3645 Huffman Code  & \multicolumn{6}{c}{Coding Mode}   \\\midrule
3646 \bin{0}       & $3$ & $3$ & $3$ & $3$ & $0$ & $0$ \\
3647 \bin{10}      & $4$ & $4$ & $2$ & $2$ & $3$ & $5$ \\
3648 \bin{110}     & $2$ & $0$ & $4$ & $0$ & $4$ & $3$ \\
3649 \bin{1110}    & $0$ & $2$ & $0$ & $4$ & $2$ & $4$ \\
3650 \bin{11110}   & $1$ & $1$ & $1$ & $1$ & $1$ & $2$ \\
3651 \bin{111110}  & $5$ & $5$ & $5$ & $5$ & $5$ & $1$ \\
3652 \bin{1111110} & $6$ & $6$ & $6$ & $6$ & $6$ & $6$ \\
3653 \bin{1111111} & $7$ & $7$ & $7$ & $7$ & $7$ & $7$ \\
3654 \bottomrule\end{tabular}
3655 \end{center}
3656 \caption{Macro Block Mode Schemes}
3657 \label{tab:mode-codes}
3658 \end{table}
3659
3660 \begin{enumerate}
3661 \item
3662 If \bitvar{FTYPE} is 0 (intra frame):
3663 \begin{enumerate}
3664 \item
3665 For each consecutive value of \locvar{\mbi} from 0 to $(\bitvar{NMBS}-1)$,
3666  inclusive, assign $\bitvar{MBMODES}[\locvar{\mbi}]$ the value 1 (INTRA).
3667 \end{enumerate}
3668 \item
3669 Otherwise (inter frame):
3670 \begin{enumerate}
3671 \item
3672 Read a 3-bit unsigned integer as \locvar{MSCHEME}.
3673 \item
3674 If \locvar{MSCHEME} is 0:
3675 \begin{enumerate}
3676 \item
3677 For each consecutive value of \locvar{MODE} from 0 to 7, inclusive:
3678 \begin{enumerate}
3679 \item
3680 Read a 3-bit unsigned integer as \locvar{\mi}.
3681 \item
3682 Assign $\locvar{MALPHABET}[\mi]$ the value \locvar{MODE}.
3683 \end{enumerate}
3684 \end{enumerate}
3685 \item
3686 Otherwise, if \locvar{MSCHEME} is not 7, assign the entries of
3687  \locvar{MALPHABET} the values in the corresponding column of
3688  Table~\ref{tab:mode-codes}.
3689 \item
3690 For each consecutive macro block in coded order
3691  (cf.\ Section~\ref{sec:mbs})---indexed by \locvar{\mbi}:
3692 \begin{enumerate}
3693 \item
3694 If a block \locvar{\bi} in the luma plane of macro block \locvar{\mbi} exists
3695  such that $\bitvar{BCODED}[\locvar{\bi}]$ is 1:
3696 \begin{enumerate}
3697 \item
3698 If \locvar{MSCHEME} is not 7, read one bit at a time until one of the Huffman
3699  codes in Table~\ref{tab:mode-codes} is recognized, and assign
3700  $\bitvar{MBMODES}[\locvar{\mbi}]$ the value
3701  $\locvar{MALPHABET}[\locvar{\mi}]$, where \locvar{\mi} is the index of the
3702  Huffman code decoded.
3703 \item
3704 Otherwise (\locvar{MSCHEME} is 7), read a 3-bit unsigned integer as
3705  $\bitvar{MBMODES}[\locvar{\mbi}]$.
3706 \end{enumerate}
3707 \item
3708 Otherwise, assign $\bitvar{MBMODES}[\locvar{\mbi}]$ the value 0 (INTER\_NOMV).
3709 \end{enumerate}
3710 \end{enumerate}
3711 \end{enumerate}
3712
3713 \section{Motion Vectors}
3714
3715 In an intra frame, no motion vectors are used, and so motion vector decoding is
3716  skipped.
3717 In an inter frame, however, many of the inter coding modes require a motion
3718  vector in order to specify an offset into the reference frame from which to
3719  predict a block.
3720 These procedures assign such a motion vector to every block.
3721
3722 \subsection{Motion Vector Decode}
3723 \label{sub:mv-decode}
3724
3725 \paragraph{Input parameters:}\hfill\\*
3726 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3727 \multicolumn{1}{c}{Name} &
3728 \multicolumn{1}{c}{Type} &
3729 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3730 \multicolumn{1}{c}{Signed?} &
3731 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3732 \bitvar{MVMODE}   & Integer &  1 & No & The motion vector decoding method. \\
3733 \bottomrule\end{tabularx}
3734
3735 \paragraph{Output parameters:}\hfill\\*
3736 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3737 \multicolumn{1}{c}{Name} &
3738 \multicolumn{1}{c}{Type} &
3739 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3740 \multicolumn{1}{c}{Signed?} &
3741 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3742 \bitvar{MVX}      & Integer &  6 & Yes & The X component of the motion
3743  vector. \\
3744 \bitvar{MVY}      & Integer &  6 & Yes & The Y component of the motion
3745  vector. \\
3746 \bottomrule\end{tabularx}
3747
3748 \paragraph{Variables used:}\hfill\\*
3749 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3750 \multicolumn{1}{c}{Name} &
3751 \multicolumn{1}{c}{Type} &
3752 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3753 \multicolumn{1}{c}{Signed?} &
3754 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3755 \locvar{MVSIGN}   & Integer &  1 & No & The sign of the motion vector component
3756  just decoded. \\
3757 \bottomrule\end{tabularx}
3758 \medskip
3759
3760 The individual components of a motion vector can be coded using one of two
3761  methods.
3762 The first uses a variable length Huffman code, given in
3763  Table~\ref{tab:mv-huff-codes}.
3764 The second encodes the magnitude of the component directly in 5 bits, and the
3765  sign in one bit.
3766 Note that in this case there are two representations for the value zero.
3767 For compatibility with VP3, a sign bit is read even if the magnitude read is
3768  zero.
3769 One scheme is chosen and used for the entire frame.
3770
3771 Each component can take on integer values from $-31\ldots 31$, inclusive, at
3772  half-pixel resolution, i.e., $-15.5\ldots 15.5$ pixels in the luma plane.
3773 For each subsampled axis in the chroma planes, the corresponding motion vector
3774  component is interpreted as being at quarter-pixel resolution, i.e.,
3775  $-7.75\ldots 7.75$ pixels.
3776 The precise details of how these vectors are used to compute predictors for
3777  each block are described in Section~\ref{sec:predictors}.
3778
3779 \begin{table}[ht]
3780 \begin{center}
3781 \begin{tabular}{lrlr}\toprule
3782 Huffman Code   & Value & Huffman Code   & Value \\\midrule
3783 \bin{000}      & $0$   \\
3784 \bin{001}      & $1$   & \bin{010}      & $-1$  \\
3785 \bin{0110}     & $2$   & \bin{0111}     & $-2$  \\
3786 \bin{1000}     & $3$   & \bin{1001}     & $-3$  \\
3787 \bin{101000}   & $4$   & \bin{101001}   & $-4$  \\
3788 \bin{101010}   & $5$   & \bin{101011}   & $-5$  \\
3789 \bin{101100}   & $6$   & \bin{101101}   & $-6$  \\
3790 \bin{101110}   & $7$   & \bin{101111}   & $-7$  \\
3791 \bin{1100000}  & $8$   & \bin{1100001}  & $-8$  \\
3792 \bin{1100010}  & $9$   & \bin{1100011}  & $-9$  \\
3793 \bin{1100100}  & $10$  & \bin{1100101}  & $-10$ \\
3794 \bin{1100110}  & $11$  & \bin{1100111}  & $-11$ \\
3795 \bin{1101000}  & $12$  & \bin{1101001}  & $-12$ \\
3796 \bin{1101010}  & $13$  & \bin{1101011}  & $-13$ \\
3797 \bin{1101100}  & $14$  & \bin{1101101}  & $-14$ \\
3798 \bin{1101110}  & $15$  & \bin{1101111}  & $-15$ \\
3799 \bin{11100000} & $16$  & \bin{11100001} & $-16$ \\
3800 \bin{11100010} & $17$  & \bin{11100011} & $-17$ \\
3801 \bin{11100100} & $18$  & \bin{11100101} & $-18$ \\
3802 \bin{11100110} & $19$  & \bin{11100111} & $-19$ \\
3803 \bin{11101000} & $20$  & \bin{11101001} & $-20$ \\
3804 \bin{11101010} & $21$  & \bin{11101011} & $-21$ \\
3805 \bin{11101100} & $22$  & \bin{11101101} & $-22$ \\
3806 \bin{11101110} & $23$  & \bin{11101111} & $-23$ \\
3807 \bin{11110000} & $24$  & \bin{11110001} & $-24$ \\
3808 \bin{11110010} & $25$  & \bin{11110011} & $-25$ \\
3809 \bin{11110100} & $26$  & \bin{11110101} & $-26$ \\
3810 \bin{11110110} & $27$  & \bin{11110111} & $-27$ \\
3811 \bin{11111000} & $28$  & \bin{11111001} & $-28$ \\
3812 \bin{11111010} & $29$  & \bin{11111011} & $-29$ \\
3813 \bin{11111100} & $30$  & \bin{11111101} & $-30$ \\
3814 \bin{11111110} & $31$  & \bin{11111111} & $-31$ \\
3815 \bottomrule\end{tabular}
3816 \end{center}
3817 \caption{Huffman Codes for Motion Vector Components}
3818 \label{tab:mv-huff-codes}
3819 \end{table}
3820
3821 A single motion vector is decoded as follows:
3822
3823 \begin{enumerate}
3824 \item
3825 If \bitvar{MVMODE} is 0:
3826 \begin{enumerate}
3827 \item
3828 Read 1 bit at a time until one of the Huffman codes in
3829  Table~\ref{tab:mv-huff-codes} is recognized, and assign the value to
3830  \bitvar{MVX}.
3831 \item
3832 Read 1 bit at a time until one of the Huffman codes in
3833  Table~\ref{tab:mv-huff-codes} is recognized, and assign the value to
3834  \bitvar{MVY}.
3835 \end{enumerate}
3836 \item
3837 Otherwise:
3838 \begin{enumerate}
3839 \item
3840 Read a 5-bit unsigned integer as \bitvar{MVX}.
3841 \item
3842 Read a 1-bit unsigned integer as \locvar{MVSIGN}.
3843 \item
3844 If \locvar{MVSIGN} is 1, assign \bitvar{MVX} the value $-\bitvar{MVX}$.
3845 \item
3846 Read a 5-bit unsigned integer as \bitvar{MVY}.
3847 \item
3848 Read a 1-bit unsigned integer as \locvar{MVSIGN}.
3849 \item
3850 If \locvar{MVSIGN} is 1, assign \bitvar{MVY} the value $-\bitvar{MVY}$.
3851 \end{enumerate}
3852 \end{enumerate}
3853
3854 \subsection{Macro Block Motion Vector Decode}
3855 \label{sub:mb-mv-decode}
3856
3857 \paragraph{Input parameters:}\hfill\\*
3858 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3859 \multicolumn{1}{c}{Name} &
3860 \multicolumn{1}{c}{Type} &
3861 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3862 \multicolumn{1}{c}{Signed?} &
3863 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3864 \bitvar{PF}      & Integer &  2 & No & The pixel format. \\
3865 \bitvar{NMBS}    & Integer & 32 & No & The total number of macro blocks in a
3866  frame. \\
3867 \bitvar{MBMODES} & \multicolumn{1}{p{40pt}}{Integer Array} &
3868                               3 & No & An \bitvar{NMBS}-element array of coding
3869  modes for each macro block. \\
3870 \bitvar{NBS}      & Integer & 36 & No & The total number of blocks in a
3871  frame. \\
3872 \bitvar{BCODED}   & \multicolumn{1}{p{40pt}}{Integer Array} &
3873                               1 & No & An \bitvar{NBS}-element array of flags
3874  indicating which blocks are coded. \\
3875 \bottomrule\end{tabularx}
3876
3877 \paragraph{Output parameters:}\hfill\\*
3878 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3879 \multicolumn{1}{c}{Name} &
3880 \multicolumn{1}{c}{Type} &
3881 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3882 \multicolumn{1}{c}{Signed?} &
3883 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3884 \bitvar{MVECTS}   & \multicolumn{1}{p{50pt}}{Array of 2D Integer Vectors} &
3885                                6 & Yes & An \bitvar{NBS}-element array of
3886  motion vectors for each block. \\
3887 \bottomrule\end{tabularx}
3888
3889 \paragraph{Variables used:}\hfill\\*
3890 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
3891 \multicolumn{1}{c}{Name} &
3892 \multicolumn{1}{c}{Type} &
3893 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
3894 \multicolumn{1}{c}{Signed?} &
3895 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
3896 \locvar{LAST1}    & \multicolumn{1}{p{50pt}}{2D Integer Vector} &
3897                                6 & Yes & The last motion vector. \\
3898 \locvar{LAST2}    & \multicolumn{1}{p{50pt}}{2D Integer Vector} &
3899                                6 & Yes & The second to last motion vector. \\
3900 \locvar{MVX}      & Integer &  6 & Yes & The X component of a motion vector. \\
3901 \locvar{MVY}      & Integer &  6 & Yes & The Y component of a motion vector. \\
3902 \locvar{\mbi}     & Integer & 32 &  No & The index of the current macro
3903  block. \\
3904 \locvar{A}        & Integer & 36 &  No & The index of the lower-left luma block
3905  in the macro block. \\
3906 \locvar{B}        & Integer & 36 &  No & The index of the lower-right luma
3907  block in the macro block. \\
3908 \locvar{C}        & Integer & 36 &  No & The index of the upper-left luma block
3909  in the macro block. \\
3910 \locvar{D}        & Integer & 36 &  No & The index of the upper-right luma
3911  block in the macro block. \\
3912 \locvar{E}        & Integer & 36 &  No & The index of a chroma block in the
3913  macro block, depending on the pixel format. \\
3914 \locvar{F}        & Integer & 36 &  No & The index of a chroma block in the
3915  macro block, depending on the pixel format. \\
3916 \locvar{G}        & Integer & 36 &  No & The index of a chroma block in the
3917  macro block, depending on the pixel format. \\
3918 \locvar{H}        & Integer & 36 &  No & The index of a chroma block in the
3919  macro block, depending on the pixel format. \\
3920 \locvar{I}        & Integer & 36 &  No & The index of a chroma block in the
3921  macro block, depending on the pixel format. \\
3922 \locvar{J}        & Integer & 36 &  No & The index of a chroma block in the
3923  macro block, depending on the pixel format. \\
3924 \locvar{K}        & Integer & 36 &  No & The index of a chroma block in the
3925  macro block, depending on the pixel format. \\
3926 \locvar{L}        & Integer & 36 &  No & The index of a chroma block in the
3927  macro block, depending on the pixel format. \\
3928 \bottomrule\end{tabularx}
3929 \medskip
3930
3931 Motion vectors are stored for each macro block.
3932 In every mode except for INTER\_MV\_FOUR, every block in all the color planes
3933  is assigned the same motion vector.
3934 In INTER\_MV\_FOUR mode, all four blocks in the luma plane are assigned their
3935  own motion vector, and motion vectors for blocks in the chroma planes are
3936  computed from these, using averaging appropriate to the pixel format.
3937
3938 For INTER\_MV and INTER\_GOLDEN\_MV modes, a single motion vector is decoded
3939  and applied to each block.
3940 For INTER\_MV\_FOUR macro blocks, a motion vector is decoded for each coded
3941  luma block.
3942 Uncoded luma blocks receive the default $(0,0)$ vector for the purposes of
3943  computing the chroma motion vectors.
3944
3945 None of the remaining macro block coding modes require decoding motion vectors
3946  from the stream.
3947 INTRA mode does not use a motion-compensated predictor, and so requires no
3948  motion vector, and INTER\_NOMV and INTER\_GOLDEN\_NOMV modes use the default
3949  vector $(0,0)$ for each block.
3950 This also includes all macro blocks with no coded luma blocks, as they are
3951  coded in INTER\_NOMV mode by definition.
3952
3953 The modes INTER\_MV\_LAST and INTER\_MV\_LAST2 use the motion vector from the
3954  last macro block (in coded order) and the second to last macro block,
3955  respectively, that contained a motion vector pointing to the previous frame.
3956 Thus no explicit motion vector needs to be decoded for these modes.
3957 Macro blocks coded in INTRA mode or one of the GOLDEN modes are not considered
3958  in this process.
3959 If an insufficient number of macro blocks have been coded in one of the INTER
3960  modes, then the $(0,0)$ vector is used instead.
3961 For macro blocks coded in INTER\_MV\_FOUR mode, the vector from the upper-right
3962  luma block is used, even if the upper-right block is not coded.
3963
3964 The motion vectors are decoded from the stream as follows:
3965
3966 \begin{enumerate}
3967 \item
3968 Assign \locvar{LAST1} and \locvar{LAST2} both the value $(0,0)$.
3969 \item
3970 Read a 1-bit unsigned integer as \locvar{MVMODE}.
3971 Note that this value is read even if no macro blocks require a motion vector to
3972  be decoded.
3973 \item
3974 For each consecutive value of \locvar{\mbi} from 0 to $(\bitvar{NMBS}-1)$:
3975 \begin{enumerate}
3976 \item
3977 If $\bitvar{MBMODES}[\locvar{\mbi}]$ is 7 (INTER\_MV\_FOUR):
3978 \begin{enumerate}
3979 \item
3980 Let \locvar{A}, \locvar{B}, \locvar{C}, and \locvar{D} be the indices in coded
3981  order \locvar{\bi} of the luma blocks in macro block \locvar{\mbi}, arranged
3982  into raster order.
3983 Thus, \locvar{A} is the index in coded order of the block in the lower left,
3984  \locvar{B} the lower right, \locvar{C} the upper left, and \locvar{D} the
3985  upper right. % TODO: as shown in Figure~REF.
3986 \item
3987 If $\bitvar{BCODED}[\locvar{A}]$ is non-zero, decode a single motion vector
3988  into \locvar{MVX} and \locvar{MVY} using the procedure described in
3989  Section~\ref{sub:mv-decode}.
3990 \item
3991 Otherwise, assign \locvar{MVX} and \locvar{MVY} both the value zero.
3992 \item
3993 Assign $\bitvar{MVECTS}[\locvar{A}]$ the value $(\locvar{MVX},\locvar{MVY})$.
3994 \item
3995 If $\bitvar{BCODED}[\locvar{B}]$ is non-zero, decode a single motion vector
3996  into \locvar{MVX} and \locvar{MVY} using the procedure described in
3997  Section~\ref{sub:mv-decode}.
3998 \item
3999 Otherwise, assign \locvar{MVX} and \locvar{MVY} both the value zero.
4000 \item
4001 Assign $\bitvar{MVECTS}[\locvar{B}]$ the value $(\locvar{MVX},\locvar{MVY})$.
4002 \item
4003 If $\bitvar{BCODED}[\locvar{C}]$ is non-zero, decode a single motion vector
4004  into \locvar{MVX} and \locvar{MVY} using the procedure described in
4005  Section~\ref{sub:mv-decode}.
4006 \item
4007 Otherwise, assign \locvar{MVX} and \locvar{MVY} both the value zero.
4008 \item
4009 Assign $\bitvar{MVECTS}[\locvar{C}]$ the value $(\locvar{MVX},\locvar{MVY})$.
4010 \item
4011 If $\bitvar{BCODED}[\locvar{D}]$ is non-zero, decode a single motion vector
4012  into \locvar{MVX} and \locvar{MVY} using the procedure described in
4013  Section~\ref{sub:mv-decode}.
4014 \item
4015 Otherwise, assign \locvar{MVX} and \locvar{MVY} both the value zero.
4016 \item
4017 Assign $\bitvar{MVECTS}[\locvar{D}]$ the value $(\locvar{MVX},\locvar{MVY})$.
4018 Note that \locvar{MVX} and \locvar{MVY} retain this last value.
4019 \item
4020 If \bitvar{PF} is 0 (4:2:0):
4021 \begin{enumerate}
4022 \item
4023 Let \locvar{E} and \locvar{F} be the indices in coded order of the one block
4024  in the macro block from the $C_b$ and $C_r$ planes, respectively.
4025 \item
4026 Assign $\bitvar{MVECTS}[\locvar{E}]$ and $\bitvar{MVECTS}[\locvar{F}]$ the
4027  value
4028 \begin{multline*}
4029 (\round\biggl(\frac{\begin{aligned}
4030  \bitvar{MVECTS}[\locvar{A}]_x+\bitvar{MVECTS}[\locvar{B}]_x+\\
4031  \bitvar{MVECTS}[\locvar{C}]_x+\bitvar{MVECTS}[\locvar{D}]_x
4032  \end{aligned}}{4}\biggr), \\
4033  \round\biggl(\frac{\begin{aligned}
4034  \bitvar{MVECTS}[\locvar{A}]_y+\bitvar{MVECTS}[\locvar{B}]_y+\\
4035  \bitvar{MVECTS}[\locvar{C}]_y+\bitvar{MVECTS}[\locvar{D}]_y
4036  \end{aligned}}{4}\biggr))
4037 \end{multline*}
4038 \end{enumerate}
4039 \item
4040 If \bitvar{PF} is 2 (4:2:2):
4041 \begin{enumerate}
4042 \item
4043 Let \locvar{E} and \locvar{F} be the indices in coded order of the bottom and
4044  top blocks in the macro block from the $C_b$ plane, respectively, and
4045  \locvar{G} and \locvar{H} be the indices in coded order of the bottom and top
4046  blocks in the $C_r$ plane, respectively. %TODO: as shown in Figure~REF.
4047 \item
4048 Assign $\bitvar{MVECTS}[\locvar{E}]$ and $\bitvar{MVECTS}[\locvar{G}]$ the
4049  value
4050 \begin{multline*}
4051 (\round\left(\frac{
4052  \bitvar{MVECTS}[\locvar{A}]_x+\bitvar{MVECTS}[\locvar{B}]_x}{2}\right), \\
4053  \round\left(\frac{
4054  \bitvar{MVECTS}[\locvar{A}]_y+\bitvar{MVECTS}[\locvar{B}]_y}{2}\right))
4055 \end{multline*}
4056 \item
4057 Assign $\bitvar{MVECTS}[\locvar{F}]$ and $\bitvar{MVECTS}[\locvar{H}]$ the
4058  value
4059 \begin{multline*}
4060 (\round\left(\frac{
4061  \bitvar{MVECTS}[\locvar{C}]_x+\bitvar{MVECTS}[\locvar{D}]_x}{2}\right), \\
4062  \round\left(\frac{
4063  \bitvar{MVECTS}[\locvar{C}]_y+\bitvar{MVECTS}[\locvar{D}]_y}{2}\right))
4064 \end{multline*}
4065 \end{enumerate}
4066 \item
4067 If \bitvar{PF} is 3 (4:4:4):
4068 \begin{enumerate}
4069 \item
4070 Let \locvar{E}, \locvar{F}, \locvar{G}, and \locvar{H} be the indices
4071  \locvar{\bi} in coded order of the $C_b$ plane blocks in macro block
4072  \locvar{\mbi}, arranged into raster order, and \locvar{I}, \locvar{J},
4073  \locvar{K}, and \locvar{L} be the indices \locvar{\bi} in coded order of the
4074  $C_r$ plane blocks in macro block \locvar{\mbi}, arranged into raster order.
4075  %TODO: as shown in Figure~REF.
4076 \item
4077 Assign $\bitvar{MVECTS}[\locvar{E}]$ and $\bitvar{MVECTS}[\locvar{I}]$ the
4078  value \\ $\bitvar{MVECTS}[\locvar{A}]$.
4079 \item
4080 Assign $\bitvar{MVECTS}[\locvar{F}]$ and $\bitvar{MVECTS}[\locvar{J}]$ the
4081  value \\ $\bitvar{MVECTS}[\locvar{B}]$.
4082 \item
4083 Assign $\bitvar{MVECTS}[\locvar{G}]$ and $\bitvar{MVECTS}[\locvar{K}]$ the
4084  value \\ $\bitvar{MVECTS}[\locvar{C}]$.
4085 \item
4086 Assign $\bitvar{MVECTS}[\locvar{H}]$ and $\bitvar{MVECTS}[\locvar{L}]$ the
4087  value \\ $\bitvar{MVECTS}[\locvar{D}]$.
4088 \end{enumerate}
4089 \item
4090 Assign \locvar{LAST2} the value \locvar{LAST1}.
4091 \item
4092 Assign \locvar{LAST1} the value $(\locvar{MVX},\locvar{MVY})$.
4093 \end{enumerate}
4094 \item
4095 Otherwise, if $\bitvar{MBMODES}[\locvar{\mbi}]$ is 6 (INTER\_GOLDEN\_MV),
4096  decode a single motion vector into \locvar{MVX} and \locvar{MVY} using the
4097  procedure described in Section~\ref{sub:mv-decode}.
4098 \item
4099 Otherwise, if $\bitvar{MBMODES}[\locvar{\mbi}]$ is 4 (INTER\_MV\_LAST2):
4100 \begin{enumerate}
4101 \item
4102 Assign $(\locvar{MVX},\locvar{MVY})$ the value \locvar{LAST2}.
4103 \item
4104 Assign \locvar{LAST2} the value \locvar{LAST1}.
4105 \item
4106 Assign \locvar{LAST1} the value $(\locvar{MVX},\locvar{MVY})$.
4107 \end{enumerate}
4108 \item
4109 Otherwise, if $\bitvar{MBMODES}[\locvar{\mbi}]$ is 3 (INTER\_MV\_LAST), assign
4110  $(\locvar{MVX},\locvar{MVY})$ the value \locvar{LAST1}.
4111 \item
4112 Otherwise, if $\bitvar{MBMODES}[\locvar{\mbi}]$ is 2 (INTER\_MV):
4113 \begin{enumerate}
4114 \item
4115 Decode a single motion vector into \locvar{MVX} and \locvar{MVY} using the
4116  procedure described in Section~\ref{sub:mv-decode}.
4117 \item
4118 Assign \locvar{LAST2} the value \locvar{LAST1}.
4119 \item
4120 Assign \locvar{LAST1} the value $(\locvar{MVX},\locvar{MVY})$.
4121 \end{enumerate}
4122 \item
4123 Otherwise (5:~INTER\_GOLDEN\_NOMV, 1:~INTRA, or \\
4124  0:~INTER\_NOMV), assign \locvar{MVX} and \locvar{MVY} the value zero.
4125 \item
4126 If $\bitvar{MBMODES}[\locvar{\mbi}]$ is not 7 (not INTER\_MV\_FOUR), then for
4127  each coded block \locvar{\bi} in macro block \locvar{\mbi}:
4128 \begin{enumerate}
4129 \item
4130 Assign $\bitvar{MVECTS}[\locvar{\bi}]$ the value $(\locvar{MVX},\locvar{MVY})$.
4131 \end{enumerate}
4132 \end{enumerate}
4133 \end{enumerate}
4134
4135 \paragraph{VP3 Compatibility}
4136
4137 Unless all four luma blocks in the macro block are coded, the VP3 encoder does
4138  not select mode INTER\_MV\_FOUR.
4139 Theora removes this restriction by treating the motion vector for an uncoded
4140  luma block as the default $(0,0)$ vector.
4141 This is consistent with the premise that the block has not changed since the
4142  previous frame and that chroma information can be largely ignored when
4143  estimating motion.
4144
4145 No modification is required for INTER\_MV\_FOUR macro blocks in VP3 streams to
4146  be decoded correctly by a Theora decoder.
4147 However, regardless of how many of the luma blocks are actually coded, the VP3
4148  decoder always reads four motion vectors from the stream for INTER\_MV\_FOUR
4149  mode.
4150 The motion vectors read are used to calculate the motion vectors for the chroma
4151  blocks, but are otherwise ignored.
4152 Thus, care should be taken when creating Theora streams meant to be backwards
4153  compatible with VP3 to only use INTER\_MV\_FOUR mode when all four luma
4154  blocks are coded.
4155
4156 \section{Block-Level \qi\ Decode}
4157 \label{sub:block-qis}
4158
4159 \paragraph{Input parameters:}\hfill\\*
4160 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
4161 \multicolumn{1}{c}{Name} &
4162 \multicolumn{1}{c}{Type} &
4163 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
4164 \multicolumn{1}{c}{Signed?} &
4165 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
4166 \bitvar{NBS}      & Integer & 36 & No & The total number of blocks in a
4167  frame. \\
4168 \bitvar{BCODED}   & \multicolumn{1}{p{40pt}}{Integer Array} &
4169                                1 & No & An \bitvar{NBS}-element array of flags
4170  indicating which blocks are coded. \\
4171 \bitvar{NQIS}     & Integer &  2 & No & The number of \qi\ values. \\
4172 \bottomrule\end{tabularx}
4173
4174 \paragraph{Output parameters:}\hfill\\*
4175 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
4176 \multicolumn{1}{c}{Name} &
4177 \multicolumn{1}{c}{Type} &
4178 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
4179 \multicolumn{1}{c}{Signed?} &
4180 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
4181 \bitvar{QIIS}   & \multicolumn{1}{p{40pt}}{Integer Array} &
4182                                2 & No & An \bitvar{NBS}-element array of
4183  \locvar{\qii} values for each block. \\
4184 \bottomrule\end{tabularx}
4185
4186 \paragraph{Variables used:}\hfill\\*
4187 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
4188 \multicolumn{1}{c}{Name} &
4189 \multicolumn{1}{c}{Type} &
4190 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
4191 \multicolumn{1}{c}{Signed?} &
4192 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
4193 \locvar{NBITS}    & Integer & 36 & No & The length of a bit string to decode. \\
4194 \locvar{BITS}     & Bit string & &    & A decoded set of flags. \\
4195 \locvar{\bi}      & Integer & 36 & No & The index of the current block in
4196  coded order. \\
4197 \locvar{\qii}     & Integer &  2 & No & The index of a \qi\ value in the list of
4198  \qi\ values defined for this frame. \\
4199 \bottomrule\end{tabularx}
4200 \medskip
4201
4202 This procedure selects the \qi\ value to be used for dequantizing the AC
4203  coefficients of each block.
4204 DC coefficients all use the same \qi\ value, so as to avoid interference with
4205  the DC prediction mechanism, which occurs in the quantized domain.
4206
4207 The value is actually represented by an index \locvar{\qii} into the list of
4208  \qi\ values defined for the frame.
4209 The procedure makes multiple passes through the list of coded blocks, one for
4210  each \qi\ value except the last one.
4211 In each pass, an RLE-coded bitmask is decoded to divide the blocks into two
4212  groups: those that use a value of \qi\ from later in the list, and those that
4213  do not.
4214 Each block in the second group is assigned the current \qi\ value.
4215 Each subsequent pass is restricted to the blocks in the first group.
4216
4217 \begin{enumerate}
4218 \item
4219 For each value of \locvar{\bi} from 0 to $(\bitvar{NBS}-1)$, assign
4220  $\bitvar{QIIS}[\locvar{\bi}]$ the value zero.
4221 \item
4222 For each consecutive value of \locvar{\qii} from 0 to $(\bitvar{NQIS}-2)$:
4223 \begin{enumerate}
4224 \item
4225 Assign \locvar{NBITS} the number of blocks \locvar{\bi} such that
4226  $\bitvar{BCODED}[\locvar{\bi}]$ is non-zero and $\bitvar{QIIS}[\locvar{\bi}]$
4227  equals $\locvar{\qii}$.
4228 \item
4229 Read an \locvar{NBITS}-bit bit string into \locvar{BITS}, using the procedure
4230  described in Section~\ref{sub:long-run}.
4231 This represents the list of blocks that use \qi\ value \locvar{\qii} or higher.
4232 \item
4233 For each consecutive value of \locvar{\bi} from 0 to $(\bitvar{NBS}-1)$ such
4234  that $\bitvar{BCODED}[\locvar{\bi}]$ is non-zero and
4235  $\bitvar{QIIS}[\locvar{\bi}]$ equals $\locvar{\qii}$:
4236 \begin{enumerate}
4237 \item
4238 Remove the bit at the head of the string \locvar{BITS} and add its value to
4239  $\bitvar{QIIS}[\locvar{\bi}]$.
4240 \end{enumerate}
4241 \end{enumerate}
4242 \end{enumerate}
4243
4244 \paragraph{VP3 Compatibility}
4245
4246 For VP3 compatible streams, only one \qi\ value can be specified in the frame
4247  header, so the main loop of the above procedure, which goes to
4248  $\bitvar{NQIS}-2$ instead of $\bitvar{NQIS}-1$, is never executed.
4249 Thus, no bits are read and each block uses the one \qi\ value defined for the
4250  frame.
4251
4252 \cleardoublepage
4253
4254 \section{DCT Coefficients}
4255 \label{sec:dct-decode}
4256
4257 The quantized DCT coefficients are decoded by making 64 passes through the list
4258  of coded blocks, one for each token index in zig-zag order.
4259 For the DC tokens, two Huffman tables are chosen from among the first 16, one
4260  for the luma plane and one for the chroma planes.
4261 The AC tokens, however, are divided into four different groups.
4262 Again, two 4-bit indices are decoded, one for the luma plane, and one for the
4263  chroma planes, but these select the codebooks for {\em all four} groups.
4264 AC coefficients in group one use codebooks $16\ldots 31$, while group two uses
4265  $32\ldots 47$, etc.
4266 Note that this second set of indices is decoded even if there are no non-zero
4267  AC coefficients in the frame.
4268
4269 Tokens are divided into two major types: EOB tokens, which fill the remainder
4270  of one or more blocks with zeros, and coefficient tokens, which fill in one or
4271  more coefficients within a single block.
4272 A decoding procedure for the first is given in Section~\ref{sub:eob-token}, and
4273  for the second in Section~\ref{sub:coeff-token}.
4274 The decoding procedure for the complete set of quantized coefficients is given
4275  in Section~\ref{sub:dct-coeffs}.
4276
4277 \subsection{EOB Token Decode}
4278 \label{sub:eob-token}
4279
4280 \paragraph{Input parameters:}\hfill\\*
4281 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
4282 \multicolumn{1}{c}{Name} &
4283 \multicolumn{1}{c}{Type} &
4284 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
4285 \multicolumn{1}{c}{Signed?} &
4286 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
4287 \bitvar{TOKEN}    & Integer &  5 & No  & The token being decoded.
4288 This must be in the range $0\ldots 6$. \\
4289 \bitvar{NBS}      & Integer & 36 & No  & The total number of blocks in a
4290  frame. \\
4291 \bitvar{TIS}      & \multicolumn{1}{p{40pt}}{Integer Array} &
4292                                7 & No  & An \bitvar{NBS}-element array of the
4293  current token index for each block. \\
4294 \bitvar{NCOEFFS}  & \multicolumn{1}{p{40pt}}{Integer Array} &
4295                                7 & No  & An \bitvar{NBS}-element array of the
4296  coefficient count for each block. \\
4297 \bitvar{COEFFS}   & \multicolumn{1}{p{50pt}}{2D Integer Array} &
4298                               16 & Yes & An $\bitvar{NBS}\times 64$ array of
4299  quantized DCT coefficient values for each block in zig-zag order. \\
4300 \bitvar{\bi}      & Integer & 36 & No  & The index of the current block in
4301  coded order. \\
4302 \bitvar{\ti}      & Integer &  6 & No  & The current token index. \\
4303 \bottomrule\end{tabularx}
4304
4305 \paragraph{Output parameters:}\hfill\\*
4306 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
4307 \multicolumn{1}{c}{Name} &
4308 \multicolumn{1}{c}{Type} &
4309 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
4310 \multicolumn{1}{c}{Signed?} &
4311 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
4312 \bitvar{TIS}      & \multicolumn{1}{p{40pt}}{Integer Array} &
4313                                7 & No  & An \bitvar{NBS}-element array of the
4314  current token index for each block. \\
4315 \bitvar{COEFFS}   & \multicolumn{1}{p{50pt}}{2D Integer Array} &
4316                               16 & Yes & An $\bitvar{NBS}\times 64$ array of
4317  quantized DCT coefficient values for each block in zig-zag order. \\
4318 \bitvar{EOBS}     & Integer & 36 & No  & The remaining length of the current
4319  EOB run. \\
4320 \bottomrule\end{tabularx}
4321
4322 \paragraph{Variables used:}\hfill\\*
4323 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
4324 \multicolumn{1}{c}{Name} &
4325 \multicolumn{1}{c}{Type} &
4326 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
4327 \multicolumn{1}{c}{Signed?} &
4328 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
4329 \locvar{\bj}      & Integer & 36 & No & Another index of a block in coded
4330  order. \\
4331 \locvar{\tj}      & Integer &  6 & No & Another token index. \\
4332 \bottomrule\end{tabularx}
4333 \medskip
4334
4335 A summary of the EOB tokens is given in Table~\ref{tab:eob-tokens}.
4336 An important thing to note is that token 6 does not add an offset to the
4337  decoded run value, even though in general it should only be used for runs of
4338  size 32 or longer.
4339 If a value of zero is decoded for this run, it is treated as an EOB run the
4340  size of the remaining coded blocks.
4341
4342 \begin{table}[htbp]
4343 \begin{center}
4344 \begin{tabular}{ccl}\toprule
4345 Token Value  & Extra Bits & EOB Run Lengths                         \\\midrule
4346 $0$          & $0$        & $1$                                     \\
4347 $1$          & $0$        & $2$                                     \\
4348 $2$          & $0$        & $3$                                     \\
4349 $3$          & $2$        & $4\ldots 7$                             \\
4350 $4$          & $3$        & $8\ldots 15$                            \\
4351 $5$          & $4$        & $16\ldots 31$                           \\
4352 $6$          & $12$       & $1\ldots 4095$, or all remaining blocks \\
4353 \bottomrule\end{tabular}
4354 \end{center}
4355 \caption{EOB Token Summary}
4356 \label{tab:eob-tokens}
4357 \end{table}
4358
4359 There is no restriction that one EOB token cannot be immediately followed by
4360  another, so no special cases are necessary to extend the range of the maximum
4361  run length as were required in Section~\ref{sub:long-run}.
4362 Indeed, depending on the lengths of the Huffman codes, it may even be cheaper to
4363  encode, by way of example, an EOB run of length 31 followed by an EOB run of
4364  length 1 than to encode an EOB run of length 32 directly.
4365 There is also no restriction that an EOB run stop at the end of a color plane
4366  or a token index.
4367 The run MUST, however, end at or before the end of the frame.
4368
4369 \begin{enumerate}
4370 \item
4371 If \bitvar{TOKEN} is 0, assign \bitvar{EOBS} the value 1.
4372 \item
4373 Otherwise, if \bitvar{TOKEN} is 1, assign \bitvar{EOBS} the value 2.
4374 \item
4375 Otherwise, if \bitvar{TOKEN} is 2, assign \bitvar{EOBS} the value 3.
4376 \item
4377 Otherwise, if \bitvar{TOKEN} is 3:
4378 \begin{enumerate}
4379 \item
4380 Read a 2-bit unsigned integer as \bitvar{EOBS}.
4381 \item
4382 Assign \bitvar{EOBS} the value $(\bitvar{EOBS}+4)$.
4383 \end{enumerate}
4384 \item
4385 Otherwise, if \bitvar{TOKEN} is 4:
4386 \begin{enumerate}
4387 \item
4388 Read a 3-bit unsigned integer as \bitvar{EOBS}.
4389 \item
4390 Assign \bitvar{EOBS} the value $(\bitvar{EOBS}+8)$.
4391 \end{enumerate}
4392 \item
4393 Otherwise, if \bitvar{TOKEN} is 5:
4394 \begin{enumerate}
4395 \item
4396 Read a 4-bit unsigned integer as \bitvar{EOBS}.
4397 \item
4398 Assign \bitvar{EOBS} the value $(\bitvar{EOBS}+16)$.
4399 \end{enumerate}
4400 \item
4401 Otherwise, \bitvar{TOKEN} is 6:
4402 \begin{enumerate}
4403 \item
4404 Read a 12-bit unsigned integer as \bitvar{EOBS}.
4405 \item
4406 If \bitvar{EOBS} is zero, assign \bitvar{EOBS} to be the number of coded blocks
4407  \locvar{\bj} such that $\bitvar{TIS}[\locvar{\bj}]$ is less than 64.
4408 \end{enumerate}
4409 \item
4410 For each value of \locvar{\tj} from $\bitvar{\ti}$ to 63, assign
4411  $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero.
4412 \item
4413 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4414 \item
4415 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value 64.
4416 \item
4417 Assign \bitvar{EOBS} the value $(\bitvar{EOBS}-1)$.
4418 \end{enumerate}
4419
4420 \paragraph{VP3 Compatibility}
4421
4422 The VP3 encoder does not use the special interpretation of a zero-length EOB
4423  run, though its decoder {\em does} support it.
4424 That may be due more to a happy accident in the way the decoder was written
4425  than intentional design, however, and other VP3 implementations might not
4426  reproduce it faithfully.
4427 For backwards compatibility, it may be wise to avoid it, especially as for most
4428  frame sizes there are fewer than 4095 blocks, making it unnecessary.
4429
4430 \subsection{Coefficient Token Decode}
4431 \label{sub:coeff-token}
4432
4433 \paragraph{Input parameters:}\hfill\\*
4434 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
4435 \multicolumn{1}{c}{Name} &
4436 \multicolumn{1}{c}{Type} &
4437 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
4438 \multicolumn{1}{c}{Signed?} &
4439 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
4440 \bitvar{TOKEN}    & Integer &  5 & No  & The token being decoded.
4441 This must be in the range $7\ldots 31$. \\
4442 \bitvar{NBS}      & Integer & 36 & No  & The total number of blocks in a
4443  frame. \\
4444 \bitvar{TIS}      & \multicolumn{1}{p{40pt}}{Integer Array} &
4445                                7 & No  & An \bitvar{NBS}-element array of the
4446  current token index for each block. \\
4447 \bitvar{COEFFS}   & \multicolumn{1}{p{50pt}}{2D Integer Array} &
4448                               16 & Yes & An $\bitvar{NBS}\times 64$ array of
4449  quantized DCT coefficient values for each block in zig-zag order. \\
4450 \bitvar{\bi}      & Integer & 36 & No  & The index of the current block in
4451  coded order. \\
4452 \bitvar{\ti}      & Integer &  6 & No  & The current token index. \\
4453 \bottomrule\end{tabularx}
4454
4455 \paragraph{Output parameters:}\hfill\\*
4456 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
4457 \multicolumn{1}{c}{Name} &
4458 \multicolumn{1}{c}{Type} &
4459 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
4460 \multicolumn{1}{c}{Signed?} &
4461 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
4462 \bitvar{TIS}      & \multicolumn{1}{p{40pt}}{Integer Array} &
4463                                7 & No  & An \bitvar{NBS}-element array of the
4464  current token index for each block. \\
4465 \bitvar{NCOEFFS}  & \multicolumn{1}{p{40pt}}{Integer Array} &
4466                                7 & No  & An \bitvar{NBS}-element array of the
4467  coefficient count for each block. \\
4468 \bitvar{COEFFS}   & \multicolumn{1}{p{50pt}}{2D Integer Array} &
4469                               16 & Yes & An $\bitvar{NBS}\times 64$ array of
4470  quantized DCT coefficient values for each block in zig-zag order. \\
4471 \bottomrule\end{tabularx}
4472
4473 \paragraph{Variables used:}\hfill\\*
4474 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
4475 \multicolumn{1}{c}{Name} &
4476 \multicolumn{1}{c}{Type} &
4477 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
4478 \multicolumn{1}{c}{Signed?} &
4479 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
4480 \locvar{SIGN}     & Integer &  1 & No & A flag indicating the sign of the
4481  current coefficient. \\
4482 \locvar{MAG}      & Integer & 10 & No & The magnitude of the current
4483  coefficient. \\
4484 \locvar{RLEN}     & Integer &  6 & No & The length of the current zero run. \\
4485 \locvar{\tj}      & Integer &  6 & No & Another token index. \\
4486 \bottomrule\end{tabularx}
4487 \medskip
4488
4489 Each of these tokens decodes one or more coefficients in the current block.
4490 A summary of the meanings of the token values is presented in
4491  Table~\ref{tab:coeff-tokens}.
4492 There are often several different ways to tokenize a given coefficient list.
4493 Which one is optimal depends on the exact lengths of the Huffman codes used to
4494  represent each token.
4495 Note that we do not update the coefficient count for the block if we decode a
4496  pure zero run.
4497
4498 \begin{table}[htbp]
4499 \begin{center}
4500 \begin{tabularx}{\textwidth}{cclX}\toprule
4501 Token Value  & Extra Bits & \multicolumn{1}{p{55pt}}{Number of Coefficients}
4502                                     & Description                    \\\midrule
4503 $7$          & $3$  & $1\ldots 8$   & Short zero run.                \\
4504 $8$          & $6$  & $1\ldots 64$  & Zero run.                      \\
4505 $9$          & $0$  & $1$           & $1$.                           \\
4506 $10$         & $0$  & $1$           & $-1$.                          \\
4507 $11$         & $0$  & $1$           & $2$.                           \\
4508 $12$         & $0$  & $1$           & $-2$.                          \\
4509 $13$         & $1$  & $1$           & $\pm 3$.                       \\
4510 $14$         & $1$  & $1$           & $\pm 4$.                       \\
4511 $15$         & $1$  & $1$           & $\pm 5$.                       \\
4512 $16$         & $1$  & $1$           & $\pm 6$.                       \\
4513 $17$         & $2$  & $1$           & $\pm 7\ldots 8$.               \\
4514 $18$         & $3$  & $1$           & $\pm 9\ldots 12$.              \\
4515 $19$         & $4$  & $1$           & $\pm 13\ldots 20$.             \\
4516 $20$         & $5$  & $1$           & $\pm 21\ldots 36$.             \\
4517 $21$         & $6$  & $1$           & $\pm 37\ldots 68$.             \\
4518 $22$         & $10$ & $1$           & $\pm 69\ldots 580$.            \\
4519 $23$         & $1$  & $2$           & One zero followed by $\pm 1$.  \\
4520 $24$         & $1$  & $3$           & Two zeros followed by $\pm 1$. \\
4521 $25$         & $1$  & $4$           & Three zeros followed by
4522  $\pm 1$. \\
4523 $26$         & $1$  & $5$           & Four zeros followed by
4524  $\pm 1$. \\
4525 $27$         & $1$  & $6$           & Five zeros followed by
4526  $\pm 1$. \\
4527 $28$         & $3$  & $7\ldots 10$  & $6\ldots 9$ zeros followed by
4528  $\pm 1$.  \\
4529 $29$         & $4$  & $11\ldots 18$ & $10\ldots 17$ zeros followed by
4530  $\pm 1$.\\
4531 $30$         & $2$  & $2$           & One zero followed by
4532  $\pm 2\ldots 3$. \\
4533 $31$         & $3$  & $3\ldots 4$   & $2\ldots 3$ zeros followed by
4534  $\pm 2\ldots 3$. \\
4535 \bottomrule\end{tabularx}
4536 \end{center}
4537 \caption{Coefficient Token Summary}
4538 \label{tab:coeff-tokens}
4539 \end{table}
4540
4541 Tokens which represent more than one coefficient MUST NOT bring the total
4542  number of coefficients in the block to more than 64.
4543 Care should be taken in a decoder to check for this, as otherwise it may permit
4544  buffer overflows from invalidly formed packets.
4545 \begin{verse}
4546 {\bf Note:} One way to achieve this efficiently is to combine the inverse
4547  zig-zag mapping (described later in Section~\ref{sub:dequant}) with
4548  coefficient decode, and use a table look-up to map zig-zag indices greater
4549  than 63 to a safe location.
4550 \end{verse}
4551
4552 \begin{enumerate}
4553 \item
4554 If \bitvar{TOKEN} is 7:
4555 \begin{enumerate}
4556 \item
4557 Read in a 3-bit unsigned integer as \locvar{RLEN}.
4558 \item
4559 Assign \locvar{RLEN} the value $(\locvar{RLEN}+1)$.
4560 \item
4561 For each value of \locvar{\tj} from \bitvar{\ti} to
4562  $(\bitvar{\ti}+\locvar{RLEN}-1)$, assign
4563   $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero.
4564 \item
4565 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value
4566  $\bitvar{TIS}[\bitvar{\bi}]+\locvar{RLEN}$.
4567 \end{enumerate}
4568 \item
4569 Otherwise, if \bitvar{TOKEN} is 8:
4570 \begin{enumerate}
4571 \item
4572 Read in a 6-bit unsigned integer as \locvar{RLEN}.
4573 \item
4574 Assign \locvar{RLEN} the value $(\locvar{RLEN}+1)$.
4575 \item
4576 For each value of \locvar{\tj} from \bitvar{\ti} to
4577  $(\bitvar{\ti}+\locvar{RLEN}-1)$, assign
4578   $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero.
4579 \item
4580 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value
4581  $\bitvar{TIS}[\bitvar{\bi}]+\locvar{RLEN}$.
4582 \end{enumerate}
4583 \item
4584 Otherwise, if \bitvar{TOKEN} is 9:
4585 \begin{enumerate}
4586 \item
4587 Assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $1$.
4588 \item
4589 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4590 \item
4591 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4592 \end{enumerate}
4593 \item
4594 Otherwise, if \bitvar{TOKEN} is 10:
4595 \begin{enumerate}
4596 \item
4597 Assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $-1$.
4598 \item
4599 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4600 \item
4601 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4602 \end{enumerate}
4603 \item
4604 Otherwise, if \bitvar{TOKEN} is 11:
4605 \begin{enumerate}
4606 \item
4607 Assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $2$.
4608 \item
4609 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4610 \item
4611 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4612 \end{enumerate}
4613 \item
4614 Otherwise, if \bitvar{TOKEN} is 12:
4615 \begin{enumerate}
4616 \item
4617 Assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $-2$.
4618 \item
4619 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4620 \item
4621 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4622 \end{enumerate}
4623 \item
4624 Otherwise, if \bitvar{TOKEN} is 13:
4625 \begin{enumerate}
4626 \item
4627 Read a 1-bit unsigned integer as \locvar{SIGN}.
4628 \item
4629 If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$
4630  the value $3$.
4631 \item
4632 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $-3$.
4633 \item
4634 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4635 \item
4636 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4637 \end{enumerate}
4638 \item
4639 Otherwise, if \bitvar{TOKEN} is 14:
4640 \begin{enumerate}
4641 \item
4642 Read a 1-bit unsigned integer as \locvar{SIGN}.
4643 \item
4644 If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$
4645  the value $4$.
4646 \item
4647 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $-4$.
4648 \item
4649 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4650 \item
4651 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4652 \end{enumerate}
4653 \item
4654 Otherwise, if \bitvar{TOKEN} is 15:
4655 \begin{enumerate}
4656 \item
4657 Read a 1-bit unsigned integer as \locvar{SIGN}.
4658 \item
4659 If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$
4660  the value $5$.
4661 \item
4662 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $-5$.
4663 \item
4664 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4665 \item
4666 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4667 \end{enumerate}
4668 \item
4669 Otherwise, if \bitvar{TOKEN} is 16:
4670 \begin{enumerate}
4671 \item
4672 Read a 1-bit unsigned integer as \locvar{SIGN}.
4673 \item
4674 If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$
4675  the value $6$.
4676 \item
4677 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value $-6$.
4678 \item
4679 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4680 \item
4681 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4682 \end{enumerate}
4683 \item
4684 Otherwise, if \bitvar{TOKEN} is 17:
4685 \begin{enumerate}
4686 \item
4687 Read a 1-bit unsigned integer as \locvar{SIGN}.
4688 \item
4689 Read a 1-bit unsigned integer as \locvar{MAG}.
4690 \item
4691 Assign \locvar{MAG} the value $(\locvar{MAG}+7)$.
4692 \item
4693 If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$
4694  the value $\locvar{MAG}$.
4695 \item
4696 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value
4697  $-\locvar{MAG}$.
4698 \item
4699 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4700 \item
4701 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4702 \end{enumerate}
4703 \item
4704 Otherwise, if \bitvar{TOKEN} is 18:
4705 \begin{enumerate}
4706 \item
4707 Read a 1-bit unsigned integer as \locvar{SIGN}.
4708 \item
4709 Read a 2-bit unsigned integer as \locvar{MAG}.
4710 \item
4711 Assign \locvar{MAG} the value $(\locvar{MAG}+9)$.
4712 \item
4713 If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$
4714  the value $\locvar{MAG}$.
4715 \item
4716 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value
4717  $-\locvar{MAG}$.
4718 \item
4719 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4720 \item
4721 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4722 \end{enumerate}
4723 \item
4724 Otherwise, if \bitvar{TOKEN} is 19:
4725 \begin{enumerate}
4726 \item
4727 Read a 1-bit unsigned integer as \locvar{SIGN}.
4728 \item
4729 Read a 3-bit unsigned integer as \locvar{MAG}.
4730 \item
4731 Assign \locvar{MAG} the value $(\locvar{MAG}+13)$.
4732 \item
4733 If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$
4734  the value $\locvar{MAG}$.
4735 \item
4736 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value
4737  $-\locvar{MAG}$.
4738 \item
4739 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4740 \item
4741 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4742 \end{enumerate}
4743 \item
4744 Otherwise, if \bitvar{TOKEN} is 20:
4745 \begin{enumerate}
4746 \item
4747 Read a 1-bit unsigned integer as \locvar{SIGN}.
4748 \item
4749 Read a 4-bit unsigned integer as \locvar{MAG}.
4750 \item
4751 Assign \locvar{MAG} the value $(\locvar{MAG}+21)$.
4752 \item
4753 If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$
4754  the value $\locvar{MAG}$.
4755 \item
4756 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value
4757  $-\locvar{MAG}$.
4758 \item
4759 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4760 \item
4761 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4762 \end{enumerate}
4763 \item
4764 Otherwise, if \bitvar{TOKEN} is 21:
4765 \begin{enumerate}
4766 \item
4767 Read a 1-bit unsigned integer as \locvar{SIGN}.
4768 \item
4769 Read a 5-bit unsigned integer as \locvar{MAG}.
4770 \item
4771 Assign \locvar{MAG} the value $(\locvar{MAG}+37)$.
4772 \item
4773 If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$
4774  the value $\locvar{MAG}$.
4775 \item
4776 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value
4777  $-\locvar{MAG}$.
4778 \item
4779 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4780 \item
4781 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4782 \end{enumerate}
4783 \item
4784 Otherwise, if \bitvar{TOKEN} is 22:
4785 \begin{enumerate}
4786 \item
4787 Read a 1-bit unsigned integer as \locvar{SIGN}.
4788 \item
4789 Read a 9-bit unsigned integer as \locvar{MAG}.
4790 \item
4791 Assign \locvar{MAG} the value $(\locvar{MAG}+69)$.
4792 \item
4793 If \locvar{SIGN} is zero, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$
4794  the value $\locvar{MAG}$.
4795 \item
4796 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value
4797  $-\locvar{MAG}$.
4798 \item
4799 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+1$.
4800 \item
4801 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4802 \end{enumerate}
4803 \item
4804 Otherwise, if \bitvar{TOKEN} is 23:
4805 \begin{enumerate}
4806 \item
4807 Assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value zero.
4808 \item
4809 Read a 1-bit unsigned integer as \locvar{SIGN}.
4810 \item
4811 If \locvar{SIGN} is zero, assign
4812  $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+1]$ the value $1$.
4813 \item
4814 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+1]$ the value
4815  $-1$.
4816 \item
4817 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+2$.
4818 \item
4819 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4820 \end{enumerate}
4821 \item
4822 Otherwise, if \bitvar{TOKEN} is 24:
4823 \begin{enumerate}
4824 \item
4825 For each value of \locvar{\tj} from \bitvar{\ti} to $(\bitvar{\ti}+1)$, assign
4826  $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero.
4827 \item
4828 Read a 1-bit unsigned integer as \locvar{SIGN}.
4829 \item
4830 If \locvar{SIGN} is zero, assign
4831  $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+2]$ the value $1$.
4832 \item
4833 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+2]$ the value
4834  $-1$.
4835 \item
4836 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+3$.
4837 \item
4838 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4839 \end{enumerate}
4840 \item
4841 Otherwise, if \bitvar{TOKEN} is 25:
4842 \begin{enumerate}
4843 \item
4844 For each value of \locvar{\tj} from \bitvar{\ti} to $(\bitvar{\ti}+2)$, assign
4845  $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero.
4846 \item
4847 Read a 1-bit unsigned integer as \locvar{SIGN}.
4848 \item
4849 If \locvar{SIGN} is zero, assign
4850  $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+3]$ the value $1$.
4851 \item
4852 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+3]$ the value
4853  $-1$.
4854 \item
4855 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+4$.
4856 \item
4857 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4858 \end{enumerate}
4859 \item
4860 Otherwise, if \bitvar{TOKEN} is 26:
4861 \begin{enumerate}
4862 \item
4863 For each value of \locvar{\tj} from \bitvar{\ti} to $(\bitvar{\ti}+3)$, assign
4864  $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero.
4865 \item
4866 Read a 1-bit unsigned integer as \locvar{SIGN}.
4867 \item
4868 If \locvar{SIGN} is zero, assign
4869  $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+4]$ the value $1$.
4870 \item
4871 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+4]$ the value
4872  $-1$.
4873 \item
4874 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+5$.
4875 \item
4876 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4877 \end{enumerate}
4878 \item
4879 Otherwise, if \bitvar{TOKEN} is 27:
4880 \begin{enumerate}
4881 \item
4882 For each value of \locvar{\tj} from \bitvar{\ti} to $(\bitvar{\ti}+4)$, assign
4883  $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero.
4884 \item
4885 Read a 1-bit unsigned integer as \locvar{SIGN}.
4886 \item
4887 If \locvar{SIGN} is zero, assign
4888  $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+5]$ the value $1$.
4889 \item
4890 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+5]$ the value
4891  $-1$.
4892 \item
4893 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+6$.
4894 \item
4895 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4896 \end{enumerate}
4897 \item
4898 Otherwise, if \bitvar{TOKEN} is 28:
4899 \begin{enumerate}
4900 \item
4901 Read a 1-bit unsigned integer as \locvar{SIGN}.
4902 \item
4903 Read a 2-bit unsigned integer as \locvar{RLEN}.
4904 \item
4905 Assign \locvar{RLEN} the value $(\locvar{RLEN}+6)$.
4906 \item
4907 For each value of \locvar{\tj} from \bitvar{\ti} to
4908  $(\bitvar{\ti}+\locvar{RLEN}-1)$, assign
4909  $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero.
4910 \item
4911 If \locvar{SIGN} is zero, assign
4912  $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+\locvar{RLEN}]$ the value $1$.
4913 \item
4914 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+\locvar{RLEN}]$
4915  the value $-1$.
4916 \item
4917 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value
4918  $\bitvar{TIS}[\bitvar{\bi}]+\locvar{RLEN}+1$.
4919 \item
4920 Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4921 \end{enumerate}
4922 \item
4923 Otherwise, if \bitvar{TOKEN} is 29:
4924 \begin{enumerate}
4925 \item
4926 Read a 1-bit unsigned integer as \locvar{SIGN}.
4927 \item
4928 Read a 3-bit unsigned integer as \locvar{RLEN}.
4929 \item
4930 Assign \locvar{RLEN} the value $(\locvar{RLEN}+10)$.
4931 \item
4932 For each value of \locvar{\tj} from \bitvar{\ti} to
4933  $(\bitvar{\ti}+\locvar{RLEN}-1)$, assign
4934  $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero.
4935 \item
4936 If \locvar{SIGN} is zero, assign
4937  $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+\locvar{RLEN}]$ the value $1$.
4938 \item
4939 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+\locvar{RLEN}]$
4940  the value $-1$.
4941 \item
4942 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value
4943  $\bitvar{TIS}[\bitvar{\bi}]+\locvar{RLEN}+1$.
4944 \item Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4945 \end{enumerate}
4946 \item
4947 Otherwise, if \bitvar{TOKEN} is 30:
4948 \begin{enumerate}
4949 \item
4950 Assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}]$ the value zero.
4951 \item
4952 Read a 1-bit unsigned integer as \locvar{SIGN}.
4953 \item
4954 Read a 1-bit unsigned integer as \locvar{MAG}.
4955 \item
4956 Assign \locvar{MAG} the value $(\locvar{MAG}+2)$.
4957 \item
4958 If \locvar{SIGN} is zero, assign
4959  $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+1]$ the value $\locvar{MAG}$.
4960 \item
4961 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+1]$ the value
4962  $-\locvar{MAG}$.
4963 \item
4964 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]+2$.
4965 \item Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4966 \end{enumerate}
4967 \item
4968 Otherwise, if \bitvar{TOKEN} is 31:
4969 \begin{enumerate}
4970 \item
4971 Read a 1-bit unsigned integer as \locvar{SIGN}.
4972 \item
4973 Read a 1-bit unsigned integer as \locvar{MAG}.
4974 \item
4975 Assign \locvar{MAG} the value $(\locvar{MAG}+2)$.
4976 \item
4977 Read a 1-bit unsigned integer as \locvar{RLEN}.
4978 \item
4979 Assign \locvar{RLEN} the value $(\locvar{RLEN}+2)$.
4980 \item
4981 For each value of \locvar{\tj} from \bitvar{\ti} to
4982  $(\bitvar{\ti}+\locvar{RLEN}-1)$, assign
4983  $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\tj}]$ the value zero.
4984 \item
4985 If \locvar{SIGN} is zero, assign
4986  $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+\locvar{RLEN}]$ the value
4987  $\locvar{MAG}$.
4988 \item
4989 Otherwise, assign $\bitvar{COEFFS}[\bitvar{\bi}][\bitvar{\ti}+\locvar{RLEN}]$
4990  the value $-\locvar{MAG}$.
4991 \item
4992 Assign $\bitvar{TIS}[\bitvar{\bi}]$ the value
4993  $\bitvar{TIS}[\bitvar{\bi}]+\locvar{RLEN}+1$.
4994 \item Assign $\bitvar{NCOEFFS}[\bitvar{\bi}]$ the value $\bitvar{TIS}[\bitvar{\bi}]$.
4995 \end{enumerate}
4996 \end{enumerate}
4997
4998 \subsection{DCT Coefficient Decode}
4999 \label{sub:dct-coeffs}
5000
5001 \paragraph{Input parameters:}\hfill\\*
5002 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5003 \multicolumn{1}{c}{Name} &
5004 \multicolumn{1}{c}{Type} &
5005 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5006 \multicolumn{1}{c}{Signed?} &
5007 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5008 \bitvar{NBS}      & Integer & 36 & No  & The total number of blocks in a
5009  frame. \\
5010 \bitvar{BCODED}   & \multicolumn{1}{p{40pt}}{Integer Array} &
5011                                1 & No & An \bitvar{NBS}-element array of flags
5012  indicating which blocks are coded. \\
5013 \bitvar{NMBS}     & Integer & 32 & No & The total number of macro blocks in a
5014  frame. \\
5015 \bitvar{HTS} & \multicolumn{3}{l}{Huffman table array}
5016                                      & An 80-element array of Huffman tables
5017  with up to 32 entries each. \\
5018 \bottomrule\end{tabularx}
5019
5020 \paragraph{Output parameters:}\hfill\\*
5021 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5022 \multicolumn{1}{c}{Name} &
5023 \multicolumn{1}{c}{Type} &
5024 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5025 \multicolumn{1}{c}{Signed?} &
5026 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5027 \bitvar{COEFFS}   & \multicolumn{1}{p{50pt}}{2D Integer Array} &
5028                               16 & Yes & An $\bitvar{NBS}\times 64$ array of
5029  quantized DCT coefficient values for each block in zig-zag order. \\
5030 \bitvar{NCOEFFS}  & \multicolumn{1}{p{40pt}}{Integer Array} &
5031                                7 & No  & An \bitvar{NBS}-element array of the
5032  coefficient count for each block. \\
5033 \bottomrule\end{tabularx}
5034
5035 \paragraph{Variables used:}\hfill\\*
5036 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5037 \multicolumn{1}{c}{Name} &
5038 \multicolumn{1}{c}{Type} &
5039 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5040 \multicolumn{1}{c}{Signed?} &
5041 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5042 \locvar{NLBS}     & Integer & 34 & No & The number of blocks in the luma
5043  plane. \\
5044 \locvar{TIS}      & \multicolumn{1}{p{40pt}}{Integer Array} &
5045                                7 & No & An \bitvar{NBS}-element array of the
5046  current token index for each block. \\
5047 \locvar{EOBS}     & Integer & 36 & No & The remaining length of the current
5048  EOB run. \\
5049 \locvar{TOKEN}    & Integer &  5 & No & The current token being decoded. \\
5050 \locvar{HG}       & Integer &  3 & No & The current Huffman table group. \\
5051 \locvar{\cbi}     & Integer & 36 & No & The index of the current block in the
5052  coded block list. \\
5053 \locvar{\bi}      & Integer & 36 & No & The index of the current block in
5054  coded order. \\
5055 \locvar{\bj}      & Integer & 36 & No & Another index of a block in coded
5056  order. \\
5057 \locvar{\ti}      & Integer &  6 & No & The current token index. \\
5058 \locvar{\tj}      & Integer &  6 & No & Another token index. \\
5059 \locvar{\hti_L}   & Integer &  4 & No & The index of the current Huffman table
5060  to use for the luma plane within a group. \\
5061 \locvar{\hti_C}   & Integer &  4 & No & The index of the current Huffman table
5062  to use for the chroma planes within a group. \\
5063 \locvar{\hti}     & Integer &  7 & No & The index of the current Huffman table
5064  to use. \\
5065 \bottomrule\end{tabularx}
5066 \medskip
5067
5068 This procedure puts the above two procedures to work to decode the entire set
5069  of DCT coefficients for the frame.
5070 At the end of this procedure, \locvar{EOBS} MUST be zero, and
5071  $\locvar{TIS}[\locvar{\bi}]$ MUST be 64 for every coded \locvar{\bi}.
5072
5073 Note that we update the coefficient count of every block before continuing an
5074  EOB run or decoding a token, despite the fact that it is already up to date
5075  unless the previous token was a pure zero run.
5076 This is done intentionally to mimic the VP3 accounting rules.
5077 Thus the only time the coefficient count does not include the coefficients in a
5078  pure zero run is when that run reaches all the way to coefficient 63.
5079 Note, however, that regardless of the coefficient count, any additional
5080  coefficients are still set to zero.
5081 The only use of the count is in determining if a special case of the inverse
5082  DCT can be used in Section~\ref{sub:2d-idct}.
5083
5084 \begin{enumerate}
5085 \item
5086 Assign \locvar{NLBS} the value $(\bitvar{NMBS}*4)$.
5087 \item
5088 For each consecutive value of \locvar{\bi} from 0 to $(\bitvar{NBS}-1)$,
5089  assign $\locvar{TIS}[\locvar{\bi}]$ the value zero.
5090 \item
5091 Assign \locvar{EOBS} the value 0.
5092 \item
5093 For each consecutive value of \locvar{\ti} from 0 to 63:
5094 \begin{enumerate}
5095 \item
5096 If \locvar{\ti} is $0$ or $1$:
5097 \begin{enumerate}
5098 \item
5099 Read a 4-bit unsigned integer as \locvar{\hti_L}.
5100 \item
5101 Read a 4-bit unsigned integer as \locvar{\hti_C}.
5102 \end{enumerate}
5103 \item
5104 For each consecutive value of \locvar{\bi} from 0 to $(\bitvar{NBS}-1)$ for
5105  which $\bitvar{BCODED}[\locvar{\bi}]$ is non-zero and
5106  $\locvar{TIS}[\locvar{\bi}]$ equals \locvar{\ti}:
5107 \begin{enumerate}
5108 \item
5109 Assign $\bitvar{NCOEFFS}[\locvar{\bi}]$ the value \locvar{\ti}.
5110 \item
5111 If \locvar{EOBS} is greater than zero:
5112 \begin{enumerate}
5113 \item
5114 For each value of \locvar{\tj} from $\locvar{\ti}$ to 63, assign
5115  $\bitvar{COEFFS}[\locvar{\bi}][\locvar{\tj}]$ the value zero.
5116 \item
5117 Assign $\locvar{TIS}[\locvar{\bi}]$ the value 64.
5118 \item
5119 Assign \locvar{EOBS} the value $(\locvar{EOBS}-1)$.
5120 \end{enumerate}
5121 \item
5122 Otherwise:
5123 \begin{enumerate}
5124 \item
5125 Assign \locvar{HG} a value based on \locvar{\ti} from
5126  Table~\ref{tab:huff-groups}.
5127
5128 \begin{table}[htbp]
5129 \begin{center}
5130 \begin{tabular}{lc}\toprule
5131 \locvar{\ti}  & \locvar{HG} \\\midrule
5132 $0$           & $0$ \\
5133 $1\ldots 5$   & $1$ \\
5134 $6\ldots 14$  & $2$ \\
5135 $15\ldots 27$ & $3$ \\
5136 $28\ldots 63$ & $4$ \\
5137 \bottomrule\end{tabular}
5138 \end{center}
5139 \caption{Huffman Table Groups}
5140 \label{tab:huff-groups}
5141 \end{table}
5142
5143 \item
5144 If \locvar{\bi} is less than \locvar{NLBS}, assign \locvar{\hti} the value
5145  $(16*\locvar{HG}+\locvar{\hti_L})$.
5146 \item
5147 Otherwise, assign \locvar{\hti} the value
5148  $(16*\locvar{HG}+\locvar{\hti_C})$.
5149 \item
5150 Read one bit at a time until one of the codes in $\bitvar{HTS}[\locvar{\hti}]$
5151  is recognized, and assign the value to \locvar{TOKEN}.
5152 \item
5153 If \locvar{TOKEN} is less than 7, expand an EOB token using the procedure given
5154  in Section~\ref{sub:eob-token} to update $\locvar{TIS}[\locvar{\bi}]$,
5155  $\bitvar{COEFFS}[\locvar{\bi}]$, and \locvar{EOBS}.
5156 \item
5157 Otherwise, expand a coefficient token using the procedure given in
5158  Section~\ref{sub:coeff-token} to update $\locvar{TIS}[\locvar{\bi}]$,
5159  $\bitvar{COEFFS}[\locvar{\bi}]$, and $\bitvar{NCOEFFS}[\locvar{\bi}]$.
5160 \end{enumerate}
5161 \end{enumerate}
5162 \end{enumerate}
5163 \end{enumerate}
5164
5165 \section{Undoing DC Prediction}
5166
5167 The actual value of a DC coefficient decoded by Section~\ref{sec:dct-decode} is
5168  the residual from a predicted value computed by the encoder.
5169 This prediction is only applied to DC coefficients.
5170 Quantized AC coefficients are encoded directly.
5171
5172 This section describes how to undo this prediction to recover the original
5173  DC coefficients.
5174 The predicted DC value for a block is computed from the DC values of its
5175  immediate neighbors which precede the block in raster order.
5176 Thus, reversing this prediction must proceed in raster order, instead of coded
5177  order.
5178
5179 Note that this step comes before dequantizing the coefficients.
5180 For this reason, DC coefficients are all quantized with the same \qi\ value,
5181  regardless of the block-level \qi\ values decoded in
5182  Section~\ref{sub:block-qis}.
5183 Those \qi\ values are applied only to the AC coefficients.
5184
5185 \subsection{Computing the DC Predictor}
5186 \label{sub:dc-pred}
5187
5188 \paragraph{Input parameters:}\hfill\\*
5189 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5190 \multicolumn{1}{c}{Name} &
5191 \multicolumn{1}{c}{Type} &
5192 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5193 \multicolumn{1}{c}{Signed?} &
5194 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5195 \bitvar{BCODED}   & \multicolumn{1}{p{40pt}}{Integer Array} &
5196                                1 & No  & An \bitvar{NBS}-element array of flags
5197  indicating which blocks are coded. \\
5198 \bitvar{MBMODES}  & \multicolumn{1}{p{40pt}}{Integer Array} &
5199                                3 & No  & An \bitvar{NMBS}-element array of
5200  coding modes for each macro block. \\
5201 \bitvar{LASTDC}   & \multicolumn{1}{p{40pt}}{Integer Array} &
5202                               16 & Yes & A 3-element array containing the
5203  most recently decoded DC value, one for intra mode and one for each reference
5204  frame. \\
5205 \bitvar{COEFFS}   & \multicolumn{1}{p{50pt}}{2D Integer Array} &
5206                               16 & Yes & An $\bitvar{NBS}\times 64$ array of
5207  quantized DCT coefficient values for each block in zig-zag order. \\
5208 \bitvar{\bi}      & Integer & 36 & No  & The index of the current block in
5209  coded order. \\
5210 \bottomrule\end{tabularx}
5211
5212 \paragraph{Output parameters:}\hfill\\*
5213 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5214 \multicolumn{1}{c}{Name} &
5215 \multicolumn{1}{c}{Type} &
5216 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5217 \multicolumn{1}{c}{Signed?} &
5218 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5219 \bitvar{DCPRED} & Integer & 16 & Yes & The predicted DC value for the current
5220  block. \\
5221 \bottomrule\end{tabularx}
5222
5223 \paragraph{Variables used:}\hfill\\*
5224 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5225 \multicolumn{1}{c}{Name} &
5226 \multicolumn{1}{c}{Type} &
5227 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5228 \multicolumn{1}{c}{Signed?} &
5229 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5230 \locvar{P}        & \multicolumn{1}{p{40pt}}{Integer Array} &
5231                                1 & No  & A 4-element array indicating which
5232  neighbors can be used for DC prediction. \\
5233 \locvar{PBI}      & \multicolumn{1}{p{40pt}}{Integer Array} &
5234                               36 & No  & A 4-element array containing the
5235  coded-order block index of the current block's neighbors. \\
5236 \locvar{W}        & \multicolumn{1}{p{40pt}}{Integer Array} &
5237                                7 & Yes & A 4-element array of the weights to
5238  apply to each neighboring DC value. \\
5239 \locvar{PDIV}     & Integer &  8 & No  & The value to divide the weighted sum
5240  by. \\
5241 \locvar{\bj}      & Integer & 36 & No  & The index of a neighboring block in
5242  coded order. \\
5243 \locvar{\mbi}     & Integer & 32 & No  & The index of the macro block
5244  containing block \locvar{\bi}. \\
5245 \locvar{\mbj}     & Integer & 32 & No  & The index of the macro block
5246  containing block \locvar{\bj}. \\
5247 \locvar{\rfi}     & Integer &  2 & No  & The index of the reference frame
5248  indicated by the coding mode for macro block \locvar{\mbi}. \\
5249 \bottomrule\end{tabularx}
5250 \medskip
5251
5252 This procedure outlines how a predictor is formed for a single block.
5253
5254 The predictor is computed as a weighted sum of the neighboring DC values from
5255  coded blocks which use the same reference frame.
5256 This latter condition is determined only by checking the coding mode for the
5257  block.
5258 Even if the golden frame and the previous frame are in fact the same, e.g., for
5259  the first inter frame after an intra frame, they are still treated as being
5260  different for the purposes of DC prediction.
5261 The weighted sum is divided by a power of two, with truncation towards zero,
5262  and the result is checked for outranging if necessary.
5263
5264 If there are no neighboring coded blocks which use the same reference frame as
5265  the current block, then the most recent DC value of any block that used that
5266  reference frame is used instead.
5267 If no such block exists, then the predictor is set to zero.
5268
5269 \begin{enumerate}
5270 \item
5271 Assign \locvar{\mbi} the index of the macro block containing block
5272  \bitvar{\bi}.
5273 \item
5274 Assign \locvar{\rfi} the value of the Reference Frame Index column of
5275  Table~\ref{tab:cm-refs} corresponding to $\bitvar{MBMODES}[\locvar{\mbi}]$.
5276
5277 \begin{table}[htpb]
5278 \begin{center}
5279 \begin{tabular}{ll}\toprule
5280 Coding Mode               & Reference Frame Index \\\midrule
5281 $0$ (INTER\_NOMV)         & $1$ (Previous)        \\
5282 $1$ (INTRA)               & $0$ (None)            \\
5283 $2$ (INTER\_MV)           & $1$ (Previous)        \\
5284 $3$ (INTER\_MV\_LAST)     & $1$ (Previous)        \\
5285 $4$ (INTER\_MV\_LAST2)    & $1$ (Previous)        \\
5286 $5$ (INTER\_GOLDEN\_NOMV) & $2$ (Golden)          \\
5287 $6$ (INTER\_GOLDEN\_MV)   & $2$ (Golden)          \\
5288 $7$ (INTER\_MV\_FOUR)     & $1$ (Previous)        \\
5289 \bottomrule\end{tabular}
5290 \end{center}
5291 \caption{Reference Frames for Each Coding Mode}
5292 \label{tab:cm-refs}
5293 \end{table}
5294
5295 \item
5296 If block \locvar{\bi} is not along the left edge of the coded frame:
5297 \begin{enumerate}
5298 \item
5299 Assign \locvar{\bj} the coded-order index of block \locvar{\bi}'s left
5300  neighbor, i.e., in the same row but one column to the left.
5301 \item
5302 If $\bitvar{BCODED}[\bj]$ is not zero:
5303 \begin{enumerate}
5304 \item
5305 Assign \locvar{\mbj} the index of the macro block containing block
5306  \locvar{\bj}.
5307 \item
5308 If the value of the Reference Frame Index column of Table~\ref{tab:cm-refs}
5309  corresponding to $\bitvar{MBMODES}[\locvar{\mbj}]$ equals \locvar{\rfi}:
5310 \begin{enumerate}
5311 \item
5312 Assign $\locvar{P}[0]$ the value $1$.
5313 \item
5314 Assign $\locvar{PBI}[0]$ the value \locvar{\bj}.
5315 \end{enumerate}
5316 \item
5317 Otherwise, assign $\locvar{P}[0]$ the value zero.
5318 \end{enumerate}
5319 \item
5320 Otherwise, assign $\locvar{P}[0]$ the value zero.
5321 \end{enumerate}
5322 \item
5323 Otherwise, assign $\locvar{P}[0]$ the value zero.
5324
5325 \item
5326 If block \locvar{\bi} is not along the left edge nor the bottom edge of the
5327  coded frame:
5328 \begin{enumerate}
5329 \item
5330 Assign \locvar{\bj} the coded-order index of block \locvar{\bi}'s lower-left
5331  neighbor, i.e., one row down and one column to the left.
5332 \item
5333 If $\bitvar{BCODED}[\bj]$ is not zero:
5334 \begin{enumerate}
5335 \item
5336 Assign \locvar{\mbj} the index of the macro block containing block
5337  \locvar{\bj}.
5338 \item
5339 If the value of the Reference Frame Index column of Table~\ref{tab:cm-refs}
5340  corresponding to $\bitvar{MBMODES}[\locvar{\mbj}]$ equals \locvar{\rfi}:
5341 \begin{enumerate}
5342 \item
5343 Assign $\locvar{P}[1]$ the value $1$.
5344 \item
5345 Assign $\locvar{PBI}[1]$ the value \locvar{\bj}.
5346 \end{enumerate}
5347 \item
5348 Otherwise, assign $\locvar{P}[1]$ the value zero.
5349 \end{enumerate}
5350 \item
5351 Otherwise, assign $\locvar{P}[1]$ the value zero.
5352 \end{enumerate}
5353 \item
5354 Otherwise, assign $\locvar{P}[1]$ the value zero.
5355
5356 \item
5357 If block \locvar{\bi} is not along the bottom edge of the coded frame:
5358 \begin{enumerate}
5359 \item
5360 Assign \locvar{\bj} the coded-order index of block \locvar{\bi}'s lower
5361  neighbor, i.e., in the same column but one row down.
5362 \item
5363 If $\bitvar{BCODED}[\bj]$ is not zero:
5364 \begin{enumerate}
5365 \item
5366 Assign \locvar{\mbj} the index of the macro block containing block
5367  \locvar{\bj}.
5368 \item
5369 If the value of the Reference Frame Index column of Table~\ref{tab:cm-refs}
5370  corresponding to $\bitvar{MBMODES}[\locvar{\mbj}]$ equals \locvar{\rfi}:
5371 \begin{enumerate}
5372 \item
5373 Assign $\locvar{P}[2]$ the value $1$.
5374 \item
5375 Assign $\locvar{PBI}[2]$ the value \locvar{\bj}.
5376 \end{enumerate}
5377 \item
5378 Otherwise, assign $\locvar{P}[2]$ the value zero.
5379 \end{enumerate}
5380 \item
5381 Otherwise, assign $\locvar{P}[2]$ the value zero.
5382 \end{enumerate}
5383 \item
5384 Otherwise, assign $\locvar{P}[2]$ the value zero.
5385
5386 \item
5387 If block \locvar{\bi} is not along the right edge nor the bottom edge of the
5388  coded frame:
5389 \begin{enumerate}
5390 \item
5391 Assign \locvar{\bj} the coded-order index of block \locvar{\bi}'s lower-right
5392  neighbor, i.e., one row down and one column to the right.
5393 \item
5394 If $\bitvar{BCODED}[\bj]$ is not zero:
5395 \begin{enumerate}
5396 \item
5397 Assign \locvar{\mbj} the index of the macro block containing block
5398  \locvar{\bj}.
5399 \item
5400 If the value of the Reference Frame Index column of Table~\ref{tab:cm-refs}
5401  corresponding to $\bitvar{MBMODES}[\locvar{\mbj}]$ equals \locvar{\rfi}:
5402 \begin{enumerate}
5403 \item
5404 Assign $\locvar{P}[3]$ the value $1$.
5405 \item
5406 Assign $\locvar{PBI}[3]$ the value \locvar{\bj}.
5407 \end{enumerate}
5408 \item
5409 Otherwise, assign $\locvar{P}[3]$ the value zero.
5410 \end{enumerate}
5411 \item
5412 Otherwise, assign $\locvar{P}[3]$ the value zero.
5413 \end{enumerate}
5414 \item
5415 Otherwise, assign $\locvar{P}[3]$ the value zero.
5416
5417 \item
5418 If all of the values $\locvar{P}[0]$, $\locvar{P}[1]$, $\locvar{P}[2]$, and
5419  $\locvar{P}[3]$ are zero, then assign \bitvar{DCPRED} the value
5420  $\bitvar{LASTDC}[\locvar{\rfi}]$.
5421 \item
5422 Otherwise:
5423 \begin{enumerate}
5424 \item
5425 Assign the array \locvar{W} and the variable \locvar{PDIV} the values from the
5426  row of Table~\ref{tab:dc-weights} corresponding to the values of each
5427  $\locvar{P}[\idx{i}]$.
5428
5429 \begin{table}[htb]
5430 \begin{center}
5431 \begin{tabular}{ccccrrrrr}\toprule
5432 \multicolumn{1}{p{25pt}}{\centering$\locvar{P}[0]$ (L)} &
5433 \multicolumn{1}{p{25pt}}{\centering$\locvar{P}[1]$ (DL)} &
5434 \multicolumn{1}{p{25pt}}{\centering$\locvar{P}[2]$ (D)} &
5435 \multicolumn{1}{p{25pt}}{\centering$\locvar{P}[3]$ (DR)} &
5436 \multicolumn{1}{p{25pt}}{\centering$\locvar{W}[0]$ (L)} &
5437 \multicolumn{1}{p{25pt}}{\centering$\locvar{W}[1]$ (DL)} &
5438 \multicolumn{1}{p{25pt}}{\centering$\locvar{W}[2]$ (D)} &
5439 \multicolumn{1}{p{25pt}}{\centering$\locvar{W}[3]$ (DR)} &
5440 \locvar{PDIV} \\\midrule
5441 $1$ & $0$ & $0$ & $0$ &  $1$ &   $0$ &  $0$ &  $0$ &   $1$ \\
5442 $0$ & $1$ & $0$ & $0$ &  $0$ &   $1$ &  $0$ &  $0$ &   $1$ \\
5443 $1$ & $1$ & $0$ & $0$ &  $1$ &   $0$ &  $0$ &  $0$ &   $1$ \\
5444 $0$ & $0$ & $1$ & $0$ &  $0$ &   $0$ &  $1$ &  $0$ &   $1$ \\
5445 $1$ & $0$ & $1$ & $0$ &  $1$ &   $0$ &  $1$ &  $0$ &   $2$ \\
5446 $0$ & $1$ & $1$ & $0$ &  $0$ &   $0$ &  $1$ &  $0$ &   $1$ \\
5447 $1$ & $1$ & $1$ & $0$ & $29$ & $-26$ & $29$ &  $0$ &  $32$ \\
5448 $0$ & $0$ & $0$ & $1$ &  $0$ &   $0$ &  $0$ &  $1$ &   $1$ \\
5449 $1$ & $0$ & $0$ & $1$ & $75$ &   $0$ &  $0$ & $53$ & $128$ \\
5450 $0$ & $1$ & $0$ & $1$ &  $0$ &   $1$ &  $0$ &  $1$ &   $2$ \\
5451 $1$ & $1$ & $0$ & $1$ & $75$ &   $0$ &  $0$ & $53$ & $128$ \\
5452 $0$ & $0$ & $1$ & $1$ &  $0$ &   $0$ &  $1$ &  $0$ &   $1$ \\
5453 $1$ & $0$ & $1$ & $1$ & $75$ &   $0$ &  $0$ & $53$ & $128$ \\
5454 $0$ & $1$ & $1$ & $1$ &  $0$ &   $3$ & $10$ &  $3$ &  $16$ \\
5455 $1$ & $1$ & $1$ & $1$ & $29$ & $-26$ & $29$ &  $0$ &  $32$ \\
5456 \bottomrule\end{tabular}
5457 \end{center}
5458 \caption{Weights and Divisors for Each Set of Available DC Predictors}
5459 \label{tab:dc-weights}
5460 \end{table}
5461
5462 \item
5463 Assign \bitvar{DCPRED} the value zero.
5464 \item
5465 If $\locvar{P}[0]$ is non-zero, assign \bitvar{DCPRED} the value
5466  $(\bitvar{DCPRED}+\locvar{W}[0]*\bitvar{COEFFS}[\locvar{PBI}[0]][0])$.
5467 \item
5468 If $\locvar{P}[1]$ is non-zero, assign \bitvar{DCPRED} the value
5469  $(\bitvar{DCPRED}+\locvar{W}[1]*\bitvar{COEFFS}[\locvar{PBI}[1]][0])$.
5470 \item
5471 If $\locvar{P}[2]$ is non-zero, assign \bitvar{DCPRED} the value
5472  $(\bitvar{DCPRED}+\locvar{W}[2]*\bitvar{COEFFS}[\locvar{PBI}[2]][0])$.
5473 \item
5474 If $\locvar{P}[3]$ is non-zero, assign \bitvar{DCPRED} the value
5475  $(\bitvar{DCPRED}+\locvar{W}[3]*\bitvar{COEFFS}[\locvar{PBI}[3]][0])$.
5476 \item
5477 Assign \bitvar{DCPRED} the value $(\bitvar{DCPRED}//\locvar{PDIV})$.
5478 \item
5479 If $\locvar{P}[0]$, $\locvar{P}[1]$, and $\locvar{P}[2]$ are all non-zero:
5480 \begin{enumerate}
5481 \item
5482 If $|\bitvar{DCPRED}-\bitvar{COEFFS}[\locvar{PBI}[2]][0]|$ is greater than
5483  $128$, assign \bitvar{DCPRED} the value $\bitvar{COEFFS}[\locvar{PBI}[2]][0]$.
5484 \item
5485 Otherwise, if $|\bitvar{DCPRED}-\bitvar{COEFFS}[\locvar{PBI}[0]][0]|$ is
5486  greater than $128$, assign \bitvar{DCPRED} the value
5487  $\bitvar{COEFFS}[\locvar{PBI}[0]][0]$.
5488 \item
5489 Otherwise, if $|\bitvar{DCPRED}-\bitvar{COEFFS}[\locvar{PBI}[1]][0]|$ is
5490  greater than $128$, assign \bitvar{DCPRED} the value
5491  $\bitvar{COEFFS}[\locvar{PBI}[1]][0]$.
5492 \end{enumerate}
5493 \end{enumerate}
5494 \end{enumerate}
5495
5496 \subsection{Inverting the DC Prediction Process}
5497 \label{sub:dc-pred-undo}
5498
5499 \paragraph{Input parameters:}\hfill\\*
5500 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5501 \multicolumn{1}{c}{Name} &
5502 \multicolumn{1}{c}{Type} &
5503 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5504 \multicolumn{1}{c}{Signed?} &
5505 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5506 \bitvar{BCODED}   & \multicolumn{1}{p{40pt}}{Integer Array} &
5507                                1 & No  & An \bitvar{NBS}-element array of flags
5508  indicating which blocks are coded. \\
5509 \bitvar{MBMODES}  & \multicolumn{1}{p{40pt}}{Integer Array} &
5510                                3 & No  & An \bitvar{NMBS}-element array of
5511  coding modes for each macro block. \\
5512 \bitvar{COEFFS}   & \multicolumn{1}{p{50pt}}{2D Integer Array} &
5513                               16 & Yes & An $\bitvar{NBS}\times 64$ array of
5514  quantized DCT coefficient values for each block in zig-zag order. \\
5515 \bottomrule\end{tabularx}
5516
5517 \paragraph{Output parameters:}\hfill\\*
5518 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5519 \multicolumn{1}{c}{Name} &
5520 \multicolumn{1}{c}{Type} &
5521 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5522 \multicolumn{1}{c}{Signed?} &
5523 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5524 \bitvar{COEFFS}   & \multicolumn{1}{p{50pt}}{2D Integer Array} &
5525                               16 & Yes & An $\bitvar{NBS}\times 64$ array of
5526  quantized DCT coefficient values for each block in zig-zag order. The DC
5527  value of each block will be updated. \\
5528 \bottomrule\end{tabularx}
5529
5530 \paragraph{Variables used:}\hfill\\*
5531 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5532 \multicolumn{1}{c}{Name} &
5533 \multicolumn{1}{c}{Type} &
5534 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5535 \multicolumn{1}{c}{Signed?} &
5536 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5537 \locvar{LASTDC}   & \multicolumn{1}{p{40pt}}{Integer Array} &
5538                               16 & Yes & A 3-element array containing the
5539  most recently decoded DC value, one for intra mode and one for each reference
5540  frame. \\
5541 \locvar{DCPRED}   & Integer & 16 & Yes & The predicted DC value for the current
5542  block. \\
5543 \locvar{DC}       & Integer & 17 & Yes & The actual DC value for the current
5544  block. \\
5545 \locvar{\bi}      & Integer & 36 & No  & The index of the current block in
5546  coded order. \\
5547 \locvar{\mbi}     & Integer & 32 & No  & The index of the macro block
5548  containing block \locvar{\bi}. \\
5549 \locvar{\rfi}     & Integer &  2 & No  & The index of the reference frame
5550  indicated by the coding mode for macro block \locvar{\mbi}. \\
5551 \bottomrule\end{tabularx}
5552 \medskip
5553
5554 This procedure describes the complete process of undoing the DC prediction to
5555  recover the original DC values.
5556 Because it is possible to add a value as large as $580$ to the predicted DC
5557  coefficient value at every block, which will then be used to increase the
5558  predictor for the next block, the reconstructed DC value could overflow a
5559  16-bit integer.
5560 This is handled by truncating the result to a 16-bit signed representation,
5561  simply throwing away any higher bits in the two's complement representation of
5562  the number.
5563
5564 \begin{enumerate}
5565 \item
5566 Assign $\locvar{LASTDC}[0]$ the value zero.
5567 \item
5568 Assign $\locvar{LASTDC}[1]$ the value zero.
5569 \item
5570 Assign $\locvar{LASTDC}[2]$ the value zero.
5571 \item
5572 For each block in {\em raster} order, with coded-order index \locvar{\bi}:
5573 \begin{enumerate}
5574 \item
5575 If $\bitvar{BCODED}[\locvar{\bi}]$ is non-zero:
5576 \begin{enumerate}
5577 \item
5578 Compute the value \locvar{DCPRED} using the procedure outlined in
5579  Section~\ref{sub:dc-pred}.
5580 \item
5581 Assign \locvar{DC} the value
5582  $(\bitvar{COEFFS}[\locvar{\bi}][0]+\locvar{DCPRED})$.
5583 \item
5584 Truncate \locvar{DC} to a 16-bit representation by dropping any higher-order
5585  bits.
5586 \item
5587 Assign $\bitvar{COEFFS}[\locvar{\bi}][0]$ the value \locvar{DC}.
5588 \item
5589 Assign \locvar{\mbi} the index of the macro block containing block
5590  \locvar{\bi}.
5591 \item
5592 Assign \locvar{\rfi} the value of the Reference Frame Index column of
5593  Table~\ref{tab:cm-refs} corresponding to $\bitvar{MBMODES}[\locvar{\mbi}]$.
5594 \item
5595 Assign $\locvar{LASTDC}[\rfi]$ the value $\locvar{DC}$.
5596 \end{enumerate}
5597 \end{enumerate}
5598 \end{enumerate}
5599
5600 \section{Reconstruction}
5601
5602 At this stage, the complete contents of the data packet have been decoded.
5603 All that remains is to reconstruct the contents of the new frame.
5604 This is applied on a block by block basis, and as each block is independent,
5605  the order they are processed in does not matter.
5606
5607 \subsection{Predictors}
5608 \label{sec:predictors}
5609
5610 For each block, a predictor is formed based on its coding mode and motion
5611  vector.
5612 There are three basic types of predictors: the intra predictor, the whole-pixel
5613  predictor, and the half-pixel predictor.
5614 The first is used for all blocks coded in INTRA mode, while all other blocks
5615  use one of the latter two.
5616 The whole-pixel predictor is used if the fractional part of both motion vector
5617  components is zero, otherwise the half-pixel predictor is used.
5618
5619 \subsubsection{The Intra Predictor}
5620 \label{sub:predintra}
5621
5622 \paragraph{Input parameters:} None.
5623
5624 \paragraph{Output parameters:}\hfill\\*
5625 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5626 \multicolumn{1}{c}{Name} &
5627 \multicolumn{1}{c}{Type} &
5628 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5629 \multicolumn{1}{c}{Signed?} &
5630 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5631 \bitvar{PRED}   & \multicolumn{1}{p{50pt}}{2D Integer Array} &
5632                                8 & No  & An $8\times 8$ array of predictor
5633  values to use for INTRA coded blocks. \\
5634 \bottomrule\end{tabularx}
5635
5636 \paragraph{Variables used:}\hfill\\*
5637 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5638 \multicolumn{1}{c}{Name} &
5639 \multicolumn{1}{c}{Type} &
5640 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5641 \multicolumn{1}{c}{Signed?} &
5642 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5643 \locvar{\idx{bx}}  & Integer &  3 & No  & The horizontal pixel index in the
5644  block. \\
5645 \locvar{\idx{by}}  & Integer &  3 & No  & The vertical pixel index in the
5646  block. \\
5647 \bottomrule\end{tabularx}
5648 \medskip
5649
5650 The intra predictor is nothing more than the constant value $128$.
5651 This is applied for the sole purpose of centering the range of possible DC
5652  values for INTRA blocks around zero.
5653
5654 \begin{enumerate}
5655 \item
5656 For each value of \locvar{\idx{by}} from $0$ to $7$, inclusive:
5657 \begin{enumerate}
5658 \item
5659 For each value of \locvar{\idx{bx}} from $0$ to $7$, inclusive:
5660 \begin{enumerate}
5661 \item
5662 Assign $\bitvar{PRED}[\locvar{\idx{by}}][\locvar{\idx{bx}}]$ the value $128$.
5663 \end{enumerate}
5664 \end{enumerate}
5665 \end{enumerate}
5666
5667 \subsubsection{The Whole-Pixel Predictor}
5668 \label{sub:predfullpel}
5669
5670 \paragraph{Input parameters:}\hfill\\*
5671 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5672 \multicolumn{1}{c}{Name} &
5673 \multicolumn{1}{c}{Type} &
5674 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5675 \multicolumn{1}{c}{Signed?} &
5676 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5677 \bitvar{RPW}   & Integer   & 20 & No  & The width of the current plane of the
5678  reference frame in pixels. \\
5679 \bitvar{RPH}   & Integer   & 20 & No  & The height of the current plane of the
5680  reference frame in pixels. \\
5681 \bitvar{REFP}  & \multicolumn{1}{p{50pt}}{2D Integer Array} &
5682                               8 & No  & A $\bitvar{RPH}\times\bitvar{RPW}$
5683  array containing the contents of the current plane of the reference frame. \\
5684 \bitvar{BX}    & Integer   & 20 & No  & The horizontal pixel index of the
5685  lower-left corner of the current block. \\
5686 \bitvar{BY}    & Integer   & 20 & No  & The vertical pixel index of the
5687  lower-left corner of the current block. \\
5688 \bitvar{MVX}   & Integer   &  5 & No  & The horizontal component of the block
5689  motion vector.
5690 This is always a whole-pixel value. \\
5691 \bitvar{MVY}   & Integer   &  5 & No  & The vertical component of the block
5692  motion vector.
5693 This is always a whole-pixel value. \\
5694 \bottomrule\end{tabularx}
5695
5696 \paragraph{Output parameters:}\hfill\\*
5697 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5698 \multicolumn{1}{c}{Name} &
5699 \multicolumn{1}{c}{Type} &
5700 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5701 \multicolumn{1}{c}{Signed?} &
5702 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5703 \bitvar{PRED}   & \multicolumn{1}{p{50pt}}{2D Integer Array} &
5704                                8 & No  & An $8\times 8$ array of predictor
5705  values to use for INTER coded blocks. \\
5706 \bottomrule\end{tabularx}
5707
5708 \paragraph{Variables used:}\hfill\\*
5709 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5710 \multicolumn{1}{c}{Name} &
5711 \multicolumn{1}{c}{Type} &
5712 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5713 \multicolumn{1}{c}{Signed?} &
5714 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5715 \locvar{\idx{bx}} & Integer &  3 & Yes & The horizontal pixel index in the
5716  block. \\
5717 \locvar{\idx{by}} & Integer &  3 & Yes & The vertical pixel index in the
5718  block. \\
5719 \locvar{\idx{rx}} & Integer & 20 & No  & The horizontal pixel index in the
5720  reference frame. \\
5721 \locvar{\idx{ry}} & Integer & 20 & No  & The vertical pixel index in the
5722  reference frame. \\
5723 \bottomrule\end{tabularx}
5724 \medskip
5725
5726 The whole-pixel predictor simply copies verbatim the contents of the reference
5727  frame pointed to by the block's motion vector.
5728 If the vector points outside the reference frame, then the closest value on the
5729  edge of the reference frame is used instead.
5730 In practice, this is usually implemented by expanding the size of the reference
5731  frame by $8$ or $16$ pixels on each side---depending on whether or not the
5732  corresponding axis is subsampled in the current plane---and copying the border
5733  pixels into this region.
5734
5735 \begin{enumerate}
5736 \item
5737 For each value of \locvar{\idx{by}} from $0$ to $7$, inclusive:
5738 \begin{enumerate}
5739 \item
5740 Assign \locvar{\idx{ry}} the value
5741  $(\bitvar{BY}+\bitvar{MVY}+\locvar{\idx{by}})$.
5742 \item
5743 If \locvar{\idx{ry}} is greater than $(\bitvar{RPH}-1)$, assign
5744  \locvar{\idx{ry}} the value $(\bitvar{RPH}-1)$.
5745 \item
5746 If \locvar{\idx{ry}} is less than zero, assign \locvar{\idx{ry}} the value
5747  zero.
5748 \item
5749 For each value of \locvar{\idx{bx}} from $0$ to $7$, inclusive:
5750 \begin{enumerate}
5751 \item
5752 Assign \locvar{\idx{rx}} the value
5753  $(\bitvar{BX}+\bitvar{MVX}+\locvar{\idx{bx}})$.
5754 \item
5755 If \locvar{\idx{rx}} is greater than $(\bitvar{RPW}-1)$, assign
5756  \locvar{\idx{rx}} the value $(\bitvar{RPW}-1)$.
5757 \item
5758 If \locvar{\idx{rx}} is less than zero, assign \locvar{\idx{rx}} the value
5759  zero.
5760 \item
5761 Assign $\bitvar{PRED}[\locvar{\idx{by}}][\locvar{\idx{bx}}]$ the value
5762  $\bitvar{REFP}[\locvar{\idx{ry}}][\locvar{\idx{rx}}]$.
5763 \end{enumerate}
5764 \end{enumerate}
5765 \end{enumerate}
5766
5767 \subsubsection{The Half-Pixel Predictor}
5768 \label{sub:predhalfpel}
5769
5770 \paragraph{Input parameters:}\hfill\\*
5771 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5772 \multicolumn{1}{c}{Name} &
5773 \multicolumn{1}{c}{Type} &
5774 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5775 \multicolumn{1}{c}{Signed?} &
5776 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5777 \bitvar{RPW}   & Integer   & 20 & No  & The width of the current plane of the
5778  reference frame in pixels. \\
5779 \bitvar{RPH}   & Integer   & 20 & No  & The height of the current plane of the
5780  reference frame in pixels. \\
5781 \bitvar{REFP}  & \multicolumn{1}{p{50pt}}{2D Integer Array} &
5782                               8 & No  & A $\bitvar{RPH}\times\bitvar{RPW}$
5783  array containing the contents of the current plane of the reference frame. \\
5784 \bitvar{BX}    & Integer   & 20 & No  & The horizontal pixel index of the
5785  lower-left corner of the current block. \\
5786 \bitvar{BY}    & Integer   & 20 & No  & The vertical pixel index of the
5787  lower-left corner of the current block. \\
5788 \bitvar{MVX}   & Integer   &  5 & No  & The horizontal component of the first
5789  whole-pixel motion vector. \\
5790 \bitvar{MVY}   & Integer   &  5 & No  & The vertical component of the first
5791  whole-pixel motion vector. \\
5792 \bitvar{MVX2}  & Integer   &  5 & No  & The horizontal component of the second
5793  whole-pixel motion vector. \\
5794 \bitvar{MVY2}  & Integer   &  5 & No  & The vertical component of the second
5795  whole-pixel motion vector. \\
5796 \bottomrule\end{tabularx}
5797
5798 \paragraph{Output parameters:}\hfill\\*
5799 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5800 \multicolumn{1}{c}{Name} &
5801 \multicolumn{1}{c}{Type} &
5802 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5803 \multicolumn{1}{c}{Signed?} &
5804 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5805 \bitvar{PRED}   & \multicolumn{1}{p{50pt}}{2D Integer Array} &
5806                                8 & No  & An $8\times 8$ array of predictor
5807  values to use for INTER coded blocks. \\
5808 \bottomrule\end{tabularx}
5809
5810 \paragraph{Variables used:}\hfill\\*
5811 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5812 \multicolumn{1}{c}{Name} &
5813 \multicolumn{1}{c}{Type} &
5814 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5815 \multicolumn{1}{c}{Signed?} &
5816 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5817 \locvar{\idx{bx}} & Integer &  3 & Yes & The horizontal pixel index in the
5818  block. \\
5819 \locvar{\idx{by}} & Integer &  3 & Yes & The vertical pixel index in the
5820  block. \\
5821 \locvar{\idx{rx1}} & Integer & 20 & No  & The first horizontal pixel index in
5822  the reference frame. \\
5823 \locvar{\idx{ry1}} & Integer & 20 & No  & The first vertical pixel index in the
5824  reference frame. \\
5825 \locvar{\idx{rx2}} & Integer & 20 & No  & The second horizontal pixel index in
5826  the reference frame. \\
5827 \locvar{\idx{ry2}} & Integer & 20 & No  & The second vertical pixel index in
5828  the reference frame. \\
5829 \bottomrule\end{tabularx}
5830 \medskip
5831
5832 If one or both of the components of the block motion vector is not a
5833  whole-pixel value, then the half-pixel predictor is used.
5834 The half-pixel predictor converts the fractional motion vector into two
5835  whole-pixel motion vectors.
5836 The first is formed by truncating the values of each component towards zero,
5837  and the second is formed by rounding them away from zero.
5838 The contributions from the reference frame at the locations pointed to by each
5839  vector are averaged, truncating towards negative infinity.
5840
5841 Only two samples from the reference frame contribute to each predictor value,
5842  even if both components of the motion vector have non-zero fractional
5843  components.
5844 Motion vector components with quarter-pixel accuracy in the chroma planes are
5845  treated exactly the same as those with half-pixel accuracy.
5846 Any non-zero fractional part gets rounded one way in the first vector, and the
5847  other way in the second.
5848
5849 \begin{enumerate}
5850 \item
5851 For each value of \locvar{\idx{by}} from $0$ to $7$, inclusive:
5852 \begin{enumerate}
5853 \item
5854 Assign \locvar{\idx{ry1}} the value
5855  $(\bitvar{BY}+\bitvar{MVY}+\locvar{\idx{by}})$.
5856 \item
5857 If \locvar{\idx{ry1}} is greater than $(\bitvar{RPH}-1)$, assign
5858  \locvar{\idx{ry1}} the value $(\bitvar{RPH}-1)$.
5859 \item
5860 If \locvar{\idx{ry1}} is less than zero, assign \locvar{\idx{ry1}} the value
5861  zero.
5862 \item
5863 Assign \locvar{\idx{ry2}} the value
5864  $(\bitvar{BY}+\bitvar{MVY2}+\locvar{\idx{by}})$.
5865 \item
5866 If \locvar{\idx{ry2}} is greater than $(\bitvar{RPH}-1)$, assign
5867  \locvar{\idx{ry2}} the value $(\bitvar{RPH}-1)$.
5868 \item
5869 If \locvar{\idx{ry2}} is less than zero, assign \locvar{\idx{ry2}} the value
5870  zero.
5871 \item
5872 For each value of \locvar{\idx{bx}} from $0$ to $7$, inclusive:
5873 \begin{enumerate}
5874 \item
5875 Assign \locvar{\idx{rx1}} the value
5876  $(\bitvar{BX}+\bitvar{MVX}+\locvar{\idx{bx}})$.
5877 \item
5878 If \locvar{\idx{rx1}} is greater than $(\bitvar{RPW}-1)$, assign
5879  \locvar{\idx{rx1}} the value $(\bitvar{RPW}-1)$.
5880 \item
5881 If \locvar{\idx{rx1}} is less than zero, assign \locvar{\idx{rx1}} the value
5882  zero.
5883 \item
5884 Assign \locvar{\idx{rx2}} the value
5885  $(\bitvar{BX}+\bitvar{MVX2}+\locvar{\idx{bx}})$.
5886 \item
5887 If \locvar{\idx{rx2}} is greater than $(\bitvar{RPW}-1)$, assign
5888  \locvar{\idx{rx2}} the value $(\bitvar{RPW}-1)$.
5889 \item
5890 If \locvar{\idx{rx2}} is less than zero, assign \locvar{\idx{rx2}} the value
5891  zero.
5892 \item
5893 Assign $\bitvar{PRED}[\locvar{\idx{by}}][\locvar{\idx{bx}}]$ the value
5894 \begin{equation*}
5895  (\bitvar{REFP}[\locvar{\idx{ry1}}][\locvar{\idx{rx1}}]+
5896  \bitvar{REFP}[\locvar{\idx{ry2}}][\locvar{\idx{rx2}}])>>1.
5897 \end{equation*}
5898 \end{enumerate}
5899 \end{enumerate}
5900 \end{enumerate}
5901
5902 \subsection{Dequantization}
5903 \label{sub:dequant}
5904
5905 \paragraph{Input parameters:}\hfill\\*
5906 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5907 \multicolumn{1}{c}{Name} &
5908 \multicolumn{1}{c}{Type} &
5909 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5910 \multicolumn{1}{c}{Signed?} &
5911 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5912 \bitvar{COEFFS}   & \multicolumn{1}{p{50pt}}{2D Integer Array} &
5913                              16 & Yes & An $\bitvar{NBS}\times 64$ array of
5914  quantized DCT coefficient values for each block in zig-zag order. \\
5915 \bitvar{ACSCALE} & \multicolumn{1}{p{40pt}}{Integer array} &
5916                              16 & No  & A 64-element array of scale values for
5917  AC coefficients for each \qi\ value. \\
5918 \bitvar{DCSCALE} & \multicolumn{1}{p{40pt}}{Integer array} &
5919                              16 & No  & A 64-element array of scale values for
5920  the DC coefficient for each \qi\ value. \\
5921 \bitvar{BMS}     & \multicolumn{1}{p{50pt}}{2D Integer array} &
5922                               8 & No  & A $\bitvar{NBMS}\times 64$ array
5923  containing the base matrices. \\
5924 \bitvar{NQRS}    & \multicolumn{1}{p{50pt}}{2D Integer array} &
5925                               6 & No  & A $2\times 3$ array containing the
5926  number of quant ranges for a given \qti\ and \pli, respectively.
5927 This is at most $63$. \\
5928 \bitvar{QRSIZES} & \multicolumn{1}{p{50pt}}{3D Integer array} &
5929                               6 & No  & A $2\times 3\times 63$ array of the
5930  sizes of each quant range for a given \qti\ and \pli, respectively.
5931 Only the first $\bitvar{NQRS}[\qti][\pli]$ values are used. \\
5932 \bitvar{QRBMIS}  & \multicolumn{1}{p{50pt}}{3D Integer array} &
5933                               9 & No  & A $2\times 3\times 64$ array of the
5934  \bmi's used for each quant range for a given \qti\ and \pli, respectively.
5935 Only the first $(\bitvar{NQRS}[\qti][\pli]+1)$ values are used. \\
5936 \bitvar{\qti}    & Integer &  1 & No  & A quantization type index.
5937 See Table~\ref{tab:quant-types}.\\
5938 \bitvar{\pli}    & Integer &  2 & No  & A color plane index.
5939 See Table~\ref{tab:color-planes}.\\
5940 \bitvar{\idx{qi0}} & Integer &  6 & No  & The quantization index of the DC
5941  coefficient. \\
5942 \bitvar{\qi}     & Integer &  6 & No  & The quantization index of the AC
5943  coefficients. \\
5944 \bitvar{\bi}     & Integer & 36 & No  & The index of the current block in
5945  coded order. \\
5946 \bottomrule\end{tabularx}
5947
5948 \paragraph{Output parameters:}\hfill\\*
5949 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5950 \multicolumn{1}{c}{Name} &
5951 \multicolumn{1}{c}{Type} &
5952 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5953 \multicolumn{1}{c}{Signed?} &
5954 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5955 \bitvar{DQC}   & \multicolumn{1}{p{40pt}}{Integer Array} &
5956                               14 & Yes & A $64$-element array of dequantized
5957  DCT coefficients in natural order (cf. Section~\ref{sec:dct-coeffs}). \\
5958 \bottomrule\end{tabularx}
5959
5960 \paragraph{Variables used:}\hfill\\*
5961 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
5962 \multicolumn{1}{c}{Name} &
5963 \multicolumn{1}{c}{Type} &
5964 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
5965 \multicolumn{1}{c}{Signed?} &
5966 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
5967 \locvar{QMAT}    & \multicolumn{1}{p{40pt}}{Integer array} &
5968                              16 & No  & A 64-element array of quantization
5969  values for each DCT coefficient in natural order. \\
5970 \locvar{\ci}     & Integer &  6 & No  & The DCT coefficient index in natural
5971  order. \\
5972 \locvar{\zzi}    & Integer &  6 & No  & The DCT coefficient index in zig-zag
5973  order. \\
5974 \locvar{C}       & Integer & 29 & Yes & A single dequantized coefficient. \\
5975 \bottomrule\end{tabularx}
5976 \medskip
5977
5978 This procedure takes the quantized DCT coefficient values in zig-zag order for
5979  a single block---after DC prediction has been undone---and returns the
5980  dequantized values in natural order.
5981 If large coefficient values are decoded for coarsely quantized coefficients,
5982  the resulting dequantized value can be significantly larger than 16 bits.
5983 Such a coefficient is truncated to a signed 16-bit representation by discarding
5984  the higher-order bits of its two's complement representation.
5985
5986 Although this procedure recomputes the quantization matrices from the
5987  parameters in the setup header for each block, there are at most six different
5988  ones used for each color plane.
5989 An efficient implementation could compute them once in advance.
5990
5991 \begin{enumerate}
5992 \item
5993 Using \bitvar{ACSCALE}, \bitvar{DCSCALE}, \bitvar{BMS}, \bitvar{NQRS},
5994  \bitvar{QRSIZES}, \bitvar{QRBMIS}, \bitvar{\qti}, \bitvar{\pli}, and
5995  \bitvar{\idx{qi0}}, use the procedure given in Section~\ref{sub:quant-mat} to
5996  compute the DC quantization matrix \locvar{QMAT}.
5997 \item
5998 Assign \locvar{C} the value
5999  $\bitvar{COEFFS}[\bitvar{\bi}][0]*\locvar{QMAT}[0]$.
6000 \item
6001 Truncate \locvar{C} to a 16-bit representation by dropping any higher-order
6002  bits.
6003 \item
6004 Assign $\bitvar{DQC}[0]$ the value \locvar{C}.
6005 \item
6006 Using \bitvar{ACSCALE}, \bitvar{DCSCALE}, \bitvar{BMS}, \bitvar{NQRS},
6007  \bitvar{QRSIZES}, \bitvar{QRBMIS}, \bitvar{\qti}, \bitvar{\pli}, and
6008  \bitvar{\qi}, use the procedure given in Section~\ref{sub:quant-mat} to
6009  compute the AC quantization matrix \locvar{QMAT}.
6010 \item
6011 For each value of \locvar{\ci} from 1 to 63, inclusive:
6012 \begin{enumerate}
6013 \item
6014 Assign \locvar{\zzi} the index in zig-zag order corresponding to \locvar{\ci}.
6015 E.g., the value at row $(\locvar{\ci}//8)$ and column $(\locvar{\ci}\%8)$ in
6016  Figure~\ref{tab:zig-zag}.
6017 \item
6018 Assign \locvar{C} the value
6019  $\bitvar{COEFFS}[\bitvar{\bi}][\locvar{\zzi}]*\locvar{QMAT}[\locvar{\ci}]$.
6020 \item
6021 Truncate \locvar{C} to a 16-bit representation by dropping any higher-order
6022  bits.
6023 \item
6024 Assign $\bitvar{DQC}[\locvar{\ci}]$ the value \locvar{C}.
6025 \end{enumerate}
6026 \end{enumerate}
6027
6028 \subsection{The Inverse DCT}
6029
6030 The 2D inverse DCT is separated into two applications of the 1D inverse DCT.
6031 The transform is first applied to each row, and then applied to each column of
6032  the result.
6033
6034 Each application of the 1D inverse DCT scales the values by a factor of two
6035  relative to the orthonormal version of the transform, for a total scale factor
6036  of four for the 2D transform.
6037 It is assumed that a similar scale factor is applied during the forward DCT
6038  used in the encoder, so that a division by 16 is required after the transform
6039  has been applied in both directions.
6040 The inclusion of this scale factor allows the integerized transform to operate
6041  with increased precision.
6042 All divisions throughout the transform are implemented with right shifts.
6043 Only the final division by $16$ is rounded, with ties rounded towards positive
6044  infinity.
6045
6046 All intermediate values are truncated to a 32-bit signed representation by
6047  discarding any higher-order bits in their two's complement representation.
6048 The final output of each 1D transform is truncated to 16 bits in the same
6049  manner.
6050 In practice, 32 bits is sufficient for every calculation except scaling by
6051  $\locvar{C4}$.
6052 Here we specify truncating to 16 bits after the right shift by 16, but this is
6053  equivalent to truncating the result of the multiply to 32 bits before the
6054  right shift.
6055
6056 The 1D transform can only overflow if input coefficients larger than $\pm 6201$
6057  are present.
6058 However, the result of applying the 2D forward transform on pixel values in the
6059  range $-255\ldots 255$ can be as large as $\pm 8157$ due to the scale factor
6060  of four that is applied, and quantization errors could make this even larger.
6061 Therefore, the coefficients cannot simply be clamped into a valid range, as
6062  they could still overflow just the 1D inverse transform by itself.
6063
6064 \subsubsection{The 1D Inverse DCT}
6065 \label{sub:1d-idct}
6066
6067 \paragraph{Input parameters:}\hfill\\*
6068 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6069 \multicolumn{1}{c}{Name} &
6070 \multicolumn{1}{c}{Type} &
6071 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6072 \multicolumn{1}{c}{Signed?} &
6073 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6074 \bitvar{Y}        & \multicolumn{1}{p{40pt}}{Integer Array} &
6075                               16 & Yes & An 8-element array of DCT
6076  coefficients. \\
6077 \bottomrule\end{tabularx}
6078
6079 \paragraph{Output parameters:}\hfill\\*
6080 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6081 \multicolumn{1}{c}{Name} &
6082 \multicolumn{1}{c}{Type} &
6083 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6084 \multicolumn{1}{c}{Signed?} &
6085 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6086 \bitvar{X}        & \multicolumn{1}{p{40pt}}{Integer Array} &
6087                               16 & Yes & An 8-element array of output values. \\
6088 \bottomrule\end{tabularx}
6089
6090 \paragraph{Variables used:}\hfill\\*
6091 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6092 \multicolumn{1}{c}{Name} &
6093 \multicolumn{1}{c}{Type} &
6094 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6095 \multicolumn{1}{c}{Signed?} &
6096 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6097 \locvar{T}        & \multicolumn{1}{p{40pt}}{Integer Array} &
6098                               32 & Yes & An 8-element array containing the
6099  current value of each signal line. \\
6100 \locvar{R}        & Integer & 32 & Yes & A temporary value. \\
6101 \bottomrule\end{tabularx}
6102 \medskip
6103
6104 A compliant decoder MUST use the exact implementation of the inverse DCT
6105  defined in this specification.
6106 Some operations may be re-ordered, but the result must be precisely equivalent.
6107 This is a design decision that limits some avenues of decoder optimization, but
6108  prevents any drift in the prediction loop.
6109 Theora uses a 16-bit integerized approximation of the 8-point 1D inverse DCT
6110  based on the Chen factorization~\cite{CSF77}.
6111 It requires 16 multiplications and 26 additions and subtractions.
6112
6113 \begin{figure}[htbp]
6114 \begin{center}
6115 \includegraphics[width=\textwidth]{idct}
6116 \end{center}
6117 \caption{Signal Flow Graph for the 1D Inverse DCT}
6118 \label{fig:idct}
6119 \end{figure}
6120
6121 A signal flow graph of the transformation is presented in
6122  Figure~\ref{fig:idct}.
6123 This graph provides a good visualization of which parts of the transform are
6124  parallelizable.
6125 Time increases from left to right.
6126
6127 Each signal line is involved in an operation where the line is marked with a
6128  dot $\cdot$ or a circled plus sign $\oplus$.
6129 The constants $\locvar{C}i$ and $\locvar{S}j$ are the 16-bit integer
6130  approximations of $\cos(\frac{i\pi}{16})$ and $\sin(\frac{j\pi}{16})$ listed
6131  in Table~\ref{tab:dct-consts}.
6132 When they appear next to a signal line, the value on that line is scaled by the
6133  given constant.
6134 A circled minus sign $\ominus$ next to a signal line indicates that the value
6135  on that line is negated.
6136
6137 Operations on a single signal path through the graph cannot be reordered, but
6138  operations on different paths may be, or may be executed in parallel.
6139 The column of numbers on the left represents an initial permutation of the
6140  input DCT coefficients.
6141 The column on the right represents the unpermuted output.
6142 One can be obtained by bit-reversing the 3-bit binary representation of the
6143  other.
6144
6145 \begin{table}[htbp]
6146 \begin{center}
6147 \begin{tabular}{llr}\toprule
6148 $\locvar{C}i$  & $\locvar{S}j$ & Value   \\\midrule
6149 $\locvar{C1}$  & $\locvar{S7}$ & $64277$ \\
6150 $\locvar{C2}$  & $\locvar{S6}$ & $60547$ \\
6151 $\locvar{C3}$  & $\locvar{S5}$ & $54491$ \\
6152 $\locvar{C4}$  & $\locvar{S4}$ & $46341$ \\
6153 $\locvar{C5}$  & $\locvar{S3}$ & $36410$ \\
6154 $\locvar{C6}$  & $\locvar{S2}$ & $25080$ \\
6155 $\locvar{C7}$  & $\locvar{S1}$ & $12785$ \\
6156 \bottomrule\end{tabular}
6157 \end{center}
6158 \caption{16-bit Approximations of Sines and Cosines}
6159 \label{tab:dct-consts}
6160 \end{table}
6161
6162 \begin{enumerate}
6163 \item
6164 Assign $\locvar{T}[0]$ the value
6165  $\locvar{C4}*(\bitvar{Y}[0]+\bitvar{Y}[4])>>16$.
6166 \item
6167 Truncate $\locvar{T}[0]$ to a 16-bit representation by dropping any
6168  higher-order bits.
6169 \item
6170 Assign $\locvar{T}[1]$ the value
6171  $\locvar{C4}*(\bitvar{Y}[0]-\bitvar{Y}[4])>>16$.
6172 \item
6173 Truncate $\locvar{T}[1]$ to a 16-bit representation by dropping any
6174  higher-order bits.
6175 \item
6176 Assign $\locvar{T}[2]$ the value $(\locvar{C6}*\bitvar{Y}[2]>>16)-
6177  (\locvar{S6}*\bitvar{Y}[6]>>16)$.
6178 \item
6179 Assign $\locvar{T}[3]$ the value $(\locvar{S6}*\bitvar{Y}[2]>>16)+
6180  (\locvar{C6}*\bitvar{Y}[6]>>16)$.
6181 \item
6182 Assign $\locvar{T}[4]$ the value $(\locvar{C7}*\bitvar{Y}[1]>>16)-
6183  (\locvar{S7}*\bitvar{Y}[7]>>16)$.
6184 \item
6185 Assign $\locvar{T}[5]$ the value $(\locvar{C3}*\bitvar{Y}[5]>>16)-
6186  (\locvar{S3}*\bitvar{Y}[3]>>16)$.
6187 \item
6188 Assign $\locvar{T}[6]$ the value $(\locvar{S3}*\bitvar{Y}[5]>>16)+
6189  (\locvar{C3}*\bitvar{Y}[3]>>16)$.
6190 \item
6191 Assign $\locvar{T}[7]$ the value $(\locvar{S7}*\bitvar{Y}[1]>>16)+
6192  (\locvar{C7}*\bitvar{Y}[7]>>16)$.
6193 \item
6194 Assign \locvar{R} the value $\locvar{T}[4]+\locvar{T}[5]$.
6195 \item
6196 Assign $\locvar{T}[5]$ the value
6197  $\locvar{C4}*(\locvar{T}[4]-\locvar{T}[5])>>16$.
6198 \item
6199 Truncate $\locvar{T}[5]$ to a 16-bit representation by dropping any
6200  higher-order bits.
6201 \item
6202 Assign $\locvar{T}[4]$ the value $\locvar{R}$.
6203 \item
6204 Assign \locvar{R} the value $\locvar{T}[7]+\locvar{T}[6]$.
6205 \item
6206 Assign $\locvar{T}[6]$ the value
6207  $\locvar{C4}*(\locvar{T}[7]-\locvar{T}[6])>>16$.
6208 \item
6209 Truncate $\locvar{T}[6]$ to a 16-bit representation by dropping any
6210  higher-order bits.
6211 \item
6212 Assign $\locvar{T}[7]$ the value $\locvar{R}$.
6213 \item
6214 Assign \locvar{R} the value $\locvar{T}[0]+\locvar{T}[3]$.
6215 \item
6216 Assign $\locvar{T}[3]$ the value $\locvar{T}[0]-\locvar{T}[3]$.
6217 \item
6218 Assign $\locvar{T}[0]$ the value \locvar{R}.
6219 \item
6220 Assign \locvar{R} the value $\locvar{T}[1]+\locvar{T}[2]$.
6221 \item
6222 Assign $\locvar{T}[2]$ the value $\locvar{T}[1]-\locvar{T}[2]$.
6223 \item
6224 Assign $\locvar{T}[1]$ the value \locvar{R}.
6225 \item
6226 Assign \locvar{R} the value $\locvar{T}[6]+\locvar{T}[5]$.
6227 \item
6228 Assign $\locvar{T}[5]$ the value $\locvar{T}[6]-\locvar{T}[5]$.
6229 \item
6230 Assign $\locvar{T}[6]$ the value \locvar{R}.
6231 \item
6232 Assign \locvar{R} the value $\locvar{T}[0]+\locvar{T}[7]$.
6233 \item
6234 Truncate \locvar{R} to a 16-bit representation by dropping any higher-order
6235  bits.
6236 \item
6237 Assign $\bitvar{X}[0]$ the value \locvar{R}.
6238 \item
6239 Assign \locvar{R} the value $\locvar{T}[1]+\locvar{T}[6]$.
6240 \item
6241 Truncate \locvar{R} to a 16-bit representation by dropping any higher-order
6242  bits.
6243 \item
6244 Assign $\bitvar{X}[1]$ the value \locvar{R}.
6245 \item
6246 Assign \locvar{R} the value $\locvar{T}[2]+\locvar{T}[5]$.
6247 \item
6248 Truncate \locvar{R} to a 16-bit representation by dropping any higher-order
6249  bits.
6250 \item
6251 Assign $\bitvar{X}[2]$ the value \locvar{R}.
6252 \item
6253 Assign \locvar{R} the value $\locvar{T}[3]+\locvar{T}[4]$.
6254 \item
6255 Truncate \locvar{R} to a 16-bit representation by dropping any higher-order
6256  bits.
6257 \item
6258 Assign $\bitvar{X}[3]$ the value \locvar{R}.
6259 \item
6260 Assign \locvar{R} the value $\locvar{T}[3]-\locvar{T}[4]$.
6261 \item
6262 Truncate \locvar{R} to a 16-bit representation by dropping any higher-order
6263  bits.
6264 \item
6265 Assign $\bitvar{X}[4]$ the value \locvar{R}.
6266 \item
6267 Assign \locvar{R} the value $\locvar{T}[2]-\locvar{T}[5]$.
6268 \item
6269 Truncate \locvar{R} to a 16-bit representation by dropping any higher-order
6270  bits.
6271 \item
6272 Assign $\bitvar{X}[5]$ the value \locvar{R}.
6273 \item
6274 Assign \locvar{R} the value $\locvar{T}[1]-\locvar{T}[6]$.
6275 \item
6276 Truncate \locvar{R} to a 16-bit representation by dropping any higher-order
6277  bits.
6278 \item
6279 Assign $\bitvar{X}[6]$ the value \locvar{R}.
6280 \item
6281 Assign \locvar{R} the value $\locvar{T}[0]-\locvar{T}[7]$.
6282 \item
6283 Truncate \locvar{R} to a 16-bit representation by dropping any higher-order
6284  bits.
6285 \item
6286 Assign $\bitvar{X}[7]$ the value \locvar{R}.
6287 \end{enumerate}
6288
6289 \subsubsection{The 2D Inverse DCT}
6290 \label{sub:2d-idct}
6291
6292 \paragraph{Input parameters:}\hfill\\*
6293 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6294 \multicolumn{1}{c}{Name} &
6295 \multicolumn{1}{c}{Type} &
6296 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6297 \multicolumn{1}{c}{Signed?} &
6298 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6299 \bitvar{DQC}      & \multicolumn{1}{p{40pt}}{Integer Array} &
6300                               14 & Yes & A $64$-element array of dequantized
6301  DCT coefficients in natural order (cf. Section~\ref{sec:dct-coeffs}). \\
6302 \bottomrule\end{tabularx}
6303
6304 \paragraph{Output parameters:}\hfill\\*
6305 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6306 \multicolumn{1}{c}{Name} &
6307 \multicolumn{1}{c}{Type} &
6308 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6309 \multicolumn{1}{c}{Signed?} &
6310 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6311 \bitvar{RES}   & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6312                               16 & Yes & An $8\times 8$ array containing the
6313  decoded residual for the current block. \\
6314 \bottomrule\end{tabularx}
6315
6316 \paragraph{Variables used:}\hfill\\*
6317 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6318 \multicolumn{1}{c}{Name} &
6319 \multicolumn{1}{c}{Type} &
6320 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6321 \multicolumn{1}{c}{Signed?} &
6322 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6323 \locvar{\ci}     & Integer &  3 & No  & The column index. \\
6324 \locvar{\ri}     & Integer &  3 & No  & The row index. \\
6325 \locvar{Y}       & \multicolumn{1}{p{40pt}}{Integer Array} &
6326                              16 & Yes & An 8-element array of 1-D iDCT input
6327  values. \\
6328 \locvar{X}       & \multicolumn{1}{p{40pt}}{Integer Array} &
6329                              16 & Yes & An 8-element array of 1-D iDCT output
6330  values. \\
6331 \bottomrule\end{tabularx}
6332 \medskip
6333
6334 This procedure applies the 1-D inverse DCT transform 16 times to a block of
6335  dequantized coefficients: once for each of the 8 rows, and once for each of
6336  the 8 columns of the result.
6337 Note that the coordinate system used for the columns is the same right-handed
6338  coordinate system used by the rest of Theora.
6339 Thus, the column is indexed from bottom to top, not top to bottom.
6340 The final values are divided by sixteen, rounding with ties rounded towards
6341  positive infinity.
6342
6343 \begin{enumerate}
6344 \item
6345 For each value of \locvar{\ri} from 0 to 7:
6346 \begin{enumerate}
6347 \item
6348 For each value of \locvar{\ci} from 0 to 7:
6349 \begin{enumerate}
6350 \item
6351 Assign $\locvar{Y}[\locvar{\ci}]$ the value
6352  $\bitvar{DQC}[\locvar{\ri}*8+\locvar{\ci}]$.
6353 \end{enumerate}
6354 \item
6355 Compute \locvar{X}, the 1-D inverse DCT of \locvar{Y} using the procedure
6356  described in Section~\ref{sub:1d-idct}.
6357 \item
6358 For each value of $\locvar{\ci}$ from 0 to 7:
6359 \begin{enumerate}
6360 \item
6361 Assign $\bitvar{RES}[\locvar{\ri}][\locvar{\ci}]$ the value
6362  $\locvar{X}[\locvar{\ci}]$.
6363 \end{enumerate}
6364 \end{enumerate}
6365 \item
6366 For each value of \locvar{\ci} from 0 to 7:
6367 \begin{enumerate}
6368 \item
6369 For each value of \locvar{\ri} from 0 to 7:
6370 \begin{enumerate}
6371 \item
6372 Assign $\locvar{Y}[\locvar{\ri}]$ the value
6373  $\bitvar{RES}[\locvar{\ri}][\locvar{\ci}]$.
6374 \end{enumerate}
6375 \item
6376 Compute \locvar{X}, the 1-D inverse DCT of \locvar{Y} using the procedure
6377  described in Section~\ref{sub:1d-idct}.
6378 \item
6379 For each value of \locvar{\ri} from 0 to 7:
6380 \begin{enumerate}
6381 \item
6382 Assign $\bitvar{RES}[\locvar{\ri}][\locvar{\ci}]$ the value
6383  $(\locvar{X}[\locvar{\ri}]+8)>>4$.
6384 \end{enumerate}
6385 \end{enumerate}
6386 \end{enumerate}
6387
6388 \subsubsection{The 1D Forward DCT (Non-Normative)}
6389
6390 \paragraph{Input parameters:}\hfill\\*
6391 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6392 \multicolumn{1}{c}{Name} &
6393 \multicolumn{1}{c}{Type} &
6394 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6395 \multicolumn{1}{c}{Signed?} &
6396 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6397 \bitvar{X}        & \multicolumn{1}{p{40pt}}{Integer Array} &
6398                               14 & Yes & An 8-element array of input values. \\
6399 \bottomrule\end{tabularx}
6400
6401 \paragraph{Output parameters:}\hfill\\*
6402 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6403 \multicolumn{1}{c}{Name} &
6404 \multicolumn{1}{c}{Type} &
6405 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6406 \multicolumn{1}{c}{Signed?} &
6407 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6408 \bitvar{Y}        & \multicolumn{1}{p{40pt}}{Integer Array} &
6409                               16 & Yes & An 8-element array of DCT
6410  coefficients. \\
6411 \bottomrule\end{tabularx}
6412
6413 \paragraph{Variables used:}\hfill\\*
6414 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6415 \multicolumn{1}{c}{Name} &
6416 \multicolumn{1}{c}{Type} &
6417 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6418 \multicolumn{1}{c}{Signed?} &
6419 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6420 \locvar{T}        & \multicolumn{1}{p{40pt}}{Integer Array} &
6421                               16 & Yes & An 8-element array containing the
6422  current value of each signal line. \\
6423 \locvar{R}        & Integer & 16 & Yes & A temporary value. \\
6424 \bottomrule\end{tabularx}
6425 \medskip
6426
6427 The forward transform used in the encoder is not mandated by this standard as
6428  the inverse one is.
6429 Precise equivalence in the inverse transform alone is all that is required to
6430  guarantee that there is no mismatch in the prediction loop between encoder and
6431  any compliant decoder implementation.
6432 However, a forward transform is provided here as a convenience for implementing
6433  an encoder.
6434 This is the version of the transform used by Xiph.org's Theora encoder, which
6435  is the same as that used by VP3.
6436 Like the inverse DCT, it is first applied to each row, and then applied to each
6437  column of the result.
6438
6439 \begin{figure}[htbp]
6440 \begin{center}
6441 \includegraphics[width=\textwidth]{fdct}
6442 \end{center}
6443 \caption{Signal Flow Graph for the 1D Forward DCT}
6444 \label{fig:fdct}
6445 \end{figure}
6446
6447 The signal flow graph for the forward transform is given in
6448  Figure~\ref{fig:fdct}.
6449 It is largely the reverse of the flow graph given for the inverse DCT.
6450 It is important to note that the signs on the constants in the rotations have
6451  changed, and the \locvar{C4} scale factors on one of the lower butterflies now
6452  appear on the opposite side.
6453 The column of numbers on the left represents the unpermuted input, and the
6454  column on the right the permuted output DCT coefficients.
6455
6456 A proper division by $2^{16}$ is done after the multiplications instead of a
6457  shift in the forward transform.
6458 This can be implemented quickly by adding an offset of $\hex{FFFF}$ if the
6459  number is negative, and then shifting as before.
6460 This slightly increases the computational complexity of the transform.
6461 Unlike the inverse DCT, 16-bit registers and a $16\times16\rightarrow32$-bit
6462  multiply are sufficient to avoid any overflow, so long as the input is in the
6463  range $-6270\ldots 6270$, which is larger than required.
6464
6465 \begin{enumerate}
6466 \item
6467 Assign $\locvar{T}[0]$ the value $\bitvar{X}[0]+\bitvar{X}[7]$.
6468 \item
6469 Assign $\locvar{T}[1]$ the value $\bitvar{X}[1]+\bitvar{X}[6]$.
6470 \item
6471 Assign $\locvar{T}[2]$ the value $\bitvar{X}[2]+\bitvar{X}[5]$.
6472 \item
6473 Assign $\locvar{T}[3]$ the value $\bitvar{X}[3]+\bitvar{X}[4]$.
6474 \item
6475 Assign $\locvar{T}[4]$ the value $\bitvar{X}[3]-\bitvar{X}[4]$.
6476 \item
6477 Assign $\locvar{T}[5]$ the value $\bitvar{X}[2]-\bitvar{X}[5]$.
6478 \item
6479 Assign $\locvar{T}[6]$ the value $\bitvar{X}[1]-\bitvar{X}[6]$.
6480 \item
6481 Assign $\locvar{T}[7]$ the value $\bitvar{X}[0]-\bitvar{X}[7]$.
6482 \item
6483 Assign \locvar{R} the value $\locvar{T}[0]+\locvar{T}[3]$.
6484 \item
6485 Assign $\locvar{T}[3]$ the value $\locvar{T}[0]-\locvar{T}[3]$.
6486 \item
6487 Assign $\locvar{T}[0]$ the value \locvar{R}.
6488 \item
6489 Assign \locvar{R} the value $\locvar{T}[1]+\locvar{T}[2]$.
6490 \item
6491 Assign $\locvar{T}[2]$ the value $\locvar{T}[1]-\locvar{T}[2]$.
6492 \item
6493 Assign $\locvar{T}[1]$ the value \locvar{R}.
6494 \item
6495 Assign \locvar{R} the value $\locvar{T}[6]-\locvar{T}[5]$.
6496 \item
6497 Assign $\locvar{T}[6]$ the value
6498  $(\locvar{C4}*(\locvar{T}[6]+\locvar{T}[5]))//2^{16}$.
6499 \item
6500 Assign $\locvar{T}[5]$ the value $(\locvar{C4}*\locvar{R})//2^{16}$.
6501 \item
6502 Assign \locvar{R} the value $\locvar{T}[4]+\locvar{T}[5]$.
6503 \item
6504 Assign $\locvar{T}[5]$ the value $\locvar{T}[4]-\locvar{T}[5]$.
6505 \item
6506 Assign $\locvar{T}[4]$ the value \locvar{R}.
6507 \item
6508 Assign \locvar{R} the value $\locvar{T}[7]+\locvar{T}[6]$.
6509 \item
6510 Assign $\locvar{T}[6]$ the value $\locvar{T}[7]-\locvar{T}[6]$.
6511 \item
6512 Assign $\locvar{T}[7]$ the value \locvar{R}.
6513 \item
6514 Assign $\bitvar{Y}[0]$ the value
6515  $(\locvar{C4}*(\locvar{T}[0]+\locvar{T}[1]))//2^{16}$.
6516 \item
6517 Assign $\bitvar{Y}[4]$ the value
6518  $(\locvar{C4}*(\locvar{T}[0]-\locvar{T}[1]))//2^{16}$.
6519 \item
6520 Assign $\bitvar{Y}[2]$ the value
6521  $((\locvar{S6}*\locvar{T}[3])//2^{16})+
6522  ((\locvar{C6}*\locvar{T}[2])//2^{16})$.
6523 \item
6524 Assign $\bitvar{Y}[6]$ the value
6525  $((\locvar{C6}*\locvar{T}[3])//2^{16})-
6526  ((\locvar{S6}*\locvar{T}[2])//2^{16})$.
6527 \item
6528 Assign $\bitvar{Y}[1]$ the value
6529  $((\locvar{S7}*\locvar{T}[7])//2^{16})+
6530  ((\locvar{C7}*\locvar{T}[4])//2^{16})$.
6531 \item
6532 Assign $\bitvar{Y}[5]$ the value
6533  $((\locvar{S3}*\locvar{T}[6])//2^{16})+
6534  ((\locvar{C3}*\locvar{T}[5])//2^{16})$.
6535 \item
6536 Assign $\bitvar{Y}[3]$ the value
6537  $((\locvar{C3}*\locvar{T}[6])//2^{16})-
6538  ((\locvar{S3}*\locvar{T}[5])//2^{16})$.
6539 \item
6540 Assign $\bitvar{Y}[7]$ the value
6541  $((\locvar{C7}*\locvar{T}[7])//2^{16})-
6542  ((\locvar{S7}*\locvar{T}[4])//2^{16})$.
6543 \end{enumerate}
6544
6545 \subsection{The Complete Reconstruction Algorithm}
6546 \label{sub:recon}
6547
6548 \paragraph{Input parameters:}\hfill\\*
6549 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6550 \multicolumn{1}{c}{Name} &
6551 \multicolumn{1}{c}{Type} &
6552 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6553 \multicolumn{1}{c}{Signed?} &
6554 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6555 \bitvar{ACSCALE}   & \multicolumn{1}{p{40pt}}{Integer array} &
6556                                16 & No  & A 64-element array of scale values
6557  for AC coefficients for each \qi\ value. \\
6558 \bitvar{DCSCALE}   & \multicolumn{1}{p{40pt}}{Integer array} &
6559                                16 & No  & A 64-element array of scale values
6560  for the DC coefficient for each \qi\ value. \\
6561 \bitvar{BMS}       & \multicolumn{1}{p{50pt}}{2D Integer array} &
6562                                 8 & No  & A $\bitvar{NBMS}\times 64$ array
6563  containing the base matrices. \\
6564 \bitvar{NQRS}      & \multicolumn{1}{p{50pt}}{2D Integer array} &
6565                                 6 & No  & A $2\times 3$ array containing the
6566  number of quant ranges for a given \qti\ and \pli, respectively.
6567 This is at most $63$. \\
6568 \bitvar{QRSIZES}   & \multicolumn{1}{p{50pt}}{3D Integer array} &
6569                                 6 & No  & A $2\times 3\times 63$ array of the
6570  sizes of each quant range for a given \qti\ and \pli, respectively.
6571 Only the first $\bitvar{NQRS}[\qti][\pli]$ values are used. \\
6572 \bitvar{QRBMIS}    & \multicolumn{1}{p{50pt}}{3D Integer array} &
6573                                 9 & No  & A $2\times 3\times 64$ array of the
6574  \bmi's used for each quant range for a given \qti\ and \pli, respectively.
6575 Only the first $(\bitvar{NQRS}[\qti][\pli]+1)$ values are used. \\
6576 \bitvar{RPYW}      & Integer & 20 & No  & The width of the $Y'$ plane of the
6577  reference frames in pixels. \\
6578 \bitvar{RPYH}      & Integer & 20 & No  & The height of the $Y'$ plane of the
6579  reference frames in pixels. \\
6580 \bitvar{RPCW}      & Integer & 20 & No  & The width of the $C_b$ and $C_r$
6581  planes of the reference frames in pixels. \\
6582 \bitvar{RPCH}      & Integer & 20 & No  & The height of the $C_b$ and $C_r$
6583  planes of the reference frames in pixels. \\
6584 \bitvar{GOLDREFY}  & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6585                                 8 & No  & A $\bitvar{RPYH}\times\bitvar{RPYW}$
6586  array containing the contents of the $Y'$ plane of the golden reference
6587  frame. \\
6588 \bitvar{GOLDREFCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6589                                 8 & No  & A $\bitvar{RPCH}\times\bitvar{RPCW}$
6590  array containing the contents of the $C_b$ plane of the golden reference
6591  frame. \\
6592 \bitvar{GOLDREFCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6593                                 8 & No  & A $\bitvar{RPCH}\times\bitvar{RPCW}$
6594  array containing the contents of the $C_r$ plane of the golden reference
6595  frame. \\
6596 \bitvar{PREVREFY}  & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6597                                 8 & No  & A $\bitvar{RPYH}\times\bitvar{RPYW}$
6598  array containing the contents of the $Y'$ plane of the previous reference
6599  frame. \\
6600 \bitvar{PREVREFCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6601                                 8 & No  & A $\bitvar{RPCH}\times\bitvar{RPCW}$
6602  array containing the contents of the $C_b$ plane of the previous reference
6603  frame. \\
6604 \bitvar{PREVREFCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6605                                 8 & No  & A $\bitvar{RPCH}\times\bitvar{RPCW}$
6606  array containing the contents of the $C_r$ plane of the previous reference
6607  frame. \\
6608 \bitvar{NBS}       & Integer & 36 & No  & The total number of blocks in a
6609  frame. \\
6610 \bitvar{BCODED}    & \multicolumn{1}{p{40pt}}{Integer Array} &
6611                                 1 & No  & An \bitvar{NBS}-element array of
6612  flags indicating which blocks are coded. \\
6613 \bitvar{MBMODES}   & \multicolumn{1}{p{40pt}}{Integer Array} &
6614                                 3 & No  & An \bitvar{NMBS}-element array of
6615  coding modes for each macro block. \\
6616 \bitvar{MVECTS}    & \multicolumn{1}{p{50pt}}{Array of 2D Integer Vectors} &
6617                                 6 & Yes & An \bitvar{NBS}-element array of
6618  motion vectors for each block. \\
6619 \bitvar{COEFFS}    & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6620                                16 & Yes & An $\bitvar{NBS}\times 64$ array of
6621  quantized DCT coefficient values for each block in zig-zag order. \\
6622 \bitvar{NCOEFFS}   & \multicolumn{1}{p{40pt}}{Integer Array} &
6623                                 7 & No  & An \bitvar{NBS}-element array of the
6624  coefficient count for each block. \\
6625 \bitvar{QIS}       & \multicolumn{1}{p{40pt}}{Integer array} &
6626                                 6 & No  & An \bitvar{NQIS}-element array of
6627  \qi\ values. \\
6628 \bitvar{QIIS}      & \multicolumn{1}{p{40pt}}{Integer Array} &
6629                                 2 & No  & An \bitvar{NBS}-element array of
6630  \locvar{\qii} values for each block. \\
6631 \bottomrule\end{tabularx}
6632
6633 \paragraph{Output parameters:}\hfill\\*
6634 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6635 \multicolumn{1}{c}{Name} &
6636 \multicolumn{1}{c}{Type} &
6637 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6638 \multicolumn{1}{c}{Signed?} &
6639 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6640 \bitvar{RECY}  & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6641                                8 & No  & A $\bitvar{RPYH}\times\bitvar{RPYW}$
6642  array containing the contents of the $Y'$ plane of the reconstructed frame. \\
6643 \bitvar{RECCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6644                                8 & No  & A $\bitvar{RPCH}\times\bitvar{RPCW}$
6645  array containing the contents of the $C_b$ plane of the reconstructed frame. \\
6646 \bitvar{RECCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6647                                8 & No  & A $\bitvar{RPCH}\times\bitvar{RPCW}$
6648  array containing the contents of the $C_r$ plane of the reconstructed frame. \\
6649 \bottomrule\end{tabularx}
6650
6651 \paragraph{Variables used:}\hfill\\*
6652 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6653 \multicolumn{1}{c}{Name} &
6654 \multicolumn{1}{c}{Type} &
6655 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6656 \multicolumn{1}{c}{Signed?} &
6657 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6658 \locvar{RPW}       & Integer & 20 & No  & The width of the current plane of the
6659  current reference frame in pixels. \\
6660 \locvar{RPH}       & Integer & 20 & No  & The height of the current plane of
6661  the current reference frame in pixels. \\
6662 \locvar{REFP}      & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6663                                 8 & No  & A $\locvar{RPH}\times\locvar{RPW}$
6664  array containing the contents of the current plane of the current reference
6665  frame. \\
6666 \locvar{BX}        & Integer & 20 & No  & The horizontal pixel index of the
6667  lower-left corner of the current block. \\
6668 \locvar{BY}        & Integer & 20 & No  & The vertical pixel index of the
6669  lower-left corner of the current block. \\
6670 \locvar{MVX}       & Integer &  5 & Yes & The horizontal component of the first
6671  whole-pixel motion vector. \\
6672 \locvar{MVY}       & Integer &  5 & Yes & The vertical component of the first
6673  whole-pixel motion vector. \\
6674 \locvar{MVX2}      & Integer &  5 & Yes & The horizontal component of the second
6675  whole-pixel motion vector. \\
6676 \locvar{MVY2}      & Integer &  5 & Yes & The vertical component of the second
6677  whole-pixel motion vector. \\
6678 \locvar{PRED}      & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6679                                 8 & No  & An $8\times 8$ array of predictor
6680  values to use for the current block. \\
6681 \locvar{RES}       & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6682                                16 & Yes & An $8\times 8$ array containing the
6683  decoded residual for the current block. \\
6684 \locvar{QMAT}      & \multicolumn{1}{p{40pt}}{Integer array} &
6685                                16 & No  & A 64-element array of quantization
6686  values for each DCT coefficient in natural order. \\
6687 \locvar{DC}        & Integer & 29 & Yes & The dequantized DC coefficient of a
6688  block. \\
6689 \locvar{P}         & Integer & 17 & Yes & A reconstructed pixel value. \\
6690 \locvar{\bi}       & Integer & 36 & No  & The index of the current block in
6691  coded order. \\
6692 \locvar{\mbi}      & Integer & 32 & No  & The index of the macro block
6693  containing block \locvar{\bi}. \\
6694 \locvar{\pli}      & Integer &  2 & No  & The color plane index of the current
6695  block. \\
6696 \locvar{\rfi}      & Integer &  2 & No  & The index of the reference frame
6697  indicated by the coding mode for macro block \locvar{\mbi}. \\
6698 \locvar{\idx{bx}}  & Integer &  3 & No  & The horizontal pixel index in the
6699  block. \\
6700 \locvar{\idx{by}}  & Integer &  3 & No  & The vertical pixel index in the
6701  block. \\
6702 \locvar{\qti}      & Integer &  1 & No  & A quantization type index.
6703 See Table~\ref{tab:quant-types}.\\
6704 \locvar{\idx{qi0}} & Integer &  6 & No  & The quantization index of the DC
6705  coefficient. \\
6706 \locvar{\qi}       & Integer &  6 & No  & The quantization index of the AC
6707  coefficients. \\
6708 \bottomrule\end{tabularx}
6709 \medskip
6710
6711 This section takes the decoded packet data and uses the previously defined
6712  procedures to reconstruct each block of the current frame.
6713 For coded blocks, a predictor is formed using the coding mode and, if
6714  applicable, the motion vector, and then the residual is computed from the
6715  quantized DCT coefficients.
6716 For uncoded blocks, the contents of the co-located block are copied from the
6717  previous frame and the residual is cleared to zero.
6718 Then the predictor and residual are added, and the result clamped to the range
6719  $0\ldots 255$ and stored in the current frame.
6720
6721 In the special case that a block contains only a DC coefficient, the
6722  dequantization and inverse DCT transform are skipped.
6723 Instead the constant pixel value for the entire block is computed in one step.
6724 Note that the truncation of intermediate operations is omitted and the final
6725  rounding is slightly different in this case.
6726 The check for whether or not the block contains only a DC coefficient is based
6727  on the coefficient count returned from the token decode procedure of
6728  Section~\ref{sec:dct-decode}, and not by checking to see if the remaining
6729  coefficient values are zero.
6730 Also note that even when the coefficient count indicates the block contains
6731  zero coefficients, the DC coefficient is still processed, as undoing DC
6732  prediction might have made it non-zero.
6733
6734 After this procedure, the frame is completely reconstructed, but before it can
6735  be used as a reference frame, a loop filter must be run over it to help reduce
6736  blocking artifacts.
6737 This is detailed in Section~\ref{sec:loopfilter}.
6738
6739 \begin{enumerate}
6740 \item
6741 Assign \locvar{\idx{qi0}} the value $\bitvar{QIS}[0]$.
6742 \item
6743 For each value of \locvar{\bi} from 0 to $(\bitvar{NBS}-1)$:
6744 \begin{enumerate}
6745 \item
6746 Assign \locvar{\pli} the index of the color plane block \locvar{\bi} belongs
6747  to.
6748 \item
6749 Assign \locvar{BX} the horizontal pixel index of the lower-left corner of block
6750  \locvar{\bi}.
6751 \item
6752 Assign \locvar{BY} the vertical pixel index of the lower-left corner of block
6753  \locvar{\bi}.
6754 \item
6755 If $\bitvar{BCODED}[\locvar{\bi}]$ is non-zero:
6756 \begin{enumerate}
6757 \item
6758 Assign \locvar{\mbi} the index of the macro block containing block
6759  \locvar{\bi}.
6760 \item
6761 If $\bitvar{MBMODES}[\locvar{\mbi}]$ is 1 (INTRA), assign \locvar{\qti} the
6762  value $0$.
6763 \item
6764 Otherwise, assign \locvar{\qti} the value $1$.
6765 \item
6766 Assign \locvar{\rfi} the value of the Reference Frame Index column of
6767  Table~\ref{tab:cm-refs} corresponding to $\bitvar{MBMODES}[\locvar{\mbi}]$.
6768 \item
6769 If \locvar{\rfi} is zero, compute \locvar{PRED} using the procedure given in
6770  Section~\ref{sub:predintra}.
6771 \item
6772 Otherwise:
6773 \begin{enumerate}
6774 \item
6775 Assign \locvar{REFP}, \locvar{RPW}, and \locvar{RPH} the values given in
6776  Table~\ref{tab:refp} corresponding to the current values of \locvar{\rfi} and
6777  \locvar{\pli}.
6778
6779 \begin{table}[htbp]
6780 \begin{center}
6781 \begin{tabular}{cclll}\toprule
6782 \locvar{\rfi} & \locvar{\pli} &
6783 \locvar{REFP} & \locvar{RPW} & \locvar{RPH} \\\midrule
6784 $1$ & $0$ & \bitvar{PREVREFY}  & \bitvar{RPYW} & \bitvar{RPYH} \\
6785 $1$ & $1$ & \bitvar{PREVREFCB} & \bitvar{RPCW} & \bitvar{RPCH} \\
6786 $1$ & $2$ & \bitvar{PREVREFCR} & \bitvar{RPCW} & \bitvar{RPCH} \\
6787 $2$ & $0$ & \bitvar{GOLDREFY}  & \bitvar{RPYW} & \bitvar{RPYH} \\
6788 $2$ & $1$ & \bitvar{GOLDREFCB} & \bitvar{RPCW} & \bitvar{RPCH} \\
6789 $2$ & $2$ & \bitvar{GOLDREFCR} & \bitvar{RPCW} & \bitvar{RPCH} \\
6790 \bottomrule\end{tabular}
6791 \end{center}
6792 \caption{Reference Planes and Sizes for Each \locvar{\rfi} and \locvar{\pli}}
6793 \label{tab:refp}
6794 \end{table}
6795
6796 \item
6797 Assign \locvar{MVX} the value
6798 \begin{equation*}
6799  \left\lfloor\lvert\bitvar{MVECTS}[\locvar{\bi}]_x\rvert\right\rfloor*
6800  \sign(\bitvar{MVECTS}[\locvar{\bi}]_x).
6801 \end{equation*}
6802 \item
6803 Assign \locvar{MVY} the value
6804 \begin{equation*}
6805  \left\lfloor\lvert\bitvar{MVECTS}[\locvar{\bi}]_y\rvert\right\rfloor*
6806  \sign(\bitvar{MVECTS}[\locvar{\bi}]_y).
6807 \end{equation*}
6808 \item
6809 Assign \locvar{MVX2} the value
6810 \begin{equation*}
6811  \left\lceil\lvert\bitvar{MVECTS}[\locvar{\bi}]_x\rvert\right\rceil*
6812  \sign(\bitvar{MVECTS}[\locvar{\bi}]_x).
6813 \end{equation*}
6814 \item
6815 Assign \locvar{MVY2} the value
6816 \begin{equation*}
6817  \left\lceil\lvert\bitvar{MVECTS}[\locvar{\bi}]_y\rvert\right\rceil*
6818  \sign(\bitvar{MVECTS}[\locvar{\bi}]_y).
6819 \end{equation*}
6820 \item
6821 If \locvar{MVX} equals \locvar{MVX2} and \locvar{MVY} equals \locvar{MVY2},
6822  use the values \locvar{REFP}, \locvar{RPW}, \locvar{RPH}, \locvar{BX},
6823  \locvar{BY}, \locvar{MVX}, and \locvar{MVY} to compute \locvar{PRED} using the
6824  procedure given in Section~\ref{sub:predfullpel}.
6825 \item
6826 Otherwise, use the values \locvar{REFP}, \locvar{RPW}, \locvar{RPH},
6827  \locvar{BX}, \locvar{BY}, \locvar{MVX}, \locvar{MVY}, \locvar{MVX2}, and
6828  \locvar{MVY2} to compute \locvar{PRED} using the procedure given in
6829  Section~\ref{sub:predhalfpel}.
6830 \end{enumerate}
6831 \item
6832 If $\bitvar{NCOEFFS}[\locvar{\bi}]$ is less than 2:
6833 \begin{enumerate}
6834 \item
6835 Using \bitvar{ACSCALE}, \bitvar{DCSCALE}, \bitvar{BMS}, \bitvar{NQRS}, \\
6836  \bitvar{QRSIZES}, \bitvar{QRBMIS}, \locvar{\qti}, \locvar{\pli}, and
6837  \locvar{\idx{qi0}}, use the procedure given in Section~\ref{sub:quant-mat} to
6838  compute the DC quantization matrix \locvar{QMAT}.
6839 \item
6840 Assign \locvar{DC} the value
6841 \begin{equation*}
6842  (\bitvar{COEFFS}[\locvar{\bi}][0]*\locvar{QMAT}[0]+15)>>5.
6843 \end{equation*}
6844 \item
6845 Truncate \locvar{DC} to a 16-bit representation by dropping any higher-order
6846  bits.
6847 \item
6848 For each value of \locvar{\idx{by}} from 0 to 7, and each value of
6849  \locvar{\idx{bx}} from 0 to 7, assign
6850  $\locvar{RES}[\locvar{\idx{by}}][\locvar{\idx{bx}}]$ the value \locvar{DC}.
6851 \end{enumerate}
6852 \item
6853 Otherwise:
6854 \begin{enumerate}
6855 \item
6856 Assign \locvar{\qi} the value $\bitvar{QIS}[\bitvar{QIIS}[\locvar{\bi}]]$.
6857 \item
6858 Using \bitvar{ACSCALE}, \bitvar{DCSCALE}, \bitvar{BMS}, \bitvar{NQRS}, \\
6859  \bitvar{QRSIZES}, \bitvar{QRBMIS}, \locvar{\qti}, \locvar{\pli},
6860  \locvar{\idx{qi0}}, and \locvar{\qi}, compute \locvar{DQC} using the procedure
6861  given in Section~\ref{sub:dequant}.
6862 \item
6863 Using \locvar{DQC}, compute \locvar{RES} using the procedure given in
6864  Section~\ref{sub:2d-idct}.
6865 \end{enumerate}
6866 \end{enumerate}
6867 \item
6868 Otherwise:
6869 \begin{enumerate}
6870 \item
6871 Assign \locvar{\rfi} the value 1.
6872 \item
6873 Assign \locvar{REFP}, \locvar{RPW}, and \locvar{RPH} the values given in
6874  Table~\ref{tab:refp} corresponding to the current values of \locvar{\rfi} and
6875  \locvar{\pli}.
6876 \item
6877 Assign \locvar{MVX} the value 0.
6878 \item
6879 Assign \locvar{MVY} the value 0.
6880 \item
6881 Using the values \locvar{REFP}, \locvar{RPW}, \locvar{RPH}, \locvar{BX},
6882  \locvar{BY}, \locvar{MVX}, and \locvar{MVY}, compute \locvar{PRED} using the
6883  procedure given in Section~\ref{sub:predfullpel}.
6884 This is simply a copy of the co-located block in the previous reference frame.
6885 \item
6886 For each value of \locvar{\idx{by}} from 0 to 7, and each value of
6887  \locvar{\idx{bx}} from 0 to 7, assign
6888  $\locvar{RES}[\locvar{\idx{by}}][\locvar{\idx{bx}}]$ the value 0.
6889 \end{enumerate}
6890 \item
6891 For each value of \locvar{\idx{by}} from 0 to 7, and each value of
6892  \locvar{\idx{bx}} from 0 to 7:
6893 \begin{enumerate}
6894 \item
6895 Assign \locvar{P} the value
6896  $(\locvar{PRED}[\locvar{\idx{by}}][\locvar{\idx{bx}}]+
6897  \locvar{RES}[\locvar{\idx{by}}][\locvar{\idx{bx}}])$.
6898 \item
6899 If \locvar{P} is greater than $255$, assign \locvar{P} the value $255$.
6900 \item
6901 If \locvar{P} is less than $0$, assign \locvar{P} the value $0$.
6902 \item
6903 If \locvar{\pli} equals 0, assign
6904  $\bitvar{RECY}[\locvar{BY}+\locvar{\idx{by}}][\locvar{BX}+\locvar{\idx{bx}}]$
6905  the value \locvar{P}.
6906 \item
6907 Otherwise, if \locvar{\pli} equals 1, assign
6908  $\bitvar{RECCB}[\locvar{BY}+\locvar{\idx{by}}][\locvar{BX}+\locvar{\idx{bx}}]$
6909  the value \locvar{P}.
6910 \item
6911 Otherwise, \locvar{\pli} equals 2, so assign
6912  $\bitvar{RECCR}[\locvar{BY}+\locvar{\idx{by}}][\locvar{BX}+\locvar{\idx{bx}}]$
6913  the value \locvar{P}.
6914 \end{enumerate}
6915 \end{enumerate}
6916 \end{enumerate}
6917
6918 \section{Loop Filtering}
6919 \label{sec:loopfilter}
6920
6921 The loop filter is a simple deblocking filter that is based on running a small
6922  edge detecting filter over the coded block edges and adjusting the pixel
6923  values by a tapered response.
6924 The filter response is modulated by the following non-linear function:
6925 \begin{align*}
6926 \lflim(\locvar{R},\bitvar{L})&=\left\{\begin{array}{ll}
6927 0,                        & \locvar{R}\le-2*\bitvar{L} \\
6928 -\locvar{R}-2*\bitvar{L}, & -2*\bitvar{L}<\locvar{R}\le-\bitvar{L} \\
6929 \locvar{R},               & -\bitvar{L}<\locvar{R}<\bitvar{L} \\
6930 -\locvar{R}+2*\bitvar{L}, & \bitvar{L}\le\locvar{R}<2*\bitvar{L} \\
6931 0,                        & 2*\bitvar{L}\le\locvar{R}
6932 \end{array}\right.
6933 \end{align*}
6934 Here \bitvar{L} is a limiting value equal to $\bitvar{LFLIMS}[\idx{qi0}]$.
6935 It defines the peaks of the function.
6936 \bitvar{LFLIMS} is an array of values specified in the setup header and is
6937  indexed by \idx{qi0}, the first quantization index for the frame, the one used
6938  for all the DC coefficients.
6939 Larger values of \bitvar{L} indicate a stronger filter.
6940
6941 \subsection{Horizontal Filter}
6942 \label{sub:filth}
6943
6944 \paragraph{Input parameters:}\hfill\\*
6945 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6946 \multicolumn{1}{c}{Name} &
6947 \multicolumn{1}{c}{Type} &
6948 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6949 \multicolumn{1}{c}{Signed?} &
6950 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6951 \bitvar{RECP}      & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6952                                    8 & No  & A $\bitvar{RPH}\times\bitvar{RPW}$
6953  array containing the contents of a plane of the reconstructed frame. \\
6954 \bitvar{FX}        & Integer   & 20 & No  & The horizontal pixel index of the
6955  lower-left corner of the area to be filtered. \\
6956 \bitvar{FY}        & Integer   & 20 & No  & The vertical pixel index of the
6957  lower-left corner of the area to be filtered. \\
6958 \bitvar{L}         & Integer   &  7 & No  & The loop filter limit value. \\
6959 \bottomrule\end{tabularx}
6960
6961 \paragraph{Output parameters:}\hfill\\*
6962 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6963 \multicolumn{1}{c}{Name} &
6964 \multicolumn{1}{c}{Type} &
6965 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6966 \multicolumn{1}{c}{Signed?} &
6967 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6968 \bitvar{RECP}  & \multicolumn{1}{p{50pt}}{2D Integer Array} &
6969                                8 & No  & A $\bitvar{RPH}\times\bitvar{RPW}$
6970  array containing the contents of a plane of the reconstructed frame. \\
6971 \bottomrule\end{tabularx}
6972
6973 \paragraph{Variables used:}\hfill\\*
6974 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
6975 \multicolumn{1}{c}{Name} &
6976 \multicolumn{1}{c}{Type} &
6977 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
6978 \multicolumn{1}{c}{Signed?} &
6979 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
6980 \locvar{R}         & Integer &  9 & Yes & The edge detector response. \\
6981 \locvar{P}         & Integer &  9 & Yes & A filtered pixel value. \\
6982 \locvar{\idx{by}}  & Integer & 20 & No  & The vertical pixel index in the
6983  block. \\
6984 \bottomrule\end{tabularx}
6985 \medskip
6986
6987 This procedure applies a $4$-tap horizontal filter to each row of a vertical
6988  block edge.
6989
6990 \begin{enumerate}
6991 \item
6992 For each value of \locvar{\idx{by}} from $0$ to $7$:
6993 \begin{enumerate}
6994 \item
6995 Assign \locvar{R} the value
6996 \begin{multline*}
6997 (\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}]-
6998  3*\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+1]+\\
6999  3*\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+2]-
7000  \bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+3]+4)>>3
7001 \end{multline*}
7002 \item
7003 Assign \locvar{P} the value
7004  $(\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+1]+
7005  \lflim(\locvar{R},\bitvar{L}))$.
7006 \item
7007 If \locvar{P} is less than zero, assign
7008  $\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+1]$ the value zero.
7009 \item
7010 Otherwise, if \locvar{P} is greater than $255$, assign
7011  $\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+1]$ the value $255$.
7012 \item
7013 Otherwise, assign
7014  $\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+1]$ the value
7015  \locvar{P}.
7016 \item
7017 Assign \locvar{P} the value
7018  $(\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+2]-
7019  \lflim(\locvar{R},\bitvar{L}))$.
7020 \item
7021 If \locvar{P} is less than zero, assign
7022  $\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+2]$ the value zero.
7023 \item
7024 Otherwise, if \locvar{P} is greater than $255$, assign
7025  $\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+2]$ the value $255$.
7026 \item
7027 Otherwise, assign
7028  $\bitvar{RECP}[\bitvar{FY}+\locvar{\idx{by}}][\bitvar{FX}+2]$ the value
7029  \locvar{P}.
7030 \end{enumerate}
7031 \end{enumerate}
7032
7033 \subsection{Vertical Filter}
7034 \label{sub:filtv}
7035
7036 \paragraph{Input parameters:}\hfill\\*
7037 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
7038 \multicolumn{1}{c}{Name} &
7039 \multicolumn{1}{c}{Type} &
7040 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
7041 \multicolumn{1}{c}{Signed?} &
7042 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
7043 \bitvar{RECP}      & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7044                                    8 & No  & A $\bitvar{RPH}\times\bitvar{RPW}$
7045  array containing the contents of a plane of the reconstructed frame. \\
7046 \bitvar{FX}        & Integer   & 20 & No  & The horizontal pixel index of the
7047  lower-left corner of the area to be filtered. \\
7048 \bitvar{FY}        & Integer   & 20 & No  & The vertical pixel index of the
7049  lower-left corner of the area to be filtered. \\
7050 \bitvar{L}         & Integer   &  7 & No  & The loop filter limit value. \\
7051 \bottomrule\end{tabularx}
7052
7053 \paragraph{Output parameters:}\hfill\\*
7054 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
7055 \multicolumn{1}{c}{Name} &
7056 \multicolumn{1}{c}{Type} &
7057 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
7058 \multicolumn{1}{c}{Signed?} &
7059 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
7060 \bitvar{RECP}  & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7061                                8 & No  & A $\bitvar{RPH}\times\bitvar{RPW}$
7062  array containing the contents of a plane of the reconstructed frame. \\
7063 \bottomrule\end{tabularx}
7064
7065 \paragraph{Variables used:}\hfill\\*
7066 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
7067 \multicolumn{1}{c}{Name} &
7068 \multicolumn{1}{c}{Type} &
7069 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
7070 \multicolumn{1}{c}{Signed?} &
7071 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
7072 \locvar{R}         & Integer &  9 & Yes & The edge detector response. \\
7073 \locvar{P}         & Integer &  9 & Yes & A filtered pixel value. \\
7074 \locvar{\idx{bx}}  & Integer & 20 & No  & The horizontal pixel index in the
7075  block. \\
7076 \bottomrule\end{tabularx}
7077 \medskip
7078
7079 This procedure applies a $4$-tap vertical filter to each column of a horizontal
7080  block edge.
7081
7082 \begin{enumerate}
7083 \item
7084 For each value of \locvar{\idx{bx}} from $0$ to $7$:
7085 \begin{enumerate}
7086 \item
7087 Assign \locvar{R} the value
7088 \begin{multline*}
7089 (\bitvar{RECP}[\bitvar{FY}][\bitvar{FX}+\locvar{\idx{bx}}]-
7090  3*\bitvar{RECP}[\bitvar{FY}+1][\bitvar{FX}+\locvar{\idx{bx}}]+\\
7091  3*\bitvar{RECP}[\bitvar{FY}+2][\bitvar{FX}+\locvar{\idx{bx}}]-
7092  \bitvar{RECP}[\bitvar{FY}+3][\bitvar{FX}+\locvar{\idx{bx}}]+4)>>3
7093 \end{multline*}
7094 \item
7095 Assign \locvar{P} the value
7096  $(\bitvar{RECP}[\bitvar{FY}+1][\bitvar{FX}+\locvar{\idx{bx}}]+
7097  \lflim(\locvar{R},\bitvar{L}))$.
7098 \item
7099 If \locvar{P} is less than zero, assign
7100  $\bitvar{RECP}[\bitvar{FY}+1][\bitvar{FX}+\locvar{\idx{bx}}]$ the value zero.
7101 \item
7102 Otherwise, if \locvar{P} is greater than $255$, assign
7103  $\bitvar{RECP}[\bitvar{FY}+1][\bitvar{FX}+\locvar{\idx{bx}}]$ the value $255$.
7104 \item
7105 Otherwise, assign
7106  $\bitvar{RECP}[\bitvar{FY}+1][\bitvar{FX}+\locvar{\idx{bx}}]$ the value
7107  \locvar{P}.
7108 \item
7109 Assign \locvar{P} the value
7110  $(\bitvar{RECP}[\bitvar{FY}+2][\bitvar{FX}+\locvar{\idx{bx}}]-
7111  \lflim(\locvar{R},\bitvar{L}))$.
7112 \item
7113 If \locvar{P} is less than zero, assign
7114  $\bitvar{RECP}[\bitvar{FY}+2][\bitvar{FX}+\locvar{\idx{bx}}]$ the value zero.
7115 \item
7116 Otherwise, if \locvar{P} is greater than $255$, assign
7117  $\bitvar{RECP}[\bitvar{FY}+2][\bitvar{FX}+\locvar{\idx{bx}}]$ the value $255$.
7118 \item
7119 Otherwise, assign
7120  $\bitvar{RECP}[\bitvar{FY}+2][\bitvar{FX}+\locvar{\idx{bx}}]$ the value
7121  \locvar{P}.
7122 \end{enumerate}
7123 \end{enumerate}
7124
7125 \subsection{Complete Loop Filter}
7126 \label{sub:loop-filt}
7127
7128 \paragraph{Input parameters:}\hfill\\*
7129 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
7130 \multicolumn{1}{c}{Name} &
7131 \multicolumn{1}{c}{Type} &
7132 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
7133 \multicolumn{1}{c}{Signed?} &
7134 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
7135 \bitvar{LFLIMS}    & \multicolumn{1}{p{40pt}}{Integer array} &
7136                               7 & No & A 64-element array of loop filter limit
7137  values. \\
7138 \bitvar{RPYW}      & Integer & 20 & No  & The width of the $Y'$ plane of the
7139  reconstructed frame in pixels. \\
7140 \bitvar{RPYH}      & Integer & 20 & No  & The height of the $Y'$ plane of the
7141  reconstructed frame in pixels. \\
7142 \bitvar{RPCW}      & Integer & 20 & No  & The width of the $C_b$ and $C_r$
7143  planes of the reconstructed frame in pixels. \\
7144 \bitvar{RPCH}      & Integer & 20 & No  & The height of the $C_b$ and $C_r$
7145  planes of the reconstructed frame in pixels. \\
7146 \bitvar{NBS}       & Integer & 36 & No  & The total number of blocks in a
7147  frame. \\
7148 \bitvar{BCODED}    & \multicolumn{1}{p{40pt}}{Integer Array} &
7149                                 1 & No  & An \bitvar{NBS}-element array of
7150  flags indicating which blocks are coded. \\
7151 \bitvar{QIS}       & \multicolumn{1}{p{40pt}}{Integer array} &
7152                                 6 & No  & An \bitvar{NQIS}-element array of
7153  \qi\ values. \\
7154 \bitvar{RECY}      & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7155                                 8 & No  & A $\bitvar{RPYH}\times\bitvar{RPYW}$
7156  array containing the contents of the $Y'$ plane of the reconstructed frame. \\
7157 \bitvar{RECCB}     & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7158                                 8 & No  & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7159  array containing the contents of the $C_b$ plane of the reconstructed frame. \\
7160 \bitvar{RECCR}     & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7161                                 8 & No  & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7162  array containing the contents of the $C_r$ plane of the reconstructed frame. \\
7163 \bottomrule\end{tabularx}
7164
7165 \paragraph{Output parameters:}\hfill\\*
7166 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
7167 \multicolumn{1}{c}{Name} &
7168 \multicolumn{1}{c}{Type} &
7169 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
7170 \multicolumn{1}{c}{Signed?} &
7171 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
7172 \bitvar{RECY}  & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7173                                8 & No  & A $\bitvar{RPYH}\times\bitvar{RPYW}$
7174  array containing the contents of the $Y'$ plane of the reconstructed frame. \\
7175 \bitvar{RECCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7176                                8 & No  & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7177  array containing the contents of the $C_b$ plane of the reconstructed frame. \\
7178 \bitvar{RECCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7179                                8 & No  & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7180  array containing the contents of the $C_r$ plane of the reconstructed frame. \\
7181 \bottomrule\end{tabularx}
7182
7183 \paragraph{Variables used:}\hfill\\*
7184 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
7185 \multicolumn{1}{c}{Name} &
7186 \multicolumn{1}{c}{Type} &
7187 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
7188 \multicolumn{1}{c}{Signed?} &
7189 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
7190 \locvar{RPW}       & Integer & 20 & No  & The width of the current plane of the
7191  reconstructed frame in pixels. \\
7192 \locvar{RPH}       & Integer & 20 & No  & The height of the current plane of
7193  the reconstructed frame in pixels. \\
7194 \locvar{RECP}      & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7195                                 8 & No  & A $\locvar{RPH}\times\locvar{RPW}$
7196  array containing the contents of the current plane of the reconstructed
7197  frame. \\
7198 \locvar{BX}        & Integer & 20 & No  & The horizontal pixel index of the
7199  lower-left corner of the current block. \\
7200 \locvar{BY}        & Integer & 20 & No  & The vertical pixel index of the
7201  lower-left corner of the current block. \\
7202 \locvar{FX}        & Integer & 20 & No  & The horizontal pixel index of the
7203  lower-left corner of the area to be filtered. \\
7204 \locvar{FY}        & Integer & 20 & No  & The vertical pixel index of the
7205  lower-left corner of the area to be filtered. \\
7206 \locvar{L}         & Integer &  7 & No  & The loop filter limit value. \\
7207 \locvar{\bi}       & Integer & 36 & No  & The index of the current block in
7208  coded order. \\
7209 \locvar{\bj}       & Integer & 36 & No  & The index of a neighboring block in
7210  coded order. \\
7211 \locvar{\pli}      & Integer &  2 & No  & The color plane index of the current
7212  block. \\
7213 \bottomrule\end{tabularx}
7214 \medskip
7215
7216 This procedure defines the order that the various block edges are filtered.
7217 Because each application of one of the two filters above destructively modifies
7218  the contents of the reconstructed image, the precise output obtained differs
7219  depending on the order that horizontal and vertical filters are applied to the
7220  edges of a single block.
7221 The order defined here conforms to that used by VP3.
7222
7223 \begin{enumerate}
7224 \item
7225 Assign \locvar{L} the value $\bitvar{LFLIMS}[\bitvar{QIS}[0]]$.
7226 \item
7227 For each block in {\em raster} order, with coded-order index \locvar{\bi}:
7228 \begin{enumerate}
7229 \item
7230 If $\bitvar{BCODED}[\locvar{\bi}]$ is non-zero:
7231 \begin{enumerate}
7232 \item
7233 Assign \locvar{\pli} the index of the color plane block \locvar{\bi} belongs
7234  to.
7235 \item
7236 Assign \locvar{RECP}, \locvar{RPW}, and \locvar{RPH} the values given in
7237  Table~\ref{tab:recp} corresponding to the value of \locvar{\pli}.
7238
7239 \begin{table}[htbp]
7240 \begin{center}
7241 \begin{tabular}{clll}\toprule
7242 \locvar{\pli} & \locvar{RECP}  & \locvar{RPW}  & \locvar{RPH}  \\\midrule
7243 $0$           & \bitvar{RECY}  & \bitvar{RPYW} & \bitvar{RPYH} \\
7244 $1$           & \bitvar{RECCB} & \bitvar{RPCW} & \bitvar{RPCH} \\
7245 $2$           & \bitvar{RECCR} & \bitvar{RPCW} & \bitvar{RPCH} \\
7246 \bottomrule\end{tabular}
7247 \end{center}
7248 \caption{Reconstructed Planes and Sizes for Each \locvar{\pli}}
7249 \label{tab:recp}
7250 \end{table}
7251
7252 \item
7253 Assign \locvar{BX} the horizontal pixel index of the lower-left corner of the
7254  block \locvar{\bi}.
7255 \item
7256 Assign \locvar{BY} the vertical pixel index of the lower-left corner of the
7257  block \locvar{\bi}.
7258 \item
7259 If \locvar{BX} is greater than zero:
7260 \begin{enumerate}
7261 \item
7262 Assign \locvar{FX} the value $(\locvar{BX}-2)$.
7263 \item
7264 Assign \locvar{FY} the value \locvar{BY}.
7265 \item
7266 Using \locvar{RECP}, \locvar{FX}, \locvar{FY}, and \locvar{L}, apply the
7267  horizontal block filter to the left edge of block \locvar{\bi} with the
7268  procedure described in Section~\ref{sub:filth}.
7269 \end{enumerate}
7270 \item
7271 If \locvar{BY} is greater than zero:
7272 \begin{enumerate}
7273 \item
7274 Assign \locvar{FX} the value \locvar{BX}.
7275 \item
7276 Assign \locvar{FY} the value $(\locvar{BY}-2)$.
7277 \item
7278 Using \locvar{RECP}, \locvar{FX}, \locvar{FY}, and \locvar{L}, apply the
7279  vertical block filter to the bottom edge of block \locvar{\bi} with the
7280  procedure described in Section~\ref{sub:filtv}.
7281 \end{enumerate}
7282 \item
7283 If $(\locvar{BX}+8)$ is less than \locvar{RPW} and
7284  $\bitvar{BCODED}[\locvar{\bj}]$ is zero, where \locvar{\bj} is the coded-order
7285  index of the block adjacent to \locvar{\bi} on the right:
7286 \begin{enumerate}
7287 \item
7288 Assign \locvar{FX} the value $(\locvar{BX}+6)$.
7289 \item
7290 Assign \locvar{FY} the value \locvar{BY}.
7291 \item
7292 Using \locvar{RECP}, \locvar{FX}, \locvar{FY}, and \locvar{L}, apply the
7293  horizontal block filter to the right edge of block \locvar{\bi} with the
7294  procedure described in Section~\ref{sub:filth}.
7295 \end{enumerate}
7296 \item
7297 If $(\locvar{BY}+8)$ is less than \locvar{RPH} and
7298  $\bitvar{BCODED}[\locvar{\bj}]$ is zero, where \locvar{\bj} is the coded-order
7299  index of the block adjacent to \locvar{\bi} above:
7300 \begin{enumerate}
7301 \item
7302 Assign \locvar{FX} the value \locvar{BX}.
7303 \item
7304 Assign \locvar{FY} the value $(\locvar{BY}+6)$.
7305 \item
7306 Using \locvar{RECP}, \locvar{FX}, \locvar{FY}, and \locvar{L}, apply the
7307  vertical block filter to the top edge of block \locvar{\bi} with the
7308  procedure described in Section~\ref{sub:filtv}.
7309 \end{enumerate}
7310 \end{enumerate}
7311 \end{enumerate}
7312 \end{enumerate}
7313
7314 \paragraph{VP3 Compatibility}
7315
7316 The original VP3 decoder implemented unrestricted motion vectors by enlarging
7317  the reconstructed frame buffers and repeating the pixels on its edges into the
7318  padding region.
7319 However, for the previous reference frame this padding occurred before the loop
7320  filter was applied, but for the golden reference frame it occurred afterwards.
7321
7322 This means that for the previous reference frame, the padding values were
7323  required to be stored separately from the main image values.
7324 Furthermore, even if the previous and golden reference frames were in fact the
7325  same frame, they could have different padding values.
7326 Finally, the encoder did not apply the loop filter at all, which resulted in
7327  artifacts, particularly in near-static scenes, due to prediction-loop
7328  mismatch.
7329 This last can only be considered a bug in the VP3 encoder.
7330
7331 Given all these things, Theora now uniformly applies the loop filter before
7332  the reference frames are padded.
7333 This means it is possible to use the same buffer for the previous and golden
7334  reference frames when they do indeed refer to the same frame.
7335 It also means that on architectures where memory bandwidth is limited, it is
7336  possible to avoid storing padding values, and simply clamp the motion vectors
7337  applied to each pixel as described in Sections~\ref{sub:predfullpel}
7338  and~\ref{sub:predhalfpel}.
7339 This means that the predicted pixel values along the edges of the frame might
7340  differ slightly between VP3 and Theora, but since the VP3 encoder did not
7341  apply the loop filter in the first place, this is not likely to impose any
7342  serious compatibility issues.
7343
7344 \section{Complete Frame Decode}
7345
7346 \paragraph{Input parameters:}\hfill\\*
7347 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
7348 \multicolumn{1}{c}{Name} &
7349 \multicolumn{1}{c}{Type} &
7350 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
7351 \multicolumn{1}{c}{Signed?} &
7352 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
7353 \bitvar{FMBW}      & Integer & 16 & No  & The width of the frame in macro
7354  blocks. \\
7355 \bitvar{FMBH}      & Integer & 16 & No  & The height of the frame in macro
7356  blocks. \\
7357 \bitvar{NSBS}      & Integer & 32 & No  & The total number of super blocks in a
7358  frame. \\
7359 \bitvar{NBS}       & Integer & 36 & No  & The total number of blocks in a
7360  frame. \\
7361 \bitvar{NMBS}      & Integer & 32 & No  & The total number of macro blocks in a
7362  frame. \\
7363 \bitvar{FRN}       & Integer & 32 & No  & The frame-rate numerator. \\
7364 \bitvar{FRD}       & Integer & 32 & No  & The frame-rate denominator. \\
7365 \bitvar{PARN}      & Integer & 24 & No  & The pixel aspect-ratio numerator. \\
7366 \bitvar{PARD}      & Integer & 24 & No  & The pixel aspect-ratio
7367  denominator. \\
7368 \bitvar{CS}        & Integer &  8 & No  & The color space. \\
7369 \bitvar{PF}        & Integer &  2 & No  & The pixel format. \\
7370 \bitvar{NOMBR}     & Integer & 24 & No  & The nominal bitrate of the stream, in
7371  bits per second. \\
7372 \bitvar{QUAL}      & Integer &  6 & No  & The quality hint. \\
7373 \bitvar{KFGSHIFT}  & Integer &  5 & No  & The amount to shift the key frame
7374  number by in the granule position. \\
7375 \bitvar{LFLIMS}    & \multicolumn{1}{p{40pt}}{Integer array} &
7376                                 7 & No  & A 64-element array of loop filter
7377  limit values. \\
7378 \bitvar{ACSCALE}   & \multicolumn{1}{p{40pt}}{Integer array} &
7379                                16 & No  & A 64-element array of scale values
7380  for AC coefficients for each \qi\ value. \\
7381 \bitvar{DCSCALE}   & \multicolumn{1}{p{40pt}}{Integer array} &
7382                                16 & No  & A 64-element array of scale values
7383  for the DC coefficient for each \qi\ value. \\
7384 \bitvar{NBMS}      & Integer & 10 & No  & The number of base matrices. \\
7385 \bitvar{BMS}       & \multicolumn{1}{p{50pt}}{2D Integer array} &
7386                                 8 & No  & A $\bitvar{NBMS}\times 64$ array
7387  containing the base matrices. \\
7388 \bitvar{NQRS}      & \multicolumn{1}{p{50pt}}{2D Integer array} &
7389                                 6 & No  & A $2\times 3$ array containing the
7390  number of quant ranges for a given \qti\ and \pli, respectively.
7391 This is at most $63$. \\
7392 \bitvar{QRSIZES}   & \multicolumn{1}{p{50pt}}{3D Integer array} &
7393                                 6 & No  & A $2\times 3\times 63$ array of the
7394  sizes of each quant range for a given \qti\ and \pli, respectively.
7395 Only the first $\bitvar{NQRS}[\qti][\pli]$ values will be used. \\
7396 \bitvar{QRBMIS}    & \multicolumn{1}{p{50pt}}{3D Integer array} &
7397                                 9 & No  & A $2\times 3\times 64$ array of the
7398  \bmi's used for each quant range for a given \qti\ and \pli, respectively.
7399 Only the first $(\bitvar{NQRS}[\qti][\pli]+1)$ values will be used. \\
7400 \bitvar{HTS}       & \multicolumn{3}{l}{Huffman table array}
7401                                         & An 80-element array of Huffman tables
7402  with up to 32 entries each. \\
7403 \bitvar{GOLDREFY}  & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7404                                 8 & No  & A $\bitvar{RPYH}\times\bitvar{RPYW}$
7405  array containing the contents of the $Y'$ plane of the golden reference
7406  frame. \\
7407 \bitvar{GOLDREFCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7408                                 8 & No  & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7409  array containing the contents of the $C_b$ plane of the golden reference
7410  frame. \\
7411 \bitvar{GOLDREFCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7412                                 8 & No  & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7413  array containing the contents of the $C_r$ plane of the golden reference
7414  frame. \\
7415 \bitvar{PREVREFY}  & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7416                                 8 & No  & A $\bitvar{RPYH}\times\bitvar{RPYW}$
7417  array containing the contents of the $Y'$ plane of the previous reference
7418  frame. \\
7419 \bitvar{PREVREFCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7420                                 8 & No  & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7421  array containing the contents of the $C_b$ plane of the previous reference
7422  frame. \\
7423 \bitvar{PREVREFCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7424                                 8 & No  & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7425  array containing the contents of the $C_r$ plane of the previous reference
7426  frame. \\
7427 \bottomrule\end{tabularx}
7428
7429 \paragraph{Output parameters:}\hfill\\*
7430 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
7431 \multicolumn{1}{c}{Name} &
7432 \multicolumn{1}{c}{Type} &
7433 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
7434 \multicolumn{1}{c}{Signed?} &
7435 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
7436 \bitvar{RECY}      & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7437                                 8 & No  & A $\bitvar{RPYH}\times\bitvar{RPYW}$
7438  array containing the contents of the $Y'$ plane of the reconstructed frame. \\
7439 \bitvar{RECCB}     & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7440                                 8 & No  & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7441  array containing the contents of the $C_b$ plane of the reconstructed
7442  frame. \\
7443 \bitvar{RECCR}     & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7444                                 8 & No  & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7445  array containing the contents of the $C_r$ plane of the reconstructed
7446  frame. \\
7447 \bitvar{GOLDREFY}  & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7448                                 8 & No  & A $\bitvar{RPYH}\times\bitvar{RPYW}$
7449  array containing the contents of the $Y'$ plane of the golden reference
7450  frame. \\
7451 \bitvar{GOLDREFCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7452                                 8 & No  & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7453  array containing the contents of the $C_b$ plane of the golden reference
7454  frame. \\
7455 \bitvar{GOLDREFCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7456                                 8 & No  & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7457  array containing the contents of the $C_r$ plane of the golden reference
7458  frame. \\
7459 \bitvar{PREVREFY}  & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7460                                 8 & No  & A $\bitvar{RPYH}\times\bitvar{RPYW}$
7461  array containing the contents of the $Y'$ plane of the previous reference
7462  frame. \\
7463 \bitvar{PREVREFCB} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7464                                 8 & No  & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7465  array containing the contents of the $C_b$ plane of the previous reference
7466  frame. \\
7467 \bitvar{PREVREFCR} & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7468                                 8 & No  & A $\bitvar{RPCH}\times\bitvar{RPCW}$
7469  array containing the contents of the $C_r$ plane of the previous reference
7470  frame. \\
7471 \bottomrule\end{tabularx}
7472
7473 \paragraph{Variables used:}\hfill\\*
7474 \begin{tabularx}{\textwidth}{@{}llrcX@{}}\toprule
7475 \multicolumn{1}{c}{Name} &
7476 \multicolumn{1}{c}{Type} &
7477 \multicolumn{1}{p{30pt}}{\centering Size (bits)} &
7478 \multicolumn{1}{c}{Signed?} &
7479 \multicolumn{1}{c}{Description and restrictions} \\\midrule\endhead
7480 \locvar{FTYPE}   & Integer &  1 & No  & The frame type. \\
7481 \locvar{NQIS}    & Integer &  2 & No  & The number of \qi\ values. \\
7482 \locvar{QIS}     & \multicolumn{1}{p{40pt}}{Integer array} &
7483                               6 & No  & An \locvar{NQIS}-element array of
7484  \qi\ values. \\
7485 \locvar{BCODED}  & \multicolumn{1}{p{40pt}}{Integer Array} &
7486                               1 & No  & An \bitvar{NBS}-element array of flags
7487  indicating which blocks are coded. \\
7488 \locvar{MBMODES} & \multicolumn{1}{p{40pt}}{Integer Array} &
7489                               3 & No  & An \bitvar{NMBS}-element array of
7490  coding modes for each macro block. \\
7491 \locvar{MVECTS}  & \multicolumn{1}{p{50pt}}{Array of 2D Integer Vectors} &
7492                               6 & Yes & An \bitvar{NBS}-element array of motion
7493  vectors for each block. \\
7494 \locvar{QIIS}    & \multicolumn{1}{p{40pt}}{Integer Array} &
7495                               2 & No  & An \bitvar{NBS}-element array of
7496  \locvar{\qii} values for each block. \\
7497 \locvar{COEFFS}  & \multicolumn{1}{p{50pt}}{2D Integer Array} &
7498                              16 & Yes & An $\bitvar{NBS}\times 64$ array of
7499  quantized DCT coefficient values for each block in zig-zag order. \\
7500 \locvar{NCOEFFS} & \multicolumn{1}{p{40pt}}{Integer Array} &
7501                               7 & No  & An \bitvar{NBS}-element array of the
7502  coefficient count for each block. \\
7503 \bitvar{RPYW}    & Integer & 20 & No  & The width of the $Y'$ plane of the
7504  reference frames in pixels. \\
7505 \bitvar{RPYH}    & Integer & 20 & No  & The height of the $Y'$ plane of the
7506  reference frames in pixels. \\
7507 \bitvar{RPCW}    & Integer & 20 & No  & The width of the $C_b$ and $C_r$
7508  planes of the reference frames in pixels. \\
7509 \bitvar{RPCH}    & Integer & 20 & No  & The height of the $C_b$ and $C_r$
7510  planes of the reference frames in pixels. \\
7511 \locvar{\bi}     & Integer & 36 & No  & The index of the current block in coded
7512  order. \\
7513 \bottomrule\end{tabularx}
7514 \medskip
7515
7516 This procedure uses all the procedures defined in the previous sections of this
7517  chapter to decode and reconstruct a complete frame.
7518 As a special case, a 0-byte packet is treated exactly like an inter frame with
7519  no coded blocks.
7520 It takes as input values decoded from the headers, as well as the current
7521  reference frames.
7522 As output, it gives the uncropped, reconstructed frame.
7523 This should be cropped to the picture region before display.
7524
7525 \begin{enumerate}
7526 \item
7527 If the size of the data packet is non-zero:
7528 \begin{enumerate}
7529 \item
7530 Decode the frame header values \locvar{FTYPE}, \locvar{NQIS}, and \locvar{QIS}
7531  using the procedure given in Section~\ref{sub:frame-header}.
7532 \item
7533 Using \locvar{FTYPE}, \bitvar{NSBS}, and \bitvar{NBS}, decode the list of coded
7534  block flags into \locvar{BCODED} using the procedure given in
7535  Section~\ref{sub:coded-blocks}.
7536 \item
7537 Using \locvar{FTYPE}, \bitvar{NMBS}, \bitvar{NBS}, and \locvar{BCODED}, decode
7538  the macro block coding modes into \locvar{MBMODES} using the procedure given
7539  in Section~\ref{sub:mb-modes}.
7540 \item
7541 If \locvar{FTYPE} is non-zero (inter frame), using \bitvar{PF}, \bitvar{NMBS},
7542  \locvar{MBMODES}, \bitvar{NBS}, and \locvar{BCODED}, decode the motion vectors
7543  into \locvar{MVECTS} using the procedure given in Section~\ref{sub:mv-decode}.
7544 \item
7545 Using \bitvar{NBS}, \locvar{BCODED}, and \locvar{NQIS}, decode the block-level
7546  \qi\ values into \locvar{QIIS} using the procedure given in
7547  Section~\ref{sub:block-qis}.
7548 \item
7549 Using \bitvar{NBS}, \bitvar{NMBS}, \locvar{BCODED}, and \bitvar{HTS}, decode
7550  the DCT coefficients into \locvar{COEFFS} and \locvar{NCOEFFS} using the
7551  procedure given in Section~\ref{sub:dct-coeffs}.
7552 \item
7553 Using \locvar{BCODED} and \locvar{MBMODES}, undo the DC prediction on the DC
7554  coefficients stored in \locvar{COEFFS} using the procedure given in
7555  Section~\ref{sub:dc-pred-undo}.
7556 \end{enumerate}
7557 \item
7558 Otherwise:
7559 \begin{enumerate}
7560 \item
7561 Assign \locvar{FTYPE} the value 1 (inter frame).
7562 \item
7563 Assign \locvar{NQIS} the value 1.
7564 \item
7565 Assign $\locvar{QIS}[0]$ the value 63.
7566 \item
7567 For each value of \locvar{\bi} from 0 to $(\bitvar{NBS}-1)$, assign
7568  $\locvar{BCODED}[\locvar{\bi}]$ the value zero.
7569 \end{enumerate}
7570 \item
7571 Assign \locvar{RPYW} and \locvar{RPYH} the values $(16*\bitvar{FMBW})$ and
7572  $(16*\bitvar{FMBH})$, respectively.
7573 \item
7574 Assign \locvar{RPCW} and \locvar{RPCH} the values from the row of
7575  Table~\ref{tab:rpcwh-for-pf} corresponding to \bitvar{PF}.
7576
7577 \begin{table}[tb]
7578 \begin{center}
7579 \begin{tabular}{crr}\toprule
7580 \bitvar{PF} & \multicolumn{1}{c}{\locvar{RPCW}}
7581                                  & \multicolumn{1}{c}{\locvar{RPCH}} \\\midrule
7582 $0$         &  $8*\bitvar{FMBW}$ &  $8*\bitvar{FMBH}$ \\
7583 $2$         &  $8*\bitvar{FMBW}$ & $16*\bitvar{FMBH}$ \\
7584 $3$         & $16*\bitvar{FMBW}$ & $16*\bitvar{FMBH}$ \\
7585 \bottomrule\end{tabular}
7586 \end{center}
7587 \caption{Width and Height of Chroma Planes for each Pixel Format}
7588 \label{tab:rpcwh-for-pf}
7589 \end{table}
7590
7591 \item
7592 Using \bitvar{ACSCALE}, \bitvar{DCSCALE}, \bitvar{BMS}, \bitvar{NQRS},
7593  \bitvar{QRSIZES}, \bitvar{QRBMIS}, \bitvar{NBS}, \locvar{BCODED},
7594  \locvar{MBMODES}, \locvar{MVECTS}, \locvar{COEFFS}, \locvar{NCOEFFS},
7595  \locvar{QIS}, \locvar{QIIS}, \locvar{RPYW}, \locvar{RPYH}, \locvar{RPCW},
7596  \locvar{RPCH}, \bitvar{GOLDREFY}, \bitvar{GOLDREFCB}, \bitvar{GOLDREFCR},
7597  \bitvar{PREVREFY}, \bitvar{PREVREFCB}, and \bitvar{PREVREFCR}, reconstruct the
7598  complete frame into \bitvar{RECY}, \bitvar{RECCB}, and \bitvar{RECCR} using
7599  the procedure given in Section~\ref{sub:recon}.
7600 \item
7601 Using \bitvar{LFLIMS}, \locvar{RPYW}, \locvar{RPYH}, \locvar{RPCW},
7602  \locvar{RPCH}, \bitvar{NBS}, \locvar{BCODED}, and \locvar{QIS}, apply the loop
7603  filter to the reconstructed frame in \bitvar{RECY}, \bitvar{RECCB}, and
7604  \bitvar{RECCR} using the procedure given in Section~\ref{sub:loop-filt}.
7605 \item
7606 If \locvar{FTYPE} is zero (intra frame), assign \bitvar{GOLDREFY},
7607  \bitvar{GOLDREFCB}, and \bitvar{GOLDREFCR} the values \bitvar{RECY},
7608  \bitvar{RECCB}, and \bitvar{RECCR}, respectively.
7609 \item
7610 Assign \bitvar{PREVREFY}, \bitvar{PREVREFCB}, and \bitvar{PREVREFCR} the values
7611  \bitvar{RECY}, \bitvar{RECCB}, and \bitvar{RECCR}, respectively.
7612 \end{enumerate}
7613
7614 %\backmatter
7615 \appendix
7616
7617 \chapter{Ogg Bitstream Encapsulation}
7618 \label{app:oggencapsulation}
7619
7620 \section{Overview}
7621
7622 This document specifies the embedding or encapsulation of Theora packets
7623  in an Ogg transport stream.
7624
7625 Ogg is a stream oriented wrapper for coded, linear time-based data.
7626 It provides synchronization, multiplexing, framing, error detection and
7627  seeking landmarks for the decoder and complements the raw packet format
7628  used by the Theora codec.
7629
7630 This document assumes familiarity with the details of the Ogg standard.
7631 The Xiph.org documentation provides an overview of the Ogg transport stream
7632  format at \url{http://www.xiph.org/ogg/doc/oggstream.html} and a detailed
7633  description at \url{http://www.xiph.org/ogg/doc/framing.html}.
7634 The format is also defined in RFC~3533 \cite{rfc3533}.
7635 While Theora packets can be embedded in a wide variety of media
7636  containers and streaming mechanisms, the Xiph.org Foundation
7637  recommends Ogg as the native format for Theora video in file-oriented
7638  storage and transmission contexts.
7639
7640 \subsection{MIME type}
7641
7642 The correct MIME type of any Ogg file is {\tt application/ogg}.
7643 Outside of an encapsulation, the MIME type {\tt video/x-theora} may
7644  be used to refer specifically to the Theora compressed video stream.
7645
7646 \section{Embedding in a logical bitstream}
7647
7648 Ogg separates a {\em logical bitstream} consisting of the framing of
7649  a particular sequence of packets and complete within itself from
7650  the {\em physical bitstream} which may consist either of a single
7651  logical bitstream or a number of logical bitstreams multiplexed
7652  together.
7653 This section specifies the embedding of Theora packets in a logical Ogg
7654  bitstream.
7655 The mapping of Ogg Theora logical bitstreams into a multiplexed physical Ogg
7656  stream is described in the next section.
7657
7658 \subsection{Headers}
7659
7660 The initial info header packet appears by itself in a single Ogg page.
7661 This page defines the start of the logical stream and MUST have
7662  the `beginning of stream' flag set.
7663
7664 The second and third header packets (metadata comments and decoder
7665  setup data) can together span one or more Ogg pages.
7666 If there are additional non-normative header packets, they MUST be
7667  included in this sequence of pages as well.
7668 The comment header packet MUST begin the second Ogg page in the logical
7669  bitstream, and there MUST be a page break between the last header
7670  packet and the first frame data packet.
7671
7672 These two page break requirements facilitate stream identification and
7673  simplify header acquisition for seeking and live streaming applications.
7674
7675 All header pages MUST have their granule position field set to zero.
7676 %TODO: or -1?
7677 %TBT: What are we doing now?
7678
7679 \subsection{Frame data}
7680
7681 The first frame data packet in a logical bitstream MUST begin a fresh page.
7682 All other data packets are placed one at a time into Ogg pages
7683  until the end of the stream.
7684 Packets can span pages and multiple packets can be placed within any
7685  one page.
7686 The last page in the logical bitstream MUST have its `end of stream'
7687  flag set.
7688
7689 Frame data pages MUST be marked with a granule index corresponding to
7690  the display time of the last frame/packet that finishes in that page.
7691
7692 {\bf Note:}
7693 This scheme is still under discussion.
7694 It has also been proposed that pages be labeled with a granule corresponding to
7695  the first frame that begins on that page.
7696 This simplifies seeking and mux, but is different from the published
7697  definition of the Ogg granule field.
7698 This document will be updated when the issue is settled.
7699
7700 %TODO: \subsection{Granule position}
7701
7702 \section{Multiplexed stream mapping}
7703
7704 Applications supporting Ogg Theora I must support Theora bitstreams
7705  multiplexed with compressed audio data in the Vorbis I and Speex
7706  formats, and should support Ogg-encapsulated MNG graphics for overlays.
7707 % and the Writ format for text-based titling.
7708 %TBT: That's great... do these things have specifications?
7709
7710 Multiple audio and video bitstreams may be multiplexed together.
7711 How playback of multiple/alternate streams is handled is up to the
7712  application.
7713 Some conventions based on included metadata aid interoperability
7714  in this respect.
7715 %TODO: describe multiple vs. alternate streams, language mapping
7716 % and reference metadata descriptions.
7717
7718 \subsection{Chained streams}
7719
7720 Ogg Theora decoders and playback applications MUST support both grouped
7721  streams (multiplexed concurrent logical streams) and chained streams
7722  (sequential concatenation of independent physical bitstreams).
7723
7724 The number and codec data types of multiplexed streams and the decoder
7725  parameters for those stream types that re-occur can all change at a
7726  chaining boundary.
7727 A playback application MUST be prepared to handle such changes and
7728  SHOULD do so smoothly with the minimum possible visible disruption.
7729 The specification of grouped streams below applies independently to each
7730  segment of a chained bitstream.
7731
7732 \subsection{Grouped streams}
7733
7734 At the beginning of a multiplexed stream, the `beginning of stream'
7735  pages for each logical bitstream will be grouped together.
7736 Within these, the first page to occur MUST be the Theora page.
7737 This facilitates identification of Ogg Theora files among other
7738  Ogg-encapsulated content.
7739 A playback application must nevertheless handle streams where this
7740  arrangement is not correct.
7741 %TBT: Then what's the point of requiring it in the spec?
7742
7743 If there is more than one Theora logical stream, the first page should
7744  be from the primary stream.
7745 That is, it should be from the stream that is the best choice for a generic
7746  player to begin displaying without special user direction.
7747 If there is more than one audio stream, or of any other stream
7748  type, the identification page of the primary stream of that type
7749  should be placed before the others.
7750 %TBT: That's all pretty vague.
7751
7752 After the `beginning of stream' pages, the header pages of each of
7753  the logical streams should be grouped together before any data pages
7754  occur.
7755 %TBT: should or must?
7756
7757 After all the header pages have been placed,
7758  the data pages are multiplexed together.
7759 They should be placed in the stream in increasing order by the playback
7760  time equivalents of their granule fields.
7761 This facilitates seeking while limiting the buffering requirements of the
7762  playback demultiplexer.
7763 %TODO: A lot of this language is encoder-oriented.
7764 %TODO: We define a decoder-oriented specification.
7765 %TODO: The language should be changed to match.
7766
7767 \cleardoublepage
7768 \chapter{VP3}
7769
7770 \section{VP3 Compatibility}
7771 \label{app:vp3-compat}
7772 This section lists all of the encoder and decoder issues that may affect VP3
7773  compatibility.
7774 Each is described in more detail in the text itself.
7775 This list is provided merely for reference.
7776
7777 \begin{itemize}
7778 \item
7779 Bitstream headers (Section~\ref{sec:headers}).
7780 \begin{itemize}
7781 \item
7782 Identification header (Section~\ref{sec:idheader}).
7783 \begin{itemize}
7784 \item
7785 Non-multiple of 16 picture sizes.
7786 \item
7787 Standardized color spaces.
7788 \item
7789 Support for $4:4:4$ and $4:2:2$ pixel formats.
7790 \end{itemize}
7791 \item
7792 Setup header
7793 \begin{itemize}
7794 \item
7795 Loop filter limit values (Section~\ref{sub:loop-filter-limits}).
7796 \item
7797 Quantization parameters (Section~\ref{sub:quant-params}).
7798 \item
7799 Huffman tables (Section~\ref{sub:huffman-tables}).
7800 \end{itemize}
7801 \end{itemize}
7802 \item
7803 Frame header format (Section~\ref{sub:frame-header}).
7804 \item
7805 Extended long-run bit strings (Section~\ref{sub:long-run}).
7806 \item
7807 INTER\_MV\_FOUR handling of uncoded blocks (Section~\ref{sub:mb-mv-decode}).
7808 \item
7809 Block-level \qi\ values (Section~\ref{sub:block-qis}).
7810 \item
7811 Zero-length EOB runs (Section~\ref{sub:eob-token}).
7812 \item
7813 Unrestricted motion vector padding and the loop filter
7814  (Section~\ref{sub:loop-filt}).
7815 \end{itemize}
7816
7817 \section{Loop Filter Limit Values}
7818 \label{app:vp3-loop-filter-limits}
7819
7820 The hard-coded loop filter limit values used in VP3 are defined as follows:
7821 \begin{align*}
7822 \bitvar{LFLIMS} = & \begin{array}[t]{r@{}rrrrrrrr@{}l}
7823 \{ & 30, & 25, & 20, & 20, & 15, & 15, & 14, & 14,   &      \\
7824    & 13, & 13, & 12, & 12, & 11, & 11, & 10, & 10,   &      \\
7825    &  9, &  9, &  8, &  8, &  7, &  7, &  7, &  7,   &      \\
7826    &  6, &  6, &  6, &  6, &  5, &  5, &  5, &  5,   &      \\
7827    &  4, &  4, &  4, &  4, &  3, &  3, &  3, &  3,   &      \\
7828    &  2, &  2, &  2, &  2, &  2, &  2, &  2, &  2,   &      \\
7829    &  0, &  0, &  0, &  0, &  0, &  0, &  0, &  0,   &      \\
7830    &  0, &  0, &  0, &  0, &  0, &  0, &  0, &  0\;\ & \!\} \\
7831 \end{array}
7832 \end{align*}
7833
7834 \section{Quantization Parameters}
7835 \label{app:vp3-quant-params}
7836
7837 The hard-coded quantization parameters used by VP3 are defined as follows:
7838
7839 \begin{align*}
7840 \bitvar{ACSCALE} = & \begin{array}[t]{r@{}rrrrrrrr@{}l}
7841 \{ & 500, & 450, & 400, & 370, & 340, & 310, & 285, & 265,   &      \\
7842    & 245, & 225, & 210, & 195, & 185, & 180, & 170, & 160,   &      \\
7843    & 150, & 145, & 135, & 130, & 125, & 115, & 110, & 107,   &      \\
7844    & 100, &  96, &  93, &  89, &  85, &  82, &  75, &  74,   &      \\
7845    &  70, &  68, &  64, &  60, &  57, &  56, &  52, &  50,   &      \\
7846    &  49, &  45, &  44, &  43, &  40, &  38, &  37, &  35,   &      \\
7847    &  33, &  32, &  30, &  29, &  28, &  25, &  24, &  22,   &      \\
7848    &  21, &  19, &  18, &  17, &  15, &  13, &  12, &  10\;\ & \!\} \\
7849 \end{array} \\
7850 \bitvar{DCSCALE} = & \begin{array}[t]{r@{}rrrrrrrr@{}l}
7851 \{ & 220, & 200, & 190, & 180, & 170, & 170, & 160, & 160,   &      \\
7852    & 150, & 150, & 140, & 140, & 130, & 130, & 120, & 120,   &      \\
7853    & 110, & 110, & 100, & 100, &  90, &  90, &  90, &  80,   &      \\
7854    &  80, &  80, &  70, &  70, &  70, &  60, &  60, &  60,   &      \\
7855    &  60, &  50, &  50, &  50, &  50, &  40, &  40, &  40,   &      \\
7856    &  40, &  40, &  30, &  30, &  30, &  30, &  30, &  30,   &      \\
7857    &  30, &  20, &  20, &  20, &  20, &  20, &  20, &  20,   &      \\
7858    &  20, &  10, &  10, &  10, &  10, &  10, &  10, &  10\;\ & \!\} \\
7859 \end{array}
7860 \end{align*}
7861
7862 VP3 defines only a single quantization range for each quantization type and
7863  color plane, and the base matrix used is constant throughout the range.
7864 There are three base matrices defined.
7865 The first is used for the $Y'$ channel of INTRA mode blocks, and the second for
7866  both the $C_b$ and $C_r$ channels of INTRA mode blocks.
7867 The last is used for INTER mode blocks of all channels.
7868
7869 \begin{align*}
7870 \bitvar{BMS} = \{ & \begin{array}[t]{r@{}rrrrrrrr@{}l}
7871 \{ & 16, & 11, & 10, & 16, &  24, &  40, &  51, &  61,   &       \\
7872    & 12, & 12, & 14, & 19, &  26, &  58, &  60, &  55,   &       \\
7873    & 14, & 13, & 16, & 24, &  40, &  57, &  69, &  56,   &       \\
7874    & 14, & 17, & 22, & 29, &  51, &  87, &  80, &  62,   &       \\
7875    & 18, & 22, & 37, & 58, &  68, & 109, & 103, &  77,   &       \\
7876    & 24, & 35, & 55, & 64, &  81, & 104, & 113, &  92,   &       \\
7877    & 49, & 64, & 78, & 87, & 103, & 121, & 120, & 101,   &       \\
7878    & 72, & 92, & 95, & 98, & 112, & 100, & 103, &  99\;\ & \!\}, \\
7879 %\end{array} \\
7880 %& \begin{array}[t]{r@{}rrrrrrrr@{}l}
7881 \{ & 17, & 18, & 24, & 47, & 99, & 99, & 99, & 99,   &       \\
7882    & 18, & 21, & 26, & 66, & 99, & 99, & 99, & 99,   &       \\
7883    & 24, & 26, & 56, & 99, & 99, & 99, & 99, & 99,   &       \\
7884    & 47, & 66, & 99, & 99, & 99, & 99, & 99, & 99,   &       \\
7885    & 99, & 99, & 99, & 99, & 99, & 99, & 99, & 99,   &       \\
7886    & 99, & 99, & 99, & 99, & 99, & 99, & 99, & 99,   &       \\
7887    & 99, & 99, & 99, & 99, & 99, & 99, & 99, & 99,   &       \\
7888    & 99, & 99, & 99, & 99, & 99, & 99, & 99, & 99\;\ & \!\}, \\
7889 %\end{array} \\
7890 %& \begin{array}[t]{r@{}rrrrrrrr@{}l}
7891 \{ & 16, & 16, & 16, & 20, & 24, & 28, &  32, &  40,   &            \\
7892    & 16, & 16, & 20, & 24, & 28, & 32, &  40, &  48,   &            \\
7893    & 16, & 20, & 24, & 28, & 32, & 40, &  48, &  64,   &            \\
7894    & 20, & 24, & 28, & 32, & 40, & 48, &  64, &  64,   &            \\
7895    & 24, & 28, & 32, & 40, & 48, & 64, &  64, &  64,   &            \\
7896    & 28, & 32, & 40, & 48, & 64, & 64, &  64, &  96,   &            \\
7897    & 32, & 40, & 48, & 64, & 64, & 64, &  96, & 128,   &            \\
7898    & 40, & 48, & 64, & 64, & 64, & 96, & 128, & 128\;\ & \!\}\;\;\} \\
7899 \end{array}
7900 \end{align*}
7901
7902 The remaining parameters simply assign these matrices to the proper quant
7903  ranges.
7904
7905 \begin{align*}
7906 \bitvar{NQRS}    = & \{ \{1, 1, 1\}, \{1, 1, 1\} \} \\
7907 \bitvar{QRSIZES} = &
7908  \{ \{ \{1\}, \{1\}, \{1\} \}, \{ \{1\}, \{1\}, \{1\} \} \} \\
7909 \bitvar{QRBMIS}  = &
7910  \{ \{ \{0, 0\}, \{1, 1\}, \{1, 1\} \}, \{ \{2, 2\}, \{2, 2\}, \{2, 2\} \} \}
7911 \end{align*}
7912
7913 \section{Huffman Tables}
7914 \label{app:vp3-huffman-tables}
7915
7916 The following tables contain the hard-coded Huffman codes used by VP3.
7917 There are 80 tables in all, each with a Huffman code for each of the 32 token values.
7918 The tokens are sorted by the most significant bits of their Huffman code.
7919 This is the same order in which they will be decoded from the setup header.
7920
7921 \include{vp3huff}
7922
7923 \cleardoublepage
7924 \chapter{Colophon}
7925
7926 Ogg is a \href{http://www.xiph.org}{Xiph.org Foundation} effort to protect
7927  essential tenets of Internet multimedia from corporate hostage-taking; Open
7928  Source is the net's greatest tool to keep everyone honest.
7929 See \href{http://www.xiph.org/about.html}{About the Xiph.org Foundation} for
7930  details.
7931
7932 Ogg Theora is the first Ogg video codec.
7933 Anyone may freely use and distribute the Ogg and Theora specifications, whether
7934  in private, public, or corporate capacity.
7935 However, the Xiph.org Foundation and the Ogg project reserve the right to set
7936  the Ogg Theora specification and certify specification compliance.
7937
7938 Xiph.org's Theora software codec implementation is distributed under a BSD-like
7939  license.
7940 This does not restrict third parties from distributing independent
7941  implementations of Theora software under other licenses.
7942
7943 \begin{wrapfigure}{l}{0pt}
7944 \includegraphics[width=2.5cm]{xifish}
7945 \end{wrapfigure}
7946
7947 These pages are copyright \textcopyright{} 2004 Xiph.org Foundation.
7948 All rights reserved.
7949 Ogg, Theora, Vorbis, Xiph.org Foundation and their logos are trademarks
7950  (\texttrademark) of the \href{http://www.xiph.org}{Xiph.org Foundation}.
7951
7952 This document is set in \LaTeX.
7953
7954
7955
7956 \cleardoublepage
7957 \bibliography{spec}
7958
7959 \end{document}