Include a pre-rendered pdf version
[dirac-spec-errata.git] / picture-dec.tex
blob0ae56dc0e68c8a27811fd95757fb1872f3274a3f
1 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2 % - This chapter defines the overall process - %
3 % - for decoding a picture - %
4 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
6 \label{picturedec}
8 This section defines the processes for decoding a picture from a Dirac stream.
10 Picture decoding depends upon correctly parsing the stream, and decoding operations
11 are dependent on decoding the sequence header and picture metadata
12 (Sections~\ref{sequenceheader} and~\ref{picturesyntax}) and unpacking the coefficient
13 and motion data (Sections~\ref{wltunpacking} and~\ref{motiondec}).
15 \subsection{Overall picture decoding process}
16 \label{overallpicturedec}
18 Picture data from the current picture being decoded is stored in the $\CurrentPicture$ state
19 variable, which is a map with labels $\PicNum$, and $Y$, $C1$ and $C2$ representing
20 luma and chroma data.
22 After decoding, the decoded picture is returned to the decoding application.
24 The $picture\_decode()$ process shall be invoked after the $picture\_parse()$ process has completed and shall be defined as follows:
26 \begin{pseudo}{picture\_decode}{}
27 \bsCODE{\CurrentPicture=\{\}}
28 \bsCODE{\CurrentPicture[\PicNum]=\PictureNumber}
29 \bsIF{is\_ref()}
30 \bsCODE{ref\_picture\_remove()}{\ref{refbuffer}}
31 \bsEND
32 \bsIF{\ZeroResidual==\false}
33 \bsCODE{inverse\_wavelet\_transform()}{\ref{idwt}}
34 \bsELSE
35 \bsCODE{\CurrentPicture[Y]=\mathbf{0}}
36 \bsCODE{\CurrentPicture[C1]=\mathbf{0}}
37 \bsCODE{\CurrentPicture[C2]=\mathbf{0}}
38 \bsEND
39 \bsIF{is\_inter()}
40 \bsCODE{ref1=get\_ref(\RefOneNum)}{\ref{refbuffer}}
41 \bsIF{num\_refs()==2}{\ref{parsecodevalues}}
42 \bsCODE{ref2=get\_ref(\RefTwoNum)}{\ref{refbuffer}}
43 \bsEND
44 \bsCODE{motion\_compensate(ref1[Y], ref2[Y], \CurrentPicture[Y], Y)}{\ref{motioncompensate}}
45 \bsCODE{motion\_compensate(ref1[C1], ref2[C1], \CurrentPicture[C1], C1)}{\ref{motioncompensate}}
46 \bsCODE{motion\_compensate(ref1[C2], ref2[C2], \CurrentPicture[C2], C2)}{\ref{motioncompensate}}
47 \bsEND
48 \bsCODE{clip\_picture()}{\ref{pictureclip}}
49 \bsIF{is\_ref()}
50 \bsCODE{ref\_picture\_add()}{\ref{refbuffer}}
51 \bsEND
52 \bsCODE{offset\_output\_data(\CurrentPicture)}{\ref{videooutput}}
53 \bsRET{\CurrentPicture}
54 \end{pseudo}
57 \subsection{Picture reordering}
58 Picture numbers within the stream may not be in numerical order, and
59 subsequent reordering may be required: the size
60 of the decoded picture buffer required to perform any such reordering may be
61 specified as part of the application profile and level (Annex~\ref{profilelevel}).
63 \subsection{Random access}
64 \label{randomaccess}
65 Sequence headers represent safe entry points for decoding a sequence.
67 An accessible picture (with reference to a given sequence header)
68 shall be defined as a picture decodeable without dependence on
69 data prior to the sequence header in coded order.
71 Accessibility should normally imply that each accessible picture has
72 no reference picture prior to the sequence header, and no chain of
73 references leading to a reference picture prior to the sequence header.
74 A given level may allow this condition to
75 be relaxed (for example, in P-only coding where unavailable references
76 may be substituted for by zero pictures), but where no specific provision
77 to the contrary is specified in an applicable level or profile, it shall
78 apply.
80 The first picture data unit after a sequence header shall be called the
81 access picture and shall be accessible with respect to the sequence header.
82 It should normally be an intra picture. If the sequence contains inter pictures
83 it should normally be an intra reference picture.
85 All picture data units subsequent to the sequence header in coded order
86 shall also be accessible with respect to the sequence header
87 if their picture numbers are greater than
88 or equal to that of the access picture. The access picture therefore represents
89 a temporal access point into the sequence.
91 \begin{informative}
93 If a sequence satisfies a maximum reordering depth constraint
94 (Annex~\ref{picturereordering})
95 of size $N$, all pictures more than $N$ pictures later than the sequence header will
96 have larger picture numbers than the first picture after the sequence header, and hence will be accessible. A reordering depth constraint thus implies that after a
97 sequence header at most $N$ pictures will need to be discarded before all pictures are decodeable.
98 \end{informative}
99 \subsection{Reference picture buffer management}
100 \label{refbuffer}
102 This section specifies how the Dirac stream data shall be used to manage the reference
103 picture buffer $\RefBuffer$. The reference picture buffer has a maximum size of
104 $\MaxRefBufferSize$ elements, as set in the applicable level (Annex~\ref{profilelevel}).
106 The $ref\_picture\_remove()$ process shall be defined as
107 follows:
109 \begin{pseudo}{ref\_picture\_remove}{}
110 \bsCODE{n=\RetiredPicture}
111 \bsFOR{k=0}{\RefBufferSize-1}
112 \bsIF{\RefBuffer[k][\PicNum]==n}
113 \bsFOR{j=k}{\RefBufferSize-2}
114 \bsCODE{\RefBuffer[j]=\RefBuffer[j+1]}
115 \bsEND
116 \bsCODE{\RefBufferSize -= 1}
117 \bsEND
118 \bsEND
119 \end{pseudo}
121 The $get\_ref(n)$ function shall return the reference picture in the buffer with
122 picture number $n$. If there is no such picture it shall return an all-zero picture.
124 The $ref\_picture\_add()$ process for adding pictures to the reference picture
125 buffer shall proceed according to the following rules:
127 \textbf{Case 1.} If the reference picture buffer is not full, i.e.\ has fewer than $\MaxRefBufferSize$ elements,
128 then add $\CurrentPicture$ to the end of the buffer.
130 \textbf{Case 2.} If the reference picture buffer is full, i.e.\ it has $\MaxRefBufferSize$ elements, then remove the
131 first (i.e.\ oldest) element of the buffer, $\RefBuffer[0]$, set
132 \[\RefBuffer[i] = \RefBuffer[i+1] \]
133 for $i=0$ to $\RefBufferSize-2$, and set the last element $\RefBuffer[\RefBufferSize-1]$ equal to
134 a copy of $\CurrentPicture$.
136 \input{idwt}
138 \subsection{Motion compensation}
139 \input{mc}
141 \subsection{Clipping}
142 \label{pictureclip}
144 Picture data must be clipped prior to being output or being
145 used as a reference:
147 \begin{pseudo}{clip\_picture}{}
148 \bsFOREACH{c}{Y,C1,C2}
149 \bsCODE{clip\_component(\CurrentPicture[c], c)}
150 \bsEND
151 \end{pseudo}
154 \begin{pseudo}{clip\_component}{comp\_data,c}
155 \bsIF{c==Y}
156 \bsCODE{bit\_depth=\LumaDepth}
157 \bsELSE
158 \bsCODE{bit\_depth=\ChromaDepth}
159 \bsEND
160 \bsFOR{y=0}{\height(comp\_data)-1}
161 \bsFOR{x=0}{\width(comp\_data)-1}
162 \bsCODE{comp\_data[y][x] = \clip(comp\_data[y][x], -2^{bit\_depth-1}, 2^{bit\_depth-1}-1)}
163 \bsEND
164 \bsEND
165 \end{pseudo}
167 \begin{informative}
168 Note that clipping is incorporated into motion compensation, so that strictly speaking additional
169 clipping is only required for intra pictures.
170 \end{informative}
172 \subsection{Video output ranges}
173 \label{videooutput}
175 Video output data ranges are deemed to be non-negative, so that the offset and excursion
176 values may be applied by subsequent processing. Since decoded video data is bipolar, it must be suitably offset before output:
178 \begin{pseudo}{offset\_output\_data}{picture\_data}
179 \bsFOREACH{c}{Y, C1, C2}
180 \bsIF{c==Y}
181 \bsCODE{bit\_depth=\LumaDepth}
182 \bsELSE
183 \bsCODE{bit\_depth=\ChromaDepth}
184 \bsEND
185 \bsCODE{comp=picture\_data[c]}
186 \bsFOR{y=0}{\height(comp)-1}
187 \bsFOR{x=0}{\width(comp)-1}
188 \bsCODE{comp[y][x]+=2^{bit\_depth-1}}
189 \bsEND
190 \bsEND
191 \bsEND
192 \end{pseudo}