Include a pre-rendered pdf version
[dirac-spec-errata.git] / idwt.tex
blob3b069d9e922bd6c0fdd537e462388c25c428dcbd
1 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2 % - This chapter defines how the inverse discrete - %
3 % - wavelet transform is done - %
4 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
7 \subsection{Picture IDWT}
9 The inverse discrete wavelet transform process shall consist of transforming the
10 wavelet coefficients for each of the video components. It shall be defined as follows:
12 \begin{pseudo}{inverse\_wavelet\_transform}{}
13 \bsCODE{\CurrentPicture[Y]=idwt(\YTransform)}{\ref{idwt}}
14 \bsCODE{\CurrentPicture[C1]=idwt(\COneTransform)}{\ref{idwt}}
15 \bsCODE{\CurrentPicture[C2]=idwt(\CTwoTransform)}{\ref{idwt}}
16 \bsFOREACH{c}{Y,C1,C2}
17 \bsCODE{idwt\_pad\_removal(\CurrentPicture[c],c)}{\ref{padremoval}}
18 \end{pseudo}
20 \subsection{Component IDWT}
21 \label{idwt}
23 This section defines the $idwt(coeff\_data)$ process for reconstructing picture
24 component data from decoded subband data $coeff\_data$ using the inverse discrete wavelet transform (IDWT). The IDWT shall be invoked in the picture decoding process only after successful unpacking of the subband coefficient data (Section \ref{transformdec}).
26 The IDWT process shall return a pixel array from the subband wavelet coefficients representing a reconstructed video component (Y, C1 or C2) for a single picture.
29 Since wavelet filtering operates on both rows and columns of two-dimensional arrays
30 independently it is useful to define operators $\row(a,k)$ and $\column(a,k)$ for
31 extracting rows and columns with index $k$ from a 2-dimensional array $a$:
33 If $b=\row(a,k)$ then $b[r]$ is a {\em reference} to the value of $a[k][r]$. This means that modifying the
34 value of $b[r]$ modifies the value of $a[k][r]$.
36 If $b=\column(a,k)$ then $b[r]$ is a {\em reference} the value of $a[r][k]$. This means that modifying the
37 value of $b[r]$ modifies the value of $a[r][k]$.
39 The $idwt()$ process shall be an iterative procedure operating on four subbands
40 (\LL, \HL,\LH and \HH) at each iteration stage to produce a new subband. The procedure
41 shall be as follows
43 \begin{pseudo}{idwt}{coeff\_data}
44 \bsCODE{LL\_band = coeff\_data[0][\LL]}
45 \bsFOR{n=1}{\TransformDepth}
46 \bsCODE{
47 new\_LL= vh\_synth(LL\_band, coeff\_data[n][\HL], coeff\_data[n][\LH],
48 coeff\_data[n][\HH])
49 }{\ref{vhsynth}}
50 \bsCODE{LL\_band=new\_LL}
51 \bsEND
52 \bsRET{LL\_band}
53 \end{pseudo}
55 Note that at each stage, the input dimensions of the input $LL\_band$ will be the same
56 as those of the other input bands, whereas the output dimensions are double those of the input bands.
58 \subsubsection{Vertical and horizontal synthesis}
59 \label{vhsynth}
61 This section specifies the operation of the vertical and horizontal
62 synthesis process:
64 $vh\_synth(LL\_data, HL\_data, LH\_data, HH\_data)$
66 $vh\_synth$ shall return an array of twice the dimensions of each of the input
67 argument arrays.
69 $vh\_synth$ is repeatedly invoked by the IDWT synthesis process and operates on four subband data arrays of identical dimensions to produce a new array $synth$,
70 which shall be returned as the result of the process.
72 {\bf Step 1.} $synth$ is a temporary two-dimensional array that shall be initialised so that:
73 \begin{eqnarray*}
74 \width(synth) & = & 2*\width(LL\_data) \\
75 \height(synth) & = & 2*\height(LL\_data)
76 \end{eqnarray*}
78 {\bf Step 2.} The data from the four arrays shall be interleaved as follows:
80 \begin{pseudo*}
81 \bsFOR{y=0}{(\height(synth)//2)-1}
82 \bsFOR{x=0}{(\width(synth)//2)-1}
83 \bsCODE{synth[2*y][2*x] = LL\_data[y][x]}
84 \bsCODE{synth[2*y][2*x+1] = HL\_data[y][x]}
85 \bsCODE{synth[2*y+1][2*x] = LH\_data[y][x]}
86 \bsCODE{synth[2*y+1][2*x+1] = HH\_data[y][x]}
87 \bsEND
88 \bsEND
89 \bsCODE{\hdots}
90 \end{pseudo*}
92 Note: This enables in-place calculation during the inverse filter process.
94 {\bf Step 3.} Data shall be synthesised vertically by operating on each column
95 of data using a one-dimensional filter, and then horizontally by operating
96 on each row. The one-dimensional filters used shall be determined by
97 the value of $\WaveletIndex$ according to Tables \ref{filtertable0}--\ref{filtertable6}.
98 The process shall be as follows:
100 \begin{pseudo*}
101 \bsFOR{x=0}{\width(synth)-1}
102 \bsCODE{1d\_synthesis(\column(synth, x) )}{\ref{onedsynth}}
103 \bsEND
104 \bsFOR{y=0}{\height(synth)-1}
105 \bsCODE{1d\_synthesis(\row(synth, y) )}{\ref{onedsynth}}
106 \bsEND
107 \bsCODE{\hdots}
108 \end{pseudo*}
110 {\bf Step 4.} Finally, the synthesised subband data shall implement a bitshift to
111 remove any accuracy bits. The bit shift value $filtershift()$ used shall be determined
112 by the value of $\WaveletIndex$ according to Tables \ref{filtertable0}--\ref{filtertable6}.
113 The process shall be as follows:
115 \begin{pseudo*}
116 \bsCODE{shift = filtershift()}
117 \bsIF{shift>0}
118 \bsFOR{y=0}{\height(synth)-1}
119 \bsFOR{x=0}{\width(synth)-1}
120 \bsCODE{synth[y][x] = (synth[y][x] + (1<<(shift-1)))\gg shift}
121 \bsEND
122 \bsEND
123 \bsEND
124 \end{pseudo*}
126 \begin{informative}
127 Accuracy bits are added in the encoder by shifting up all coefficients in the LL band
128 prior to applying any filtering (this includes an initial shift of all values in the component
129 data). Adding a small shift before each decomposition stage is the most efficient way of
130 providing additional resolution mitigating aliasing through non-linear rounding effects.
131 \end{informative}
133 \subsubsection{One-dimensional synthesis}
134 \label{onedsynth}
136 This section specifies the one-dimensional synthesis process
137 $1d\_synthesis()$, which shall apply to a 1-dimensional array of coefficients
138 of even length, consisting of either a row or a column of a 2-dimensional integral data array.
140 The one-dimensional synthesis process shall comprise the application of a
141 number of reversible integer lifting filter operations.
143 Lifting filtering operations shall be one of four types, Type 1, Type 2, Type 3 and
144 Type 4. Each type shall be characterised by four elements:
145 \begin{itemize}
146 \item a filter length value $L$
147 \item a filter offset value $D$
148 \item an array of taps of length $L$: $taps[0],\ldots,taps[L-1]$
149 \item a scale factor $S$
150 \end{itemize}
152 The four types of lifting operations shall be defined by the functions:
153 \begin{description}
154 \item[] $lift1(A,L,D,taps,S)$,
155 \item[] $lift2(A,L,D,taps,S)$,
156 \item[] $lift3(A,L,D,taps,S)$, and
157 \item[] $lift4(A,L,D,taps,S)$.
158 \end{description}
159 respectively and shall act upon the values in a one-dimensional array $A$.
161 The Type 1 lifting process $lift1(A,L,D,taps,S)$ shall be defined as follows:
163 \begin{pseudo}{lift1}{A,L,D,taps,S}
164 \bsFOR{n=0}{(\length(A)//2)-1}
165 \bsCODE{sum=0}
166 \bsFOR{i=D}{L+D-1}
167 \bsCODE{pos=2*(n+i)-1}
168 \bsCODE{pos=\min(pos, \length(A)-1)}
169 \bsCODE{pos=\max(pos, 1)}
170 \bsCODE{sum+=taps[i-D]*A[pos]}
171 \bsEND
172 \bsIF{S>0}
173 \bsCODE{sum+=(1\ll (S-1))}
174 \bsEND
175 \bsCODE{A[2*n]+=(sum\gg S)}
176 \bsEND
177 \end{pseudo}
179 The Type 2 lifting process $lift2(A,L,D,taps,S)$ shall be defined as follows:
181 \begin{pseudo}{lift2}{A,L,D,taps,S}
182 \bsFOR{n=0}{(\length(A)//2)-1}
183 \bsCODE{sum=0}
184 \bsFOR{i=D}{L+D-1}
185 \bsCODE{pos=2*(n+i)-1}
186 \bsCODE{pos=\min(pos, \length(A)-1)}
187 \bsCODE{pos=\max(pos, 1)}
188 \bsCODE{sum+=taps[i-D]*A[pos]}
189 \bsEND
190 \bsIF{S>0}
191 \bsCODE{sum+=(1\ll (S-1))}
192 \bsEND
193 \bsCODE{A[2*n]-=(sum\gg S)}
194 \bsEND
195 \end{pseudo}
197 The Type 3 lifting process $lift3(A,L,D,taps,S)$ shall be defined as follows:
199 \begin{pseudo}{lift3}{A,L,D,taps,S}
200 \bsFOR{n=0}{(\length(A)//2)-1}
201 \bsCODE{sum=0}
202 \bsFOR{i=D}{L+D-1}
203 \bsCODE{pos=2*(n+i)}
204 \bsCODE{pos=\min(pos, \length(A)-2)}
205 \bsCODE{pos=\max(pos, 0)}
206 \bsCODE{sum+=taps[i-D]*A[pos]}
207 \bsEND
208 \bsIF{S>0}
209 \bsCODE{sum+=(1\ll (S-1))}
210 \bsEND
211 \bsCODE{A[2*n+1]+=(sum\gg S)}
212 \bsEND
213 \end{pseudo}
215 The Type 4 lifting process $lift4(A,L,D,taps,S)$ shall be defined as follows:
217 \begin{pseudo}{lift4}{A,L,D,taps,S}
218 \bsFOR{n=0}{(\length(A)//2)-1}
219 \bsCODE{sum=0}
220 \bsFOR{i=D}{L+D-1}
221 \bsCODE{pos=2*(n+i)}
222 \bsCODE{pos=\min(pos, \length(A)-2)}
223 \bsCODE{pos=\max(pos, 0)}
224 \bsCODE{sum+=taps[i-D]*A[pos]}
225 \bsEND
226 \bsIF{S>0}
227 \bsCODE{sum+=(1\ll (S-1))}
228 \bsEND
229 \bsCODE{A[2*n+1]-=(sum\gg S)}
230 \bsEND
231 \end{pseudo}
233 $1d\_synthesis$ shall apply the sequence of lifting filters specified in Section \ref{wltfilters}
234 corresponding to the value of $\WaveletIndex$,and shall invoke the corresponding lifting processes with the parameters defined.
236 \paragraph{Mathematical formulation of lifting processes (Informative)}
237 $\ $\newline
239 The lifting processes defined in the previous section are extremely similar, and
240 careful attention should be paid to the detail of their operation in any implementation.
241 The four different variants arise from two factors: the ‘phase’ (odd or
242 even) of the lifting operation, and their implementation using integer-only
243 operations, which introduces rounding errors and makes addition and subtraction
244 subtly different.
246 A lifting operation either modifies the odd coefficients by a linear combination of the
247 even coefficients, or vice-versa. Mathematically, the four types of filter may be
248 described as follows.
250 Type 1 and Type 2 lifting filtering operations modify the even coefficients
251 by the odd coefficients:
252 \begin{eqnarray*}
253 A[2*n]& +=& \left( \sum^M_{i=-N} t_i *A[2*(n+i) + 1] +(1\ll (s-1))\right) \gg s \mbox{ (Type 1)} \\
254 A[2*n]& -=& \left( \sum^M_{i=-N} t_i *A[2*(n+i) + 1] +(1\ll (s-1))\right) \gg s \mbox{ (Type 2)}
255 \end{eqnarray*}
257 Type 3 and Type 4 lifting filtering operation modify the odd coefficients
258 by the even coefficients:
259 \begin{eqnarray*}
260 A[2*n+1]& +=& \left( \sum^M_{i=-N} t_i A[2*(n+i)]+(1\ll (s-1)) \right) \gg s \mbox{ (Type 3)} \\
261 A[2*n+1]& -=& \left( \sum^M_{i=-N} t_i A[2*(n+i)] +(1\ll (s-1))\right) \gg s \mbox{ (Type 4)} \\
262 \end{eqnarray*}
264 The distinctions between Type 1 and Type 2 and between
265 Type 3 and Type 4 filters are necessary
266 because integer division (bit-shifting) is being used, and so the filters are non-linear:
267 a Type 1 or Type 3 filter with taps $t_i$ is {\em not} equivalent to
268 an Type 2 or Type 4 filter with taps $-t_i$.
270 Edge extension is used where the filter would otherwise extend beyond the
271 boundaries of the array. This is slightly different between Types 1 and 2 on the
272 one hand and Types 3 and 4 on the other. This is because even values and
273 odd values must be extended separately to maintain the correct phase (and hence invertibility of the filter). For example, a Type 1 filter must use the values 1 and $\length(A)-1$ at the edges because (as the length is even) these are the odd values nearest the edges.
275 Further information on wavelet filtering and lifting is provided in Annex
276 \ref{transformandlifting}.
278 \subsubsection{Lifting filter parameters}
279 \label{wltfilters}
281 The lifting filters and filter bit-shift operations that apply for each value
282 $\WaveletIndex$ shall be as specified in Tables \ref{filtertable0} to \ref{filtertable6}
283 below.
285 \begin{table}[h]
286 \begin{tabular}{|l|}
288 \hline
289 Lifting steps: \\
290 \begin{tabular}{l}
291 1. Type 2, $L=2, D=0, taps=[1,1], S=2$ i.e. \\
292 \quad $ A[2*n] -= (A[2*n-1]+A[2*n+1]+2)\gg 2$ \\
293 2. Type 3, $L=4, D=-1, taps=[-1,9,9,-1], S=4$ i.e. \\
294 \quad $ A[2*n+1] += (-A[2*n-2]+9*A[2*n]+9*A[2*n+2]-A[2*n+4]+8)\gg 4$
295 \end{tabular}
298 $filtershift()$ returns 1\\
299 \hline
301 \end{tabular}
302 \caption{$\WaveletIndex==0$: Deslauriers-Dubuc (9,7) lifting stages and shift values}\label{filtertable0}
303 \end{table}
305 \begin{table}[h]
307 \begin{tabular}{|l|}
308 \hline
309 Lifting steps: \\
311 \begin{tabular}{l}
312 1. Type 2, $L=2, D=0, taps=[1,1], S=2$ i.e. \\
313 \quad $ A[2*n] -= (A[2*n-1]+A[2*n+1]+2)\gg 2$ \\
314 2. Type 3, $L=2, D=0, taps=[1,1], S=1$ i.e. \\
315 \quad $ A[2*n+1] += (A[2*n]+A[2*n+2]+1)\gg 1$
316 \end{tabular}
319 $filtershift()$ returns 1\\
320 \hline
322 \end{tabular}
323 \caption{$\WaveletIndex==1$: LeGall (5,3) lifting stages and shift values}
324 \end{table}
326 \begin{table}[h]
327 \begin{tabular}{|l|}
329 \hline
330 Lifting steps: \\
331 \begin{tabular}{l}
332 1. Type 2, $L=4, D=-1, taps=[-1,9,9,-1], S=5$ i.e. \\
333 \quad $ A[2*n] -= (-A[2*n-3]+9*A[2*n-1]+9*A[2*n+1]-A[2*n+3]+16)\gg 5$ \\
334 2. Type 3, $L=4, D=-1, taps=[-1,9,9,-1], S=4$ i.e. \\
335 \quad $ A[2*n+1] += (-A[2*n-2]+9*A[2*n]+9*A[2*n+2]-A[2*n+4]+8)\gg 4$
336 \end{tabular}
339 $filtershift()$ returns 1\\
340 \hline
342 \end{tabular}
343 \caption{$\WaveletIndex==2$: Deslauriers-Dubuc (13,7) lifting stages and shift values}
344 \end{table}
346 \begin{table}[h]
347 \begin{tabular}{|l|}
349 \hline
350 Lifting steps:\\
351 \begin{tabular}{l}
352 1. Type 2, $L=1, D=1, taps=[1], S=1$ i.e. \\
353 \quad $A[2*n] -= (A[2*n+1]+1)\gg 1$ \\
354 2. Type 3, $L=1, D=0, taps=[1], S=0$ i.e. \\
355 \quad $ A[2*n+1] += A[2*n]$
356 \end{tabular}
359 $filtershift()$ returns 0\\
360 \hline
362 \end{tabular}
363 \caption{$\WaveletIndex==3$: Haar filter with no shift}
364 \end{table}
366 \begin{table}[h]
367 \begin{tabular}{|l|}
369 \hline
370 Lifting steps:\\
371 \begin{tabular}{l}
372 1. Type 2, $L=1, D=1, taps=[1], S=1$ i.e. \\
373 \quad $A[2*n] -= (A[2*n+1]+1)\gg 1$ \\
374 2. Type 3, $L=1, D=0, taps=[1], S=0$ i.e. \\
375 \quad $ A[2*n+1] += A[2*n]$
376 \end{tabular}
379 $filtershift()$ returns 1\\
380 \hline
382 \end{tabular}
383 \caption{$\WaveletIndex==4$: Haar filter with single shift}
384 \end{table}
386 \begin{table}[h]
387 \begin{tabular}{|l|}
388 \hline
389 Lifting steps:\\
390 \begin{tabular}{l}
391 1. Type 3, $L=8, D=-3, taps=[]-2,10,-25,81,81,-25,10,-2], S=8$ i.e. \\
392 $ \begin{array}{rcl} A[2*n+1] & += & (-2*(A[2*n-6]+A[2*n+8])+10*(A[2*n-4]+A[2*n+6])\\
393 & & -25*(A[2*n-2]+A[2*n+4])+81*(A[2*n]+A[2*n+2])+128)\gg 8 \end{array}$ \\
394 1. Type 2, $L=8, D=-3, taps=[-8,21,-46,161,161,-46,21,-8], S=8$ i.e. \\
395 $ \begin{array}{rcl}A[2*n] & -= & (-8*(A[2*n-7]+A[2*n+7])+21*(A[2*n-5]+A[2*n+5]) \\
396 & & -46*(A[2*n-3]+A[2*n+3])+161*(A[2*n-1]+A[2*n+1])+128)\gg 8 \end{array}$
397 \end{tabular}
400 $filtershift()$ returns 0\\
401 \hline
403 \end{tabular}
404 \caption{$\WaveletIndex==5$: Fidelity filter for improved downconversion and anti-aliasing}
405 \end{table}
407 \begin{table}[h]
408 \begin{tabular}{|l|}
410 \hline
411 Lifting steps:\\
412 \begin{tabular}{l}
413 1. Type 2, $L=2, D=0, taps=[1817,1817], S=12$ i.e. \\
414 \quad $A[2*n] -= (1817*A[2*n-1]+1817*A[2*n+1]+2048)\gg 12$ \\
415 2. Type 4, $L=2, D=0, taps=[3616,3616], S=12$ i.e. \\
416 \quad $A[2*n+1] -= (3616*A[2*n]+3616*A[2*n+2]+2048)\gg 12$ \\
417 3. Type 1, $L=2, D=0, taps=[217,217], S=12$ i.e. \\
418 \quad $A[2*n] += (217*A[2*n-1]+217*A[2*n+1]+2048)\gg 12$ \\
419 4. Type 3, $L=2, D=0, taps=[6497,6497], S=12$ i.e. \\
420 \quad $ A[2*n+1] += (6497*A[2*n]+6497*A[2*n+2]+2048)\gg 12$
421 \end{tabular}
424 $filtershift()$ returns 1\\
425 \hline
427 \end{tabular}
428 \caption{$\WaveletIndex==6$: Integer lifting approximation to Daubechies (9,7)}\label{filtertable6}
429 \end{table}
430 \clearpage
431 \subsection{Removal of IDWT pad values}
432 \label{padremoval}
434 This section defines the decoding process $idwt\_pad\_removal(pic, c)$
435 for removing extraneous values after performing the IDWT.
437 Section \ref{wltinit} requires that subband coefficient data arrays are padded to ensure that
438 the reconstructed data array $pic$ has dimensions divisible by $2^\TransformDepth$.
440 Values $width$ and $height$ are defined to be the appropriate dimensions
441 of the component data:
443 \begin{itemize}
444 \item If $c=Y$, then
445 \begin{eqnarray*}
446 width & =& \LumaWidth \\
447 height & =& \LumaHeight
448 \end{eqnarray*}
449 \item else if $c=C1$ or $c=C2$,
450 \begin{eqnarray*}
451 width & =& \ChromaWidth \\
452 height & =& \ChromaHeight
453 \end{eqnarray*}
454 \end{itemize}
456 All component data $pic[j][i]$ with
458 \begin{itemize}
459 \item $i\geq width$, or
460 \item $j\geq height$
461 \end{itemize}
463 shall be discarded and $pic$ shall be resized to have width $width$ and height $height$.