Typo in softfloat_reciprocal comment.
[FFMpeg-mirror/lagarith.git] / libavcodec / lagarith.c
blob8071fdb84315fd3e4c57f511b5374f96eb5e5e9e
/*
 * Lagarith lossless decoder
 * Copyright (c) 2009 Nathan Caldwell <saintdev (at) gmail.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/**
 * @file libavcodec/lagarith.c
 * Lagarith lossless decoder
 * @author Nathan Caldwell
 */
#include <limits.h>
#include <string.h>

#include "avcodec.h"
#include "get_bits.h"
#include "mathops.h"
#include "dsputil.h"
#include "lagarithrac.h"
/**
 * Frame type identifiers. lag_decode_frame() reads the type from the first
 * byte of a packet.
 */
enum LagarithFrameType {
    FRAME_RAW           = 1,    /*!< Uncompressed */
    FRAME_U_RGB24       = 2,    /*!< Unaligned RGB24 */
    FRAME_ARITH_YUY2    = 3,    /*!< Arith coded YUY2 */
    FRAME_ARITH_RGB24   = 4,    /*!< Arith coded RGB24 */
    FRAME_SOLID_GRAY    = 5,    /*!< Solid grayscale color frame */
    FRAME_SOLID_COLOR   = 6,    /*!< Solid non-grayscale color frame */
    FRAME_OLD_ARITH_RGB = 7,    /*!< Obsolete arithmetic coded RGB (Maintained for backwards compatibility) */
    FRAME_ARITH_RGBA    = 8,    /*!< Arithmetic coded RGBA */
    FRAME_SOLID_RGBA    = 9,    /*!< Solid RGBA color frame */
    FRAME_ARITH_YV12    = 10,   /*!< Arithmetic coded YV12 */
    FRAME_REDUCED_RES   = 11,   /*!< Reduced resolution frame */
};
49 typedef struct LagarithContext {
50 AVCodecContext *avctx;
51 AVFrame picture;
52 DSPContext dsp;
53 int zeros; /*!< number of consecutave zero bytes encountered */
54 int zeros_rem; /*!< number of zero bytes remaining to output */
55 } LagarithContext;
/**
 * Compute the 52bit mantissa of 1/(double)denom.
 * This is used because it is more portable across architectures and
 * toolchains than using floats.
 * @param denom denominator; must not be 0, since it is used as a divisor
 * @return 52bit mantissa
 * @see softfloat_mul
 */
static uint64_t softfloat_reciprocal(uint32_t denom)
{
    int shift = av_log2(denom - 1) + 1;
    uint64_t ret = (1ULL << 52) / denom;
    uint64_t err = (1ULL << 52) - ret * denom; /* remainder of the division */

    ret <<= shift;
    err <<= shift;
    err  += denom / 2;                         /* round the correction to nearest */
    return ret + err / denom;
}
/**
 * (uint32_t)(x*f), where f has the given mantissa, and exponent 0
 * Used in combination with softfloat_reciprocal computes x/(double)denom.
 * @param x 32bit integer factor
 * @param mantissa mantissa of f with exponent 0
 * @return 32bit integer value (x*f)
 * @see softfloat_reciprocal
 */
static uint32_t softfloat_mul(uint32_t x, uint64_t mantissa)
{
    uint64_t l = x * (mantissa & 0xffffffff);
    uint64_t h = x * (mantissa >> 32);

    h += l >> 32;
    l &= 0xffffffff;
    /* The rounding term must be formed in 64 bits: av_log2() can return 31,
     * and shifting a plain int 1 into the sign bit is undefined. */
    l += 1ULL << av_log2(h >> 21);
    h += l >> 32;
    return h >> 20;
}
/**
 * Write the value c to n bytes that lie step bytes apart, starting at s.
 * @param s    first byte to write
 * @param c    fill value
 * @param n    number of bytes to write
 * @param step distance in bytes between consecutive writes; 1 means contiguous
 */
static void lag_memset(uint8_t *s, uint8_t c, size_t n, int step)
{
    size_t i;

    if (step == 1) {
        memset(s, c, n);
        return;
    }

    /* Count elements in size_t rather than comparing a signed byte index
     * against an unsigned product. */
    for (i = 0; i < n; i++) {
        *s = c;
        s += step;
    }
}
/**
 * Copy n contiguous bytes from src to destination bytes that lie step bytes
 * apart.
 * @param dest first destination byte
 * @param src  contiguous source bytes
 * @param n    number of bytes to copy
 * @param step distance in bytes between consecutive destination bytes;
 *             1 means contiguous
 * @return dest
 */
static uint8_t *lag_memcpy(uint8_t *dest, const uint8_t *src, size_t n,
                           int step)
{
    uint8_t *out = dest;
    size_t i;

    if (step == 1)
        return memcpy(dest, src, n);

    /* Count elements in size_t so the index has the same type as n. */
    for (i = 0; i < n; i++) {
        *out = src[i];
        out += step;
    }
    return dest;
}
/**
 * Map a signed byte to an unsigned one in zig-zag order:
 * 0, -1, 1, -2, 2, ... become 0, 1, 2, 3, 4, ...
 * The result is used as the length of a run of zero bytes.
 * @param x signed value
 * @return 2 * x for x >= 0, -2 * x - 1 for x < 0
 */
static uint8_t lag_calc_zero_run(int8_t x)
{
    /* Written without shifts: left-shifting a negative value is undefined
     * and right-shifting one is implementation-defined. */
    return x < 0 ? ~(x * 2) : x * 2;
}
126 static int lag_decode_prob(GetBitContext *gb, uint32_t *value)
128 static const uint8_t series[] = { 1, 2, 3, 5, 8, 13, 21 };
129 int i;
130 int bit = 0;
131 int bits = 0;
132 int prevbit = 0;
133 unsigned val;
135 for (i = 0; i < 7; i++) {
136 if (prevbit && bit)
137 break;
138 prevbit = bit;
139 bit = get_bits1(gb);
140 if (bit && !prevbit)
141 bits += series[i];
143 bits--;
144 if (bits < 0 || bits > 31) {
145 *value = 0;
146 return -1;
147 } else if (bits == 0) {
148 *value = 0;
149 return 0;
152 val = get_bits_long(gb, bits);
153 val |= 1 << bits;
155 *value = val - 1;
157 return 0;
160 static int lag_read_prob_header(lag_rac *rac, GetBitContext *gb)
162 int i, j, scale_factor;
163 unsigned prob, cumulative_target;
164 unsigned cumul_prob = 0;
165 unsigned scaled_cumul_prob = 0;
167 rac->prob[0] = 0;
168 rac->prob[257] = UINT_MAX;
169 /* Read probabilities from bitstream */
170 for (i = 1; i < 257; i++) {
171 if (lag_decode_prob(gb, &rac->prob[i]) < 0) {
172 av_log(rac->avctx, AV_LOG_ERROR, "Invalid probability encountered.\n");
173 return -1;
175 cumul_prob += rac->prob[i];
176 if (!rac->prob[i]) {
177 if (lag_decode_prob(gb, &prob)) {
178 av_log(rac->avctx, AV_LOG_ERROR, "Invalid probability run encountered.\n");
179 return -1;
181 if (prob > 257 - i)
182 prob = 257 - i;
183 for (j = 0; j < prob; j++)
184 rac->prob[++i] = 0;
188 if (!cumul_prob) {
189 av_log(rac->avctx, AV_LOG_ERROR, "All probabilities are 0!\n");
190 return -1;
193 /* Scale probabilities so cumulative probability is an even power of 2. */
194 scale_factor = av_log2(cumul_prob);
196 if (cumul_prob & (cumul_prob - 1)) {
197 uint64_t mul = softfloat_reciprocal(cumul_prob);
198 for (i = 1; i < 257; i++) {
199 rac->prob[i] = softfloat_mul(rac->prob[i], mul);
200 scaled_cumul_prob += rac->prob[i];
203 scale_factor++;
204 cumulative_target = 1 << scale_factor;
206 if (scaled_cumul_prob > cumulative_target) {
207 av_log(rac->avctx, AV_LOG_ERROR,
208 "Scaled probabilities are larger than target!\n");
209 return -1;
212 scaled_cumul_prob = cumulative_target - scaled_cumul_prob;
214 for (i = 1; scaled_cumul_prob; i = (i & 0x7f) + 1) {
215 if (rac->prob[i]) {
216 rac->prob[i]++;
217 scaled_cumul_prob--;
219 /* Comment from reference source:
220 * if (b & 0x80 == 0) { // order of operations is 'wrong'; it has been left this way
221 * // since the compression change is negligable and fixing it
222 * // breaks backwards compatibilty
223 * b =- (signed int)b;
224 * b &= 0xFF;
225 * } else {
226 * b++;
227 * b &= 0x7f;
233 rac->scale = scale_factor;
235 /* Fill probability array with cumulative probability for each symbol. */
236 for (i = 1; i < 257; i++)
237 rac->prob[i] += rac->prob[i - 1];
239 return 0;
/**
 * Add median prediction to a row of residuals.
 *
 * Each output byte is the residual plus the median of the left value, the
 * value above, and their gradient (left + above - above-left).
 *
 * @param dst      output row; may be the same buffer as diff
 * @param src1     row above the output row
 * @param diff     residuals for this row
 * @param w        number of bytes to process
 * @param left     in: value to the left of the first byte; out: last decoded byte
 * @param left_top in: value above-left of the first byte; out: last byte of src1 used
 */
static void add_lag_median_prediction(uint8_t *dst, uint8_t *src1,
                                      uint8_t *diff, int w, int *left,
                                      int *left_top)
{
    /* This is almost identical to add_hfyu_median_prediction in dsputil.h.
     * However the &0xFF on the gradient predictor yields incorrect output
     * for lagarith.
     */
    int i;
    uint8_t l, lt;

    l  = *left;
    lt = *left_top;

    for (i = 0; i < w; i++) {
        /* diff[i] is read before dst[i] is written; the two may alias */
        l = mid_pred(l, src1[i], l + src1[i] - lt) + diff[i];
        lt = src1[i];
        dst[i] = l;
    }

    *left     = l;
    *left_top = lt;
}
266 static void lag_pred_line(LagarithContext *l, uint8_t *buf,
267 int width, int stride, int step, int line)
269 int i = 0;
270 int L, TL;
271 int width_scaled = width * step;
273 if (!line) {
274 /* Left prediction only for first line */
275 L = l->dsp.add_hfyu_left_prediction(buf + step, buf + step,
276 width - step, buf[0]);
277 return;
278 } else if (line == 1) {
279 /* Second line, left predict first pixel, the rest of the line is median predicted */
280 /* FIXME: In the case of RGB this pixel is top predicted */
281 TL = buf[-stride];
282 L = l->dsp.add_hfyu_left_prediction(buf, buf, 1,
283 buf[width_scaled - stride - step]);
284 i += step;
285 } else {
286 /* Left pixel is actually prev_row[width] */
287 L = buf[width_scaled - stride - step];
288 /* Top left is 2 rows back, last pixel */
289 TL = buf[width_scaled - (2 * stride) - step];
292 if (i < width_scaled)
293 add_lag_median_prediction(buf + i, buf - stride + i, buf + i,
294 width_scaled - i, &L, &TL);
297 static int lag_decode_line(LagarithContext *l, lag_rac *rac,
298 uint8_t *dst, int width, int stride,
299 int step, int esc_count)
301 int i = 0;
302 int ret = 0;
304 if (!esc_count)
305 esc_count = -1;
307 /* Output any zeros remaining from the previous run */
308 handle_zeros:
309 if (l->zeros_rem) {
310 int count = FFMIN(l->zeros_rem, width - i);
311 lag_memset(dst + i * step, 0, count, step);
312 i += count;
313 l->zeros_rem -= count;
316 while (i < width) {
318 dst[i * step] = lag_get_rac(rac);
319 ret++;
321 if (dst[i * step])
322 l->zeros = 0;
323 else
324 l->zeros++;
326 i++;
327 if (l->zeros == esc_count) {
328 int index = lag_get_rac(rac);
329 ret++;
331 l->zeros = 0;
333 l->zeros_rem = lag_calc_zero_run(index);
334 goto handle_zeros;
337 return ret;
340 static int lag_decode_zero_run_line(LagarithContext *l, uint8_t *dst,
341 const uint8_t *src, int width,
342 int step, int esc_count)
344 int i = 0;
345 int count;
346 uint8_t zero_run = 0;
347 const uint8_t *start = src;
348 uint8_t mask1 = -(esc_count < 2);
349 uint8_t mask2 = -(esc_count < 3);
350 uint8_t *end = dst + (width - 2) * step;
352 output_zeros:
353 if (l->zeros_rem) {
354 count = FFMIN(l->zeros_rem, width - i);
355 lag_memset(dst, 0, count, step);
356 l->zeros_rem -= count;
357 dst += count;
360 while (dst < end) {
361 i = 0;
362 while (!zero_run && dst + i * step < end) {
363 i++;
364 zero_run =
365 !(src[i] | (src[i + 1] & mask1) | (src[i + 2] & mask2));
367 if (zero_run) {
368 zero_run = 0;
369 i += esc_count;
370 lag_memcpy(dst, src, i, step);
371 dst += i;
372 l->zeros_rem = lag_calc_zero_run(src[i]);
374 src += i + 1;
375 goto output_zeros;
376 } else {
377 lag_memcpy(dst, src, i, step);
378 src += i;
381 return start - src;
386 static int lag_decode_arith_plane(LagarithContext *l, uint8_t *dst,
387 int width, int height, int stride,
388 int step, const uint8_t *src,
389 int src_size)
391 int i = 0;
392 int read = 0;
393 uint32_t length;
394 uint32_t offset = 1;
395 int esc_count = src[0];
396 GetBitContext gb;
397 lag_rac rac;
399 rac.avctx = l->avctx;
400 l->zeros = 0;
402 if (esc_count < 4) {
403 length = width * height;
404 if (esc_count && AV_RL32(src + 1) < length) {
405 length = AV_RL32(src + 1);
406 offset += 4;
409 init_get_bits(&gb, src + offset, src_size * 8);
411 if (lag_read_prob_header(&rac, &gb) < 0)
412 return -1;
414 lag_rac_init(&rac, &gb, length - stride);
416 for (i = 0; i < height; i++)
417 read +=
418 lag_decode_line(l, &rac, dst + (i * stride), width,
419 stride, step, esc_count);
421 if (read > length)
422 av_log(l->avctx, AV_LOG_WARNING,
423 "Output more bytes than length (%d of %d)\n", read,
424 length);
425 } else if (esc_count < 8) {
426 esc_count -= 4;
427 if (esc_count > 0) {
428 /* Zero run coding only, no range coding. */
429 for (i = 0; i < height; i++)
430 src +=
431 lag_decode_zero_run_line(l, dst + (i * stride),
432 src, width, step, esc_count);
433 } else {
434 /* Plane is stored uncompressed */
435 for (i = 0; i < height; i++) {
436 lag_memcpy(dst + (i * stride), src, width, step);
437 src += width;
440 } else if (esc_count == 0xff) {
441 /* Plane is a solid run of 0 bytes */
442 for (i = 0; i < height; i++)
443 lag_memset(dst + i * stride, 0, width, step);
444 } else {
445 av_log(l->avctx, AV_LOG_ERROR,
446 "Invalid zero run escape code! (%#x)\n", esc_count);
447 return -1;
450 for (i = 0; i < height; i++) {
451 lag_pred_line(l, dst, width, stride, step, i);
452 dst += stride;
455 return 0;
459 * Decode a frame.
460 * @param avctx codec context
461 * @param data output AVFrame
462 * @param data_size size of output data or 0 if no picture is returned
463 * @param avpkt input packet
464 * @return number of consumed bytes on success or negative if decode fails
466 static int lag_decode_frame(AVCodecContext *avctx,
467 void *data, int *data_size, AVPacket *avpkt)
469 const uint8_t *buf = avpkt->data;
470 int buf_size = avpkt->size;
471 LagarithContext *l = avctx->priv_data;
472 AVFrame *const p = &l->picture;
473 uint8_t frametype = 0;
474 uint32_t offset_gu = 0, offset_bv = 0, offset_ry = 9;
476 AVFrame *picture = data;
478 if (!l->avctx)
479 l->avctx = avctx;
481 if (p->data[0])
482 avctx->release_buffer(avctx, p);
484 p->reference = 0;
485 p->key_frame = 1;
487 frametype = buf[0];
489 offset_gu = AV_RL32(buf + 1);
490 offset_bv = AV_RL32(buf + 5);
492 switch (frametype) {
493 case FRAME_ARITH_YV12:
494 avctx->pix_fmt = PIX_FMT_YUV420P;
496 if (avctx->get_buffer(avctx, p) < 0) {
497 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
498 return -1;
501 lag_decode_arith_plane(l, p->data[0], avctx->width, avctx->height,
502 p->linesize[0], 1, buf + offset_ry,
503 buf_size);
504 lag_decode_arith_plane(l, p->data[2], avctx->width / 2,
505 avctx->height / 2, p->linesize[2], 1,
506 buf + offset_gu, buf_size);
507 lag_decode_arith_plane(l, p->data[1], avctx->width / 2,
508 avctx->height / 2, p->linesize[1], 1,
509 buf + offset_bv, buf_size);
510 break;
511 default:
512 av_log(avctx, AV_LOG_ERROR,
513 "Unsupported Lagarith frame type: %#x\n", frametype);
514 return -1;
517 *picture = *p;
518 *data_size = sizeof(AVFrame);
520 return buf_size;
523 static av_cold int lag_decode_init(AVCodecContext *avctx)
525 LagarithContext *l = avctx->priv_data;
527 dsputil_init(&l->dsp, avctx);
529 return 0;
532 AVCodec lagarith_decoder = {
533 "lagarith",
534 CODEC_TYPE_VIDEO,
535 CODEC_ID_LAGARITH,
536 sizeof(LagarithContext),
537 lag_decode_init,
538 NULL,
539 NULL,
540 lag_decode_frame,
541 CODEC_CAP_DR1,
542 .long_name = NULL_IF_CONFIG_SMALL("Lagarith lossless"),