2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
10 * This code was originally written by: Nathan E. Egge, at the Daala
16 #include "./vpx_config.h"
17 #include "./vpx_dsp_rtcd.h"
18 #include "vpx_dsp/ssim.h"
19 #include "vpx_ports/system_state.h"
20 /* TODO(jbb): High bit depth version of this code needed */
21 typedef struct fs_level fs_level
;
22 typedef struct fs_ctx fs_ctx
;
24 #define SSIM_C1 (255 * 255 * 0.01 * 0.01)
25 #define SSIM_C2 (255 * 255 * 0.03 * 0.03)
27 #define FS_MINI(_a, _b) ((_a) < (_b) ? (_a) : (_b))
28 #define FS_MAXI(_a, _b) ((_a) > (_b) ? (_a) : (_b))
44 static void fs_ctx_init(fs_ctx
*_ctx
, int _w
, int _h
, int _nlevels
) {
52 data_size
= _nlevels
* sizeof(fs_level
)
53 + 2 * (lw
+ 8) * 8 * sizeof(*_ctx
->col_buf
);
54 for (l
= 0; l
< _nlevels
; l
++) {
57 im_size
= lw
* (size_t) lh
;
58 level_size
= 2 * im_size
* sizeof(*_ctx
->level
[l
].im1
);
59 level_size
+= sizeof(*_ctx
->level
[l
].ssim
) - 1;
60 level_size
/= sizeof(*_ctx
->level
[l
].ssim
);
61 level_size
+= im_size
;
62 level_size
*= sizeof(*_ctx
->level
[l
].ssim
);
63 data_size
+= level_size
;
67 data
= (unsigned char *) malloc(data_size
);
68 _ctx
->level
= (fs_level
*) data
;
69 _ctx
->nlevels
= _nlevels
;
70 data
+= _nlevels
* sizeof(*_ctx
->level
);
73 for (l
= 0; l
< _nlevels
; l
++) {
76 _ctx
->level
[l
].w
= lw
;
77 _ctx
->level
[l
].h
= lh
;
78 im_size
= lw
* (size_t) lh
;
79 level_size
= 2 * im_size
* sizeof(*_ctx
->level
[l
].im1
);
80 level_size
+= sizeof(*_ctx
->level
[l
].ssim
) - 1;
81 level_size
/= sizeof(*_ctx
->level
[l
].ssim
);
82 level_size
*= sizeof(*_ctx
->level
[l
].ssim
);
83 _ctx
->level
[l
].im1
= (uint16_t *) data
;
84 _ctx
->level
[l
].im2
= _ctx
->level
[l
].im1
+ im_size
;
86 _ctx
->level
[l
].ssim
= (double *) data
;
87 data
+= im_size
* sizeof(*_ctx
->level
[l
].ssim
);
91 _ctx
->col_buf
= (unsigned *) data
;
94 static void fs_ctx_clear(fs_ctx
*_ctx
) {
98 static void fs_downsample_level(fs_ctx
*_ctx
, int _l
) {
100 const uint16_t *src2
;
109 w
= _ctx
->level
[_l
].w
;
110 h
= _ctx
->level
[_l
].h
;
111 dst1
= _ctx
->level
[_l
].im1
;
112 dst2
= _ctx
->level
[_l
].im2
;
113 w2
= _ctx
->level
[_l
- 1].w
;
114 h2
= _ctx
->level
[_l
- 1].h
;
115 src1
= _ctx
->level
[_l
- 1].im1
;
116 src2
= _ctx
->level
[_l
- 1].im2
;
117 for (j
= 0; j
< h
; j
++) {
121 j1offs
= FS_MINI(2 * j
+ 1, h2
) * w2
;
122 for (i
= 0; i
< w
; i
++) {
126 i1
= FS_MINI(i0
+ 1, w2
);
127 dst1
[j
* w
+ i
] = src1
[j0offs
+ i0
] + src1
[j0offs
+ i1
]
128 + src1
[j1offs
+ i0
] + src1
[j1offs
+ i1
];
129 dst2
[j
* w
+ i
] = src2
[j0offs
+ i0
] + src2
[j0offs
+ i1
]
130 + src2
[j1offs
+ i0
] + src2
[j1offs
+ i1
];
135 static void fs_downsample_level0(fs_ctx
*_ctx
, const unsigned char *_src1
,
136 int _s1ystride
, const unsigned char *_src2
,
137 int _s2ystride
, int _w
, int _h
) {
144 w
= _ctx
->level
[0].w
;
145 h
= _ctx
->level
[0].h
;
146 dst1
= _ctx
->level
[0].im1
;
147 dst2
= _ctx
->level
[0].im2
;
148 for (j
= 0; j
< h
; j
++) {
152 j1
= FS_MINI(j0
+ 1, _h
);
153 for (i
= 0; i
< w
; i
++) {
157 i1
= FS_MINI(i0
+ 1, _w
);
158 dst1
[j
* w
+ i
] = _src1
[j0
* _s1ystride
+ i0
]
159 + _src1
[j0
* _s1ystride
+ i1
] + _src1
[j1
* _s1ystride
+ i0
]
160 + _src1
[j1
* _s1ystride
+ i1
];
161 dst2
[j
* w
+ i
] = _src2
[j0
* _s2ystride
+ i0
]
162 + _src2
[j0
* _s2ystride
+ i1
] + _src2
[j1
* _s2ystride
+ i0
]
163 + _src2
[j1
* _s2ystride
+ i1
];
168 static void fs_apply_luminance(fs_ctx
*_ctx
, int _l
) {
169 unsigned *col_sums_x
;
170 unsigned *col_sums_y
;
181 w
= _ctx
->level
[_l
].w
;
182 h
= _ctx
->level
[_l
].h
;
183 col_sums_x
= _ctx
->col_buf
;
184 col_sums_y
= col_sums_x
+ w
;
185 im1
= _ctx
->level
[_l
].im1
;
186 im2
= _ctx
->level
[_l
].im2
;
187 for (i
= 0; i
< w
; i
++)
188 col_sums_x
[i
] = 5 * im1
[i
];
189 for (i
= 0; i
< w
; i
++)
190 col_sums_y
[i
] = 5 * im2
[i
];
191 for (j
= 1; j
< 4; j
++) {
192 j1offs
= FS_MINI(j
, h
- 1) * w
;
193 for (i
= 0; i
< w
; i
++)
194 col_sums_x
[i
] += im1
[j1offs
+ i
];
195 for (i
= 0; i
< w
; i
++)
196 col_sums_y
[i
] += im2
[j1offs
+ i
];
198 ssim
= _ctx
->level
[_l
].ssim
;
199 c1
= (double) (SSIM_C1
* 4096 * (1 << 4 * _l
));
200 for (j
= 0; j
< h
; j
++) {
205 mux
= 5 * col_sums_x
[0];
206 muy
= 5 * col_sums_y
[0];
207 for (i
= 1; i
< 4; i
++) {
208 i1
= FS_MINI(i
, w
- 1);
209 mux
+= col_sums_x
[i1
];
210 muy
+= col_sums_y
[i1
];
212 for (i
= 0; i
< w
; i
++) {
213 ssim
[j
* w
+ i
] *= (2 * mux
* (double) muy
+ c1
)
214 / (mux
* (double) mux
+ muy
* (double) muy
+ c1
);
216 i0
= FS_MAXI(0, i
- 4);
217 i1
= FS_MINI(i
+ 4, w
- 1);
218 mux
+= col_sums_x
[i1
] - col_sums_x
[i0
];
219 muy
+= col_sums_x
[i1
] - col_sums_x
[i0
];
223 j0offs
= FS_MAXI(0, j
- 4) * w
;
224 for (i
= 0; i
< w
; i
++)
225 col_sums_x
[i
] -= im1
[j0offs
+ i
];
226 for (i
= 0; i
< w
; i
++)
227 col_sums_y
[i
] -= im2
[j0offs
+ i
];
228 j1offs
= FS_MINI(j
+ 4, h
- 1) * w
;
229 for (i
= 0; i
< w
; i
++)
230 col_sums_x
[i
] += im1
[j1offs
+ i
];
231 for (i
= 0; i
< w
; i
++)
232 col_sums_y
[i
] += im2
[j1offs
+ i
];
237 #define FS_COL_SET(_col, _joffs, _ioffs) \
241 gx = gx_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \
242 gy = gy_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \
243 col_sums_gx2[(_col)] = gx * (double)gx; \
244 col_sums_gy2[(_col)] = gy * (double)gy; \
245 col_sums_gxgy[(_col)] = gx * (double)gy; \
249 #define FS_COL_ADD(_col, _joffs, _ioffs) \
253 gx = gx_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \
254 gy = gy_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \
255 col_sums_gx2[(_col)] += gx * (double)gx; \
256 col_sums_gy2[(_col)] += gy * (double)gy; \
257 col_sums_gxgy[(_col)] += gx * (double)gy; \
261 #define FS_COL_SUB(_col, _joffs, _ioffs) \
265 gx = gx_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \
266 gy = gy_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \
267 col_sums_gx2[(_col)] -= gx * (double)gx; \
268 col_sums_gy2[(_col)] -= gy * (double)gy; \
269 col_sums_gxgy[(_col)] -= gx * (double)gy; \
273 #define FS_COL_COPY(_col1, _col2) \
275 col_sums_gx2[(_col1)] = col_sums_gx2[(_col2)]; \
276 col_sums_gy2[(_col1)] = col_sums_gy2[(_col2)]; \
277 col_sums_gxgy[(_col1)] = col_sums_gxgy[(_col2)]; \
281 #define FS_COL_HALVE(_col1, _col2) \
283 col_sums_gx2[(_col1)] = col_sums_gx2[(_col2)] * 0.5; \
284 col_sums_gy2[(_col1)] = col_sums_gy2[(_col2)] * 0.5; \
285 col_sums_gxgy[(_col1)] = col_sums_gxgy[(_col2)] * 0.5; \
289 #define FS_COL_DOUBLE(_col1, _col2) \
291 col_sums_gx2[(_col1)] = col_sums_gx2[(_col2)] * 2; \
292 col_sums_gy2[(_col1)] = col_sums_gy2[(_col2)] * 2; \
293 col_sums_gxgy[(_col1)] = col_sums_gxgy[(_col2)] * 2; \
297 static void fs_calc_structure(fs_ctx
*_ctx
, int _l
) {
303 double col_sums_gx2
[8];
304 double col_sums_gy2
[8];
305 double col_sums_gxgy
[8];
312 w
= _ctx
->level
[_l
].w
;
313 h
= _ctx
->level
[_l
].h
;
314 im1
= _ctx
->level
[_l
].im1
;
315 im2
= _ctx
->level
[_l
].im2
;
316 ssim
= _ctx
->level
[_l
].ssim
;
317 gx_buf
= _ctx
->col_buf
;
319 gy_buf
= gx_buf
+ 8 * stride
;
320 memset(gx_buf
, 0, 2 * 8 * stride
* sizeof(*gx_buf
));
321 c2
= SSIM_C2
* (1 << 4 * _l
) * 16 * 104;
322 for (j
= 0; j
< h
+ 4; j
++) {
324 for (i
= 0; i
< w
- 1; i
++) {
329 g1
= abs(im1
[(j
+ 1) * w
+ i
+ 1] - im1
[j
* w
+ i
]);
330 g2
= abs(im1
[(j
+ 1) * w
+ i
] - im1
[j
* w
+ i
+ 1]);
331 gx
= 4 * FS_MAXI(g1
, g2
) + FS_MINI(g1
, g2
);
332 g1
= abs(im2
[(j
+ 1) * w
+ i
+ 1] - im2
[j
* w
+ i
]);
333 g2
= abs(im2
[(j
+ 1) * w
+ i
] - im2
[j
* w
+ i
+ 1]);
334 gy
= 4 * FS_MAXI(g1
, g2
) + FS_MINI(g1
, g2
);
335 gx_buf
[(j
& 7) * stride
+ i
+ 4] = gx
;
336 gy_buf
[(j
& 7) * stride
+ i
+ 4] = gy
;
339 memset(gx_buf
+ (j
& 7) * stride
, 0, stride
* sizeof(*gx_buf
));
340 memset(gy_buf
+ (j
& 7) * stride
, 0, stride
* sizeof(*gy_buf
));
344 col_sums_gx2
[3] = col_sums_gx2
[2] = col_sums_gx2
[1] = col_sums_gx2
[0] = 0;
345 col_sums_gy2
[3] = col_sums_gy2
[2] = col_sums_gy2
[1] = col_sums_gy2
[0] = 0;
346 col_sums_gxgy
[3] = col_sums_gxgy
[2] = col_sums_gxgy
[1] =
347 col_sums_gxgy
[0] = 0;
348 for (i
= 4; i
< 8; i
++) {
349 FS_COL_SET(i
, -1, 0);
351 for (k
= 1; k
< 8 - i
; k
++) {
353 FS_COL_ADD(i
, -k
- 1, 0);
357 for (i
= 0; i
< w
; i
++) {
361 mugx2
= col_sums_gx2
[0];
362 for (k
= 1; k
< 8; k
++)
363 mugx2
+= col_sums_gx2
[k
];
364 mugy2
= col_sums_gy2
[0];
365 for (k
= 1; k
< 8; k
++)
366 mugy2
+= col_sums_gy2
[k
];
367 mugxgy
= col_sums_gxgy
[0];
368 for (k
= 1; k
< 8; k
++)
369 mugxgy
+= col_sums_gxgy
[k
];
370 ssim
[(j
- 4) * w
+ i
] = (2 * mugxgy
+ c2
) / (mugx2
+ mugy2
+ c2
);
372 FS_COL_SET(0, -1, 1);
374 FS_COL_SUB(2, -3, 2);
377 FS_COL_SUB(3, -4, 3);
382 FS_COL_ADD(4, -4, 5);
385 FS_COL_ADD(5, -3, 6);
388 FS_COL_ADD(6, -2, 7);
390 FS_COL_SET(7, -1, 8);
398 #define FS_NLEVELS (4)
400 /*These weights were derived from the default weights found in Wang's original
401 Matlab implementation: {0.0448, 0.2856, 0.2363, 0.1333}.
402 We drop the finest scale and renormalize the rest to sum to 1.*/
404 static const double FS_WEIGHTS
[FS_NLEVELS
] = {0.2989654541015625,
405 0.3141326904296875, 0.2473602294921875, 0.1395416259765625};
407 static double fs_average(fs_ctx
*_ctx
, int _l
) {
414 w
= _ctx
->level
[_l
].w
;
415 h
= _ctx
->level
[_l
].h
;
416 ssim
= _ctx
->level
[_l
].ssim
;
418 for (j
= 0; j
< h
; j
++)
419 for (i
= 0; i
< w
; i
++)
420 ret
+= ssim
[j
* w
+ i
];
421 return pow(ret
/ (w
* h
), FS_WEIGHTS
[_l
]);
424 static double calc_ssim(const unsigned char *_src
, int _systride
,
425 const unsigned char *_dst
, int _dystride
, int _w
, int _h
) {
430 fs_ctx_init(&ctx
, _w
, _h
, FS_NLEVELS
);
431 fs_downsample_level0(&ctx
, _src
, _systride
, _dst
, _dystride
, _w
, _h
);
432 for (l
= 0; l
< FS_NLEVELS
- 1; l
++) {
433 fs_calc_structure(&ctx
, l
);
434 ret
*= fs_average(&ctx
, l
);
435 fs_downsample_level(&ctx
, l
+ 1);
437 fs_calc_structure(&ctx
, l
);
438 fs_apply_luminance(&ctx
, l
);
439 ret
*= fs_average(&ctx
, l
);
444 static double convert_ssim_db(double _ssim
, double _weight
) {
445 return 10 * (log10(_weight
) - log10(_weight
- _ssim
));
448 double vpx_calc_fastssim(const YV12_BUFFER_CONFIG
*source
,
449 const YV12_BUFFER_CONFIG
*dest
,
450 double *ssim_y
, double *ssim_u
, double *ssim_v
) {
452 vpx_clear_system_state();
454 *ssim_y
= calc_ssim(source
->y_buffer
, source
->y_stride
, dest
->y_buffer
,
455 dest
->y_stride
, source
->y_crop_width
,
456 source
->y_crop_height
);
458 *ssim_u
= calc_ssim(source
->u_buffer
, source
->uv_stride
, dest
->u_buffer
,
459 dest
->uv_stride
, source
->uv_crop_width
,
460 source
->uv_crop_height
);
462 *ssim_v
= calc_ssim(source
->v_buffer
, source
->uv_stride
, dest
->v_buffer
,
463 dest
->uv_stride
, source
->uv_crop_width
,
464 source
->uv_crop_height
);
465 ssimv
= (*ssim_y
) * .8 + .1 * ((*ssim_u
) + (*ssim_v
));
467 return convert_ssim_db(ssimv
, 1.0);