audio: make playback packet length calculation exact
[qemu/ar7.git] / migration / xbzrle.c
blob05366e86c05a3d7e901d197fc406271f54f0eebb
1 /*
2 * Xor Based Zero Run Length Encoding
4 * Copyright 2013 Red Hat, Inc. and/or its affiliates
6 * Authors:
7 * Orit Wasserman <owasserm@redhat.com>
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
13 #include "qemu/osdep.h"
14 #include "qemu/cutils.h"
15 #include "xbzrle.h"
18 page = zrun nzrun
19 | zrun nzrun page
21 zrun = length
23 nzrun = length byte...
25 length = uleb128 encoded integer
27 int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
28 uint8_t *dst, int dlen)
30 uint32_t zrun_len = 0, nzrun_len = 0;
31 int d = 0, i = 0;
32 long res;
33 uint8_t *nzrun_start = NULL;
35 g_assert(!(((uintptr_t)old_buf | (uintptr_t)new_buf | slen) %
36 sizeof(long)));
38 while (i < slen) {
39 /* overflow */
40 if (d + 2 > dlen) {
41 return -1;
44 /* not aligned to sizeof(long) */
45 res = (slen - i) % sizeof(long);
46 while (res && old_buf[i] == new_buf[i]) {
47 zrun_len++;
48 i++;
49 res--;
52 /* word at a time for speed */
53 if (!res) {
54 while (i < slen &&
55 (*(long *)(old_buf + i)) == (*(long *)(new_buf + i))) {
56 i += sizeof(long);
57 zrun_len += sizeof(long);
60 /* go over the rest */
61 while (i < slen && old_buf[i] == new_buf[i]) {
62 zrun_len++;
63 i++;
67 /* buffer unchanged */
68 if (zrun_len == slen) {
69 return 0;
72 /* skip last zero run */
73 if (i == slen) {
74 return d;
77 d += uleb128_encode_small(dst + d, zrun_len);
79 zrun_len = 0;
80 nzrun_start = new_buf + i;
82 /* overflow */
83 if (d + 2 > dlen) {
84 return -1;
86 /* not aligned to sizeof(long) */
87 res = (slen - i) % sizeof(long);
88 while (res && old_buf[i] != new_buf[i]) {
89 i++;
90 nzrun_len++;
91 res--;
94 /* word at a time for speed, use of 32-bit long okay */
95 if (!res) {
96 /* truncation to 32-bit long okay */
97 unsigned long mask = (unsigned long)0x0101010101010101ULL;
98 while (i < slen) {
99 unsigned long xor;
100 xor = *(unsigned long *)(old_buf + i)
101 ^ *(unsigned long *)(new_buf + i);
102 if ((xor - mask) & ~xor & (mask << 7)) {
103 /* found the end of an nzrun within the current long */
104 while (old_buf[i] != new_buf[i]) {
105 nzrun_len++;
106 i++;
108 break;
109 } else {
110 i += sizeof(long);
111 nzrun_len += sizeof(long);
116 d += uleb128_encode_small(dst + d, nzrun_len);
117 /* overflow */
118 if (d + nzrun_len > dlen) {
119 return -1;
121 memcpy(dst + d, nzrun_start, nzrun_len);
122 d += nzrun_len;
123 nzrun_len = 0;
126 return d;
129 int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
131 int i = 0, d = 0;
132 int ret;
133 uint32_t count = 0;
135 while (i < slen) {
137 /* zrun */
138 if ((slen - i) < 2) {
139 return -1;
142 ret = uleb128_decode_small(src + i, &count);
143 if (ret < 0 || (i && !count)) {
144 return -1;
146 i += ret;
147 d += count;
149 /* overflow */
150 if (d > dlen) {
151 return -1;
154 /* nzrun */
155 if ((slen - i) < 2) {
156 return -1;
159 ret = uleb128_decode_small(src + i, &count);
160 if (ret < 0 || !count) {
161 return -1;
163 i += ret;
165 /* overflow */
166 if (d + count > dlen || i + count > slen) {
167 return -1;
170 memcpy(dst + d, src + i, count);
171 d += count;
172 i += count;
175 return d;
178 #if defined(CONFIG_AVX512BW_OPT)
179 #pragma GCC push_options
180 #pragma GCC target("avx512bw")
181 #include <immintrin.h>
182 int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
183 uint8_t *dst, int dlen)
185 uint32_t zrun_len = 0, nzrun_len = 0;
186 int d = 0, i = 0, num = 0;
187 uint8_t *nzrun_start = NULL;
188 /* add 1 to include residual part in main loop */
189 uint32_t count512s = (slen >> 6) + 1;
190 /* countResidual is tail of data, i.e., countResidual = slen % 64 */
191 uint32_t count_residual = slen & 0b111111;
192 bool never_same = true;
193 uint64_t mask_residual = 1;
194 mask_residual <<= count_residual;
195 mask_residual -= 1;
196 __m512i r = _mm512_set1_epi32(0);
198 while (count512s) {
199 if (d + 2 > dlen) {
200 return -1;
203 int bytes_to_check = 64;
204 uint64_t mask = 0xffffffffffffffff;
205 if (count512s == 1) {
206 bytes_to_check = count_residual;
207 mask = mask_residual;
209 __m512i old_data = _mm512_mask_loadu_epi8(r,
210 mask, old_buf + i);
211 __m512i new_data = _mm512_mask_loadu_epi8(r,
212 mask, new_buf + i);
213 uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
214 count512s--;
216 bool is_same = (comp & 0x1);
217 while (bytes_to_check) {
218 if (is_same) {
219 if (nzrun_len) {
220 d += uleb128_encode_small(dst + d, nzrun_len);
221 if (d + nzrun_len > dlen) {
222 return -1;
224 nzrun_start = new_buf + i - nzrun_len;
225 memcpy(dst + d, nzrun_start, nzrun_len);
226 d += nzrun_len;
227 nzrun_len = 0;
229 /* 64 data at a time for speed */
230 if (count512s && (comp == 0xffffffffffffffff)) {
231 i += 64;
232 zrun_len += 64;
233 break;
235 never_same = false;
236 num = __builtin_ctzll(~comp);
237 num = (num < bytes_to_check) ? num : bytes_to_check;
238 zrun_len += num;
239 bytes_to_check -= num;
240 comp >>= num;
241 i += num;
242 if (bytes_to_check) {
243 /* still has different data after same data */
244 d += uleb128_encode_small(dst + d, zrun_len);
245 zrun_len = 0;
246 } else {
247 break;
250 if (never_same || zrun_len) {
252 * never_same only acts if
253 * data begins with diff in first count512s
255 d += uleb128_encode_small(dst + d, zrun_len);
256 zrun_len = 0;
257 never_same = false;
259 /* has diff, 64 data at a time for speed */
260 if ((bytes_to_check == 64) && (comp == 0x0)) {
261 i += 64;
262 nzrun_len += 64;
263 break;
265 num = __builtin_ctzll(comp);
266 num = (num < bytes_to_check) ? num : bytes_to_check;
267 nzrun_len += num;
268 bytes_to_check -= num;
269 comp >>= num;
270 i += num;
271 if (bytes_to_check) {
272 /* mask like 111000 */
273 d += uleb128_encode_small(dst + d, nzrun_len);
274 /* overflow */
275 if (d + nzrun_len > dlen) {
276 return -1;
278 nzrun_start = new_buf + i - nzrun_len;
279 memcpy(dst + d, nzrun_start, nzrun_len);
280 d += nzrun_len;
281 nzrun_len = 0;
282 is_same = true;
287 if (nzrun_len != 0) {
288 d += uleb128_encode_small(dst + d, nzrun_len);
289 /* overflow */
290 if (d + nzrun_len > dlen) {
291 return -1;
293 nzrun_start = new_buf + i - nzrun_len;
294 memcpy(dst + d, nzrun_start, nzrun_len);
295 d += nzrun_len;
297 return d;
299 #pragma GCC pop_options
300 #endif