qga/qapi-schema: Reformat doc comments to conform to current conventions
[qemu/ar7.git] / migration / xbzrle.c
blob258e4959c99df5f473cd9ebc42f12de71f8057b0
1 /*
2 * Xor Based Zero Run Length Encoding
4 * Copyright 2013 Red Hat, Inc. and/or its affiliates
6 * Authors:
7 * Orit Wasserman <owasserm@redhat.com>
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
13 #include "qemu/osdep.h"
14 #include "qemu/cutils.h"
15 #include "qemu/host-utils.h"
16 #include "xbzrle.h"
19 page = zrun nzrun
20 | zrun nzrun page
22 zrun = length
24 nzrun = length byte...
26 length = uleb128 encoded integer
28 int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
29 uint8_t *dst, int dlen)
31 uint32_t zrun_len = 0, nzrun_len = 0;
32 int d = 0, i = 0;
33 long res;
34 uint8_t *nzrun_start = NULL;
36 g_assert(!(((uintptr_t)old_buf | (uintptr_t)new_buf | slen) %
37 sizeof(long)));
39 while (i < slen) {
40 /* overflow */
41 if (d + 2 > dlen) {
42 return -1;
45 /* not aligned to sizeof(long) */
46 res = (slen - i) % sizeof(long);
47 while (res && old_buf[i] == new_buf[i]) {
48 zrun_len++;
49 i++;
50 res--;
53 /* word at a time for speed */
54 if (!res) {
55 while (i < slen &&
56 (*(long *)(old_buf + i)) == (*(long *)(new_buf + i))) {
57 i += sizeof(long);
58 zrun_len += sizeof(long);
61 /* go over the rest */
62 while (i < slen && old_buf[i] == new_buf[i]) {
63 zrun_len++;
64 i++;
68 /* buffer unchanged */
69 if (zrun_len == slen) {
70 return 0;
73 /* skip last zero run */
74 if (i == slen) {
75 return d;
78 d += uleb128_encode_small(dst + d, zrun_len);
80 zrun_len = 0;
81 nzrun_start = new_buf + i;
83 /* overflow */
84 if (d + 2 > dlen) {
85 return -1;
87 /* not aligned to sizeof(long) */
88 res = (slen - i) % sizeof(long);
89 while (res && old_buf[i] != new_buf[i]) {
90 i++;
91 nzrun_len++;
92 res--;
95 /* word at a time for speed, use of 32-bit long okay */
96 if (!res) {
97 /* truncation to 32-bit long okay */
98 unsigned long mask = (unsigned long)0x0101010101010101ULL;
99 while (i < slen) {
100 unsigned long xor;
101 xor = *(unsigned long *)(old_buf + i)
102 ^ *(unsigned long *)(new_buf + i);
103 if ((xor - mask) & ~xor & (mask << 7)) {
104 /* found the end of an nzrun within the current long */
105 while (old_buf[i] != new_buf[i]) {
106 nzrun_len++;
107 i++;
109 break;
110 } else {
111 i += sizeof(long);
112 nzrun_len += sizeof(long);
117 d += uleb128_encode_small(dst + d, nzrun_len);
118 /* overflow */
119 if (d + nzrun_len > dlen) {
120 return -1;
122 memcpy(dst + d, nzrun_start, nzrun_len);
123 d += nzrun_len;
124 nzrun_len = 0;
127 return d;
130 int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
132 int i = 0, d = 0;
133 int ret;
134 uint32_t count = 0;
136 while (i < slen) {
138 /* zrun */
139 if ((slen - i) < 2) {
140 return -1;
143 ret = uleb128_decode_small(src + i, &count);
144 if (ret < 0 || (i && !count)) {
145 return -1;
147 i += ret;
148 d += count;
150 /* overflow */
151 if (d > dlen) {
152 return -1;
155 /* nzrun */
156 if ((slen - i) < 2) {
157 return -1;
160 ret = uleb128_decode_small(src + i, &count);
161 if (ret < 0 || !count) {
162 return -1;
164 i += ret;
166 /* overflow */
167 if (d + count > dlen || i + count > slen) {
168 return -1;
171 memcpy(dst + d, src + i, count);
172 d += count;
173 i += count;
176 return d;
179 #if defined(CONFIG_AVX512BW_OPT)
180 #include <immintrin.h>
182 int __attribute__((target("avx512bw")))
183 xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
184 uint8_t *dst, int dlen)
186 uint32_t zrun_len = 0, nzrun_len = 0;
187 int d = 0, i = 0, num = 0;
188 uint8_t *nzrun_start = NULL;
189 /* add 1 to include residual part in main loop */
190 uint32_t count512s = (slen >> 6) + 1;
191 /* countResidual is tail of data, i.e., countResidual = slen % 64 */
192 uint32_t count_residual = slen & 0b111111;
193 bool never_same = true;
194 uint64_t mask_residual = 1;
195 mask_residual <<= count_residual;
196 mask_residual -= 1;
197 __m512i r = _mm512_set1_epi32(0);
199 while (count512s) {
200 int bytes_to_check = 64;
201 uint64_t mask = 0xffffffffffffffff;
202 if (count512s == 1) {
203 bytes_to_check = count_residual;
204 mask = mask_residual;
206 __m512i old_data = _mm512_mask_loadu_epi8(r,
207 mask, old_buf + i);
208 __m512i new_data = _mm512_mask_loadu_epi8(r,
209 mask, new_buf + i);
210 uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
211 count512s--;
213 bool is_same = (comp & 0x1);
214 while (bytes_to_check) {
215 if (d + 2 > dlen) {
216 return -1;
218 if (is_same) {
219 if (nzrun_len) {
220 d += uleb128_encode_small(dst + d, nzrun_len);
221 if (d + nzrun_len > dlen) {
222 return -1;
224 nzrun_start = new_buf + i - nzrun_len;
225 memcpy(dst + d, nzrun_start, nzrun_len);
226 d += nzrun_len;
227 nzrun_len = 0;
229 /* 64 data at a time for speed */
230 if (count512s && (comp == 0xffffffffffffffff)) {
231 i += 64;
232 zrun_len += 64;
233 break;
235 never_same = false;
236 num = ctz64(~comp);
237 num = (num < bytes_to_check) ? num : bytes_to_check;
238 zrun_len += num;
239 bytes_to_check -= num;
240 comp >>= num;
241 i += num;
242 if (bytes_to_check) {
243 /* still has different data after same data */
244 d += uleb128_encode_small(dst + d, zrun_len);
245 zrun_len = 0;
246 } else {
247 break;
250 if (never_same || zrun_len) {
252 * never_same only acts if
253 * data begins with diff in first count512s
255 d += uleb128_encode_small(dst + d, zrun_len);
256 zrun_len = 0;
257 never_same = false;
259 /* has diff, 64 data at a time for speed */
260 if ((bytes_to_check == 64) && (comp == 0x0)) {
261 i += 64;
262 nzrun_len += 64;
263 break;
265 num = ctz64(comp);
266 num = (num < bytes_to_check) ? num : bytes_to_check;
267 nzrun_len += num;
268 bytes_to_check -= num;
269 comp >>= num;
270 i += num;
271 if (bytes_to_check) {
272 /* mask like 111000 */
273 d += uleb128_encode_small(dst + d, nzrun_len);
274 /* overflow */
275 if (d + nzrun_len > dlen) {
276 return -1;
278 nzrun_start = new_buf + i - nzrun_len;
279 memcpy(dst + d, nzrun_start, nzrun_len);
280 d += nzrun_len;
281 nzrun_len = 0;
282 is_same = true;
287 if (nzrun_len != 0) {
288 d += uleb128_encode_small(dst + d, nzrun_len);
289 /* overflow */
290 if (d + nzrun_len > dlen) {
291 return -1;
293 nzrun_start = new_buf + i - nzrun_len;
294 memcpy(dst + d, nzrun_start, nzrun_len);
295 d += nzrun_len;
297 return d;
299 #endif