2 * Xor Based Zero Run Length Encoding
4 * Copyright 2013 Red Hat, Inc. and/or its affiliates
7 * Orit Wasserman <owasserm@redhat.com>
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
13 #include "qemu/osdep.h"
14 #include "qemu/cutils.h"
15 #include "qemu/host-utils.h"
24 nzrun = length byte...
26 length = uleb128 encoded integer
28 int xbzrle_encode_buffer(uint8_t *old_buf
, uint8_t *new_buf
, int slen
,
29 uint8_t *dst
, int dlen
)
31 uint32_t zrun_len
= 0, nzrun_len
= 0;
34 uint8_t *nzrun_start
= NULL
;
36 g_assert(!(((uintptr_t)old_buf
| (uintptr_t)new_buf
| slen
) %
45 /* not aligned to sizeof(long) */
46 res
= (slen
- i
) % sizeof(long);
47 while (res
&& old_buf
[i
] == new_buf
[i
]) {
53 /* word at a time for speed */
56 (*(long *)(old_buf
+ i
)) == (*(long *)(new_buf
+ i
))) {
58 zrun_len
+= sizeof(long);
61 /* go over the rest */
62 while (i
< slen
&& old_buf
[i
] == new_buf
[i
]) {
68 /* buffer unchanged */
69 if (zrun_len
== slen
) {
73 /* skip last zero run */
78 d
+= uleb128_encode_small(dst
+ d
, zrun_len
);
81 nzrun_start
= new_buf
+ i
;
87 /* not aligned to sizeof(long) */
88 res
= (slen
- i
) % sizeof(long);
89 while (res
&& old_buf
[i
] != new_buf
[i
]) {
95 /* word at a time for speed, use of 32-bit long okay */
97 /* truncation to 32-bit long okay */
98 unsigned long mask
= (unsigned long)0x0101010101010101ULL
;
101 xor = *(unsigned long *)(old_buf
+ i
)
102 ^ *(unsigned long *)(new_buf
+ i
);
103 if ((xor - mask
) & ~xor & (mask
<< 7)) {
104 /* found the end of an nzrun within the current long */
105 while (old_buf
[i
] != new_buf
[i
]) {
112 nzrun_len
+= sizeof(long);
117 d
+= uleb128_encode_small(dst
+ d
, nzrun_len
);
119 if (d
+ nzrun_len
> dlen
) {
122 memcpy(dst
+ d
, nzrun_start
, nzrun_len
);
130 int xbzrle_decode_buffer(uint8_t *src
, int slen
, uint8_t *dst
, int dlen
)
139 if ((slen
- i
) < 2) {
143 ret
= uleb128_decode_small(src
+ i
, &count
);
144 if (ret
< 0 || (i
&& !count
)) {
156 if ((slen
- i
) < 2) {
160 ret
= uleb128_decode_small(src
+ i
, &count
);
161 if (ret
< 0 || !count
) {
167 if (d
+ count
> dlen
|| i
+ count
> slen
) {
171 memcpy(dst
+ d
, src
+ i
, count
);
179 #if defined(CONFIG_AVX512BW_OPT)
180 #include <immintrin.h>
182 int __attribute__((target("avx512bw")))
183 xbzrle_encode_buffer_avx512(uint8_t *old_buf
, uint8_t *new_buf
, int slen
,
184 uint8_t *dst
, int dlen
)
186 uint32_t zrun_len
= 0, nzrun_len
= 0;
187 int d
= 0, i
= 0, num
= 0;
188 uint8_t *nzrun_start
= NULL
;
189 /* add 1 to include residual part in main loop */
190 uint32_t count512s
= (slen
>> 6) + 1;
191 /* countResidual is tail of data, i.e., countResidual = slen % 64 */
192 uint32_t count_residual
= slen
& 0b111111;
193 bool never_same
= true;
194 uint64_t mask_residual
= 1;
195 mask_residual
<<= count_residual
;
197 __m512i r
= _mm512_set1_epi32(0);
200 int bytes_to_check
= 64;
201 uint64_t mask
= 0xffffffffffffffff;
202 if (count512s
== 1) {
203 bytes_to_check
= count_residual
;
204 mask
= mask_residual
;
206 __m512i old_data
= _mm512_mask_loadu_epi8(r
,
208 __m512i new_data
= _mm512_mask_loadu_epi8(r
,
210 uint64_t comp
= _mm512_cmpeq_epi8_mask(old_data
, new_data
);
213 bool is_same
= (comp
& 0x1);
214 while (bytes_to_check
) {
220 d
+= uleb128_encode_small(dst
+ d
, nzrun_len
);
221 if (d
+ nzrun_len
> dlen
) {
224 nzrun_start
= new_buf
+ i
- nzrun_len
;
225 memcpy(dst
+ d
, nzrun_start
, nzrun_len
);
229 /* 64 data at a time for speed */
230 if (count512s
&& (comp
== 0xffffffffffffffff)) {
237 num
= (num
< bytes_to_check
) ? num
: bytes_to_check
;
239 bytes_to_check
-= num
;
242 if (bytes_to_check
) {
243 /* still has different data after same data */
244 d
+= uleb128_encode_small(dst
+ d
, zrun_len
);
250 if (never_same
|| zrun_len
) {
252 * never_same only acts if
253 * data begins with diff in first count512s
255 d
+= uleb128_encode_small(dst
+ d
, zrun_len
);
259 /* has diff, 64 data at a time for speed */
260 if ((bytes_to_check
== 64) && (comp
== 0x0)) {
266 num
= (num
< bytes_to_check
) ? num
: bytes_to_check
;
268 bytes_to_check
-= num
;
271 if (bytes_to_check
) {
272 /* mask like 111000 */
273 d
+= uleb128_encode_small(dst
+ d
, nzrun_len
);
275 if (d
+ nzrun_len
> dlen
) {
278 nzrun_start
= new_buf
+ i
- nzrun_len
;
279 memcpy(dst
+ d
, nzrun_start
, nzrun_len
);
287 if (nzrun_len
!= 0) {
288 d
+= uleb128_encode_small(dst
+ d
, nzrun_len
);
290 if (d
+ nzrun_len
> dlen
) {
293 nzrun_start
= new_buf
+ i
- nzrun_len
;
294 memcpy(dst
+ d
, nzrun_start
, nzrun_len
);