2 * Xor Based Zero Run Length Encoding
4 * Copyright 2013 Red Hat, Inc. and/or its affiliates
7 * Orit Wasserman <owasserm@redhat.com>
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
13 #include "qemu/osdep.h"
14 #include "qemu/cutils.h"
23 nzrun = length byte...
25 length = uleb128 encoded integer
/*
 * xbzrle_encode_buffer - encode the differences between old_buf and
 * new_buf (slen bytes each) into dst, whose capacity is dlen bytes.
 *
 * As far as this excerpt shows, the output consists of uleb128-encoded
 * counts of unchanged bytes (zrun_len) and uleb128-encoded counts of
 * changed bytes (nzrun_len), the latter followed by the changed bytes
 * copied from new_buf.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * confirm the values returned for an unchanged buffer and for a dst
 * overflow against the full source.
 */
27 int xbzrle_encode_buffer(uint8_t *old_buf
, uint8_t *new_buf
, int slen
,
28 uint8_t *dst
, int dlen
)
/* lengths of the current unchanged (zrun) and changed (nzrun) runs */
30 uint32_t zrun_len
= 0, nzrun_len
= 0;
/* start of the current changed run inside new_buf (assigned below) */
33 uint8_t *nzrun_start
= NULL
;
/*
 * Both addresses and slen are OR-ed together and tested with a modulo, so
 * all three must be multiples of the divisor. The divisor is cut off in
 * this view (presumably sizeof(long) - confirm).
 */
35 g_assert(!(((uintptr_t)old_buf
| (uintptr_t)new_buf
| slen
) %
/*
 * Zero run, phase 1: res is the amount by which the remaining length
 * (slen - i) exceeds a multiple of sizeof(long). The loop below runs while
 * res is non-zero and the bytes at offset i are equal.
 */
44 /* not aligned to sizeof(long) */
45 res
= (slen
- i
) % sizeof(long);
46 while (res
&& old_buf
[i
] == new_buf
[i
]) {
/*
 * Zero run, phase 2: compare one long from each buffer; an equal word
 * extends the zero run by sizeof(long) bytes.
 */
52 /* word at a time for speed */
55 (*(long *)(old_buf
+ i
)) == (*(long *)(new_buf
+ i
))) {
57 zrun_len
+= sizeof(long);
/* Zero run, phase 3: byte by byte until a difference or the end of input */
60 /* go over the rest */
61 while (i
< slen
&& old_buf
[i
] == new_buf
[i
]) {
/* the zero run spans the whole input: old_buf and new_buf are identical */
67 /* buffer unchanged */
68 if (zrun_len
== slen
) {
72 /* skip last zero run */
/*
 * Write the zero-run length at dst + d; d advances by the value that
 * uleb128_encode_small() returns.
 */
77 d
+= uleb128_encode_small(dst
+ d
, zrun_len
);
/* the changed run begins at the current offset in new_buf */
80 nzrun_start
= new_buf
+ i
;
/*
 * Non-zero run, phase 1: same residual computation as for the zero run,
 * but the loop condition requires the bytes to differ.
 */
86 /* not aligned to sizeof(long) */
87 res
= (slen
- i
) % sizeof(long);
88 while (res
&& old_buf
[i
] != new_buf
[i
]) {
94 /* word at a time for speed, use of 32-bit long okay */
/*
 * mask holds 0x01 in every byte of an unsigned long; the 64-bit constant
 * is truncated by the cast where long is 32 bits wide.
 */
96 /* truncation to 32-bit long okay */
97 unsigned long mask
= (unsigned long)0x0101010101010101ULL
;
/* xor has a 0x00 byte exactly where old_buf and new_buf hold equal bytes */
100 xor = *(unsigned long *)(old_buf
+ i
)
101 ^ *(unsigned long *)(new_buf
+ i
);
/*
 * Zero-byte test: the expression is non-zero if and only if at least one
 * byte of xor is 0x00, i.e. the changed run ends inside this word.
 */
102 if ((xor - mask
) & ~xor & (mask
<< 7)) {
103 /* found the end of an nzrun within the current long */
104 while (old_buf
[i
] != new_buf
[i
]) {
/* no equal byte in this word: all of it belongs to the changed run */
111 nzrun_len
+= sizeof(long);
/* write the changed-run length at dst + d and advance d */
116 d
+= uleb128_encode_small(dst
+ d
, nzrun_len
);
/*
 * The payload of nzrun_len bytes must fit in what is left of dst.
 * NOTE(review): in the visible lines the length prefix above is written
 * before this check; whether an earlier bound check covers the prefix is
 * not visible in this excerpt - confirm.
 */
118 if (d
+ nzrun_len
> dlen
) {
/* copy the changed bytes verbatim from new_buf */
121 memcpy(dst
+ d
, nzrun_start
, nzrun_len
);
/*
 * xbzrle_decode_buffer - apply the encoded stream src (slen bytes) to dst,
 * whose size is dlen bytes.
 *
 * Visible in this excerpt: two uleb128 counts are decoded in turn; after
 * the second one, count bytes are copied from src into dst.
 *
 * NOTE(review): how the first count is applied to the output offset d
 * (presumably it skips unchanged bytes) and the return values are outside
 * this excerpt - confirm against the full source.
 */
129 int xbzrle_decode_buffer(uint8_t *src
, int slen
, uint8_t *dst
, int dlen
)
/* fewer than two input bytes remain before the first count is decoded */
138 if ((slen
- i
) < 2) {
/* decode the first count of the pair from the current input offset */
142 ret
= uleb128_decode_small(src
+ i
, &count
);
/*
 * Taken on a decode error, or on a zero count anywhere except at the
 * very start of the stream (i == 0).
 */
143 if (ret
< 0 || (i
&& !count
)) {
/* same minimum-input check before the second count */
155 if ((slen
- i
) < 2) {
159 ret
= uleb128_decode_small(src
+ i
, &count
);
/* taken when the second count fails to decode or is zero */
160 if (ret
< 0 || !count
) {
/*
 * Taken when count bytes would run past the output (d against dlen) or
 * past the input (i against slen).
 */
166 if (d
+ count
> dlen
|| i
+ count
> slen
) {
/* copy the literal bytes from the stream into dst */
170 memcpy(dst
+ d
, src
+ i
, count
);
178 #if defined(CONFIG_AVX512BW_OPT)
179 #pragma GCC push_options
180 #pragma GCC target("avx512bw")
181 #include <immintrin.h>
/*
 * xbzrle_encode_buffer_avx512 - variant of the encoder that compares
 * old_buf and new_buf through 512-bit loads and a per-byte equality mask.
 * The parameters are the same as for xbzrle_encode_buffer.
 *
 * NOTE(review): loop headers, offset increments and return statements are
 * missing from this excerpt; the comments below describe only the
 * statements that are visible.
 */
182 int xbzrle_encode_buffer_avx512(uint8_t *old_buf
, uint8_t *new_buf
, int slen
,
183 uint8_t *dst
, int dlen
)
/* lengths of the current unchanged (zrun) and changed (nzrun) runs */
185 uint32_t zrun_len
= 0, nzrun_len
= 0;
/*
 * d: write offset into dst; i: offset into new_buf used to locate run
 * starts; num: length of the run found in the current mask.
 */
186 int d
= 0, i
= 0, num
= 0;
187 uint8_t *nzrun_start
= NULL
;
188 /* add 1 to include residual part in main loop */
189 uint32_t count512s
= (slen
>> 6) + 1;
190 /* countResidual is tail of data, i.e., countResidual = slen % 64 */
191 uint32_t count_residual
= slen
& 0b111111;
192 bool never_same
= true;
/*
 * As visible here, mask_residual ends up as 1 << count_residual.
 * NOTE(review): a load mask selecting the low count_residual bytes would
 * be (1 << count_residual) - 1; the decrement is not visible in this
 * excerpt - confirm it exists in the full source.
 */
193 uint64_t mask_residual
= 1;
194 mask_residual
<<= count_residual
;
/* all-zero vector, passed as the first argument of the masked loads below */
196 __m512i r
= _mm512_set1_epi32(0);
/* defaults for a full chunk: 64 bytes to check, every mask bit set */
203 int bytes_to_check
= 64;
204 uint64_t mask
= 0xffffffffffffffff;
/* count512s == 1: switch to the residual byte count and mask */
205 if (count512s
== 1) {
206 bytes_to_check
= count_residual
;
207 mask
= mask_residual
;
/* masked loads of the old and new data (remaining arguments cut off here) */
209 __m512i old_data
= _mm512_mask_loadu_epi8(r
,
211 __m512i new_data
= _mm512_mask_loadu_epi8(r
,
/* bit k of comp is set when byte k of old_data equals byte k of new_data */
213 uint64_t comp
= _mm512_cmpeq_epi8_mask(old_data
, new_data
);
/* is_same: whether the lowest mask bit (first byte) compares equal */
216 bool is_same
= (comp
& 0x1);
/* loop while bytes of the current chunk remain to be classified */
217 while (bytes_to_check
) {
/*
 * Emit a changed run: length prefix, bound check on the payload, then
 * the payload itself. The run is taken to end at offset i, so it starts
 * at new_buf + i - nzrun_len.
 */
220 d
+= uleb128_encode_small(dst
+ d
, nzrun_len
);
221 if (d
+ nzrun_len
> dlen
) {
224 nzrun_start
= new_buf
+ i
- nzrun_len
;
225 memcpy(dst
+ d
, nzrun_start
, nzrun_len
);
/* all 64 mask bits set: the whole chunk is identical */
229 /* 64 data at a time for speed */
230 if (count512s
&& (comp
== 0xffffffffffffffff)) {
/*
 * The lowest set bit of ~comp marks the first differing byte, so num is
 * the number of leading equal bytes, clamped to bytes_to_check.
 */
236 num
= __builtin_ctzll(~comp
);
237 num
= (num
< bytes_to_check
) ? num
: bytes_to_check
;
239 bytes_to_check
-= num
;
242 if (bytes_to_check
) {
243 /* still has different data after same data */
244 d
+= uleb128_encode_small(dst
+ d
, zrun_len
);
/* the zero-run length is written when never_same is set or zrun_len != 0 */
250 if (never_same
|| zrun_len
) {
252 * never_same only acts if
253 * data begins with diff in first count512s
255 d
+= uleb128_encode_small(dst
+ d
, zrun_len
);
/* a full chunk with no mask bit set: all 64 bytes differ */
259 /* has diff, 64 data at a time for speed */
260 if ((bytes_to_check
== 64) && (comp
== 0x0)) {
/*
 * The lowest set bit of comp marks the first equal byte, so num is the
 * number of leading differing bytes, clamped to bytes_to_check.
 * NOTE(review): GCC documents __builtin_ctzll(0) as undefined, and
 * comp == 0 is filtered above only when bytes_to_check == 64 - confirm
 * that comp cannot be 0 here for a partially consumed chunk.
 */
265 num
= __builtin_ctzll(comp
);
266 num
= (num
< bytes_to_check
) ? num
: bytes_to_check
;
268 bytes_to_check
-= num
;
271 if (bytes_to_check
) {
272 /* mask like 111000 */
273 d
+= uleb128_encode_small(dst
+ d
, nzrun_len
);
/* the payload must fit in the remainder of dst before it is copied */
275 if (d
+ nzrun_len
> dlen
) {
278 nzrun_start
= new_buf
+ i
- nzrun_len
;
279 memcpy(dst
+ d
, nzrun_start
, nzrun_len
);
/* emit a changed run that is still pending (non-zero nzrun_len) */
287 if (nzrun_len
!= 0) {
288 d
+= uleb128_encode_small(dst
+ d
, nzrun_len
);
290 if (d
+ nzrun_len
> dlen
) {
293 nzrun_start
= new_buf
+ i
- nzrun_len
;
294 memcpy(dst
+ d
, nzrun_start
, nzrun_len
);
299 #pragma GCC pop_options