/*
 * Xor Based Zero Run Length Encoding
 *
 * Copyright 2013 Red Hat, Inc. and/or its affiliates
 *
 * Orit Wasserman <owasserm@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"

#if defined(CONFIG_AVX512BW_OPT)
#include <immintrin.h>
#include "host/cpuinfo.h"
static int __attribute__((target("avx512bw")))
xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
                            uint8_t *dst, int dlen)
{
    uint32_t zrun_len = 0, nzrun_len = 0;
    int d = 0, i = 0, num = 0;
    uint8_t *nzrun_start = NULL;
    /* add 1 to include residual part in main loop */
    uint32_t count512s = (slen >> 6) + 1;
    /* count_residual is the tail of the data, i.e. count_residual = slen % 64 */
    uint32_t count_residual = slen & 0b111111;
    bool never_same = true;
    uint64_t mask_residual = 1;
    mask_residual <<= count_residual;
    __m512i r = _mm512_set1_epi32(0);
        int bytes_to_check = 64;
        uint64_t mask = 0xffffffffffffffff;
            bytes_to_check = count_residual;
        __m512i old_data = _mm512_mask_loadu_epi8(r,
                                                  mask, old_buf + i);
        __m512i new_data = _mm512_mask_loadu_epi8(r,
                                                  mask, new_buf + i);
        uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);

        bool is_same = (comp & 0x1);
        while (bytes_to_check) {
                    d += uleb128_encode_small(dst + d, nzrun_len);
                    if (d + nzrun_len > dlen) {
                        return -1;
                    }
                    nzrun_start = new_buf + i - nzrun_len;
                    memcpy(dst + d, nzrun_start, nzrun_len);
                /* 64 data at a time for speed */
                if (count512s && (comp == 0xffffffffffffffff)) {

                num = (num < bytes_to_check) ? num : bytes_to_check;
                bytes_to_check -= num;
                    /* still has different data after same data */
                    d += uleb128_encode_small(dst + d, zrun_len);

            if (never_same || zrun_len) {
                /*
                 * never_same only acts if the data begins with a diff
                 * in the first count512s
                 */
                d += uleb128_encode_small(dst + d, zrun_len);
            /* has diff, 64 data at a time for speed */
            if ((bytes_to_check == 64) && (comp == 0x0)) {

            num = (num < bytes_to_check) ? num : bytes_to_check;
            bytes_to_check -= num;
            if (bytes_to_check) {
                /* mask like 111000 */
                d += uleb128_encode_small(dst + d, nzrun_len);
                if (d + nzrun_len > dlen) {
                    return -1;
                }
                nzrun_start = new_buf + i - nzrun_len;
                memcpy(dst + d, nzrun_start, nzrun_len);
    if (nzrun_len != 0) {
        d += uleb128_encode_small(dst + d, nzrun_len);
        if (d + nzrun_len > dlen) {
            return -1;
        }
        nzrun_start = new_buf + i - nzrun_len;
        memcpy(dst + d, nzrun_start, nzrun_len);
static int xbzrle_encode_buffer_int(uint8_t *old_buf, uint8_t *new_buf,
                                    int slen, uint8_t *dst, int dlen);

static int (*accel_func)(uint8_t *, uint8_t *, int, uint8_t *, int);
static void __attribute__((constructor)) init_accel(void)
{
    unsigned info = cpuinfo_init();
    if (info & CPUINFO_AVX512BW) {
        accel_func = xbzrle_encode_buffer_avx512;
    } else {
        accel_func = xbzrle_encode_buffer_int;
    }
}
int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
                         uint8_t *dst, int dlen)
{
    return accel_func(old_buf, new_buf, slen, dst, dlen);
}
#define xbzrle_encode_buffer xbzrle_encode_buffer_int
#endif
/*
 * nzrun = length byte...
 * length = uleb128 encoded integer
 */
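/*
 * Added illustration (not part of the original comment): a page is encoded
 * as alternating zrun/nzrun records, each preceded by a uleb128 length.
 * For hypothetical 8-byte inputs
 *
 *     old = { 0, 0, 0, 0, 1, 2, 3, 4 }
 *     new = { 0, 0, 0, 0, 9, 9, 3, 4 }
 *
 * the encoder emits a zrun of length 4 (the unchanged prefix) followed by
 * an nzrun of length 2 carrying the changed bytes from new:
 *
 *     dst = { 0x04, 0x02, 0x09, 0x09 }, and the return value is 4.
 *
 * Trailing unchanged bytes are not encoded at all ("skip last zero run"
 * below); the decoder applies the delta on top of a destination buffer
 * that already holds the old data.
 */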
int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
                         uint8_t *dst, int dlen)
{
    uint32_t zrun_len = 0, nzrun_len = 0;
    uint8_t *nzrun_start = NULL;
    g_assert(!(((uintptr_t)old_buf | (uintptr_t)new_buf | slen) %
               sizeof(long)));
        /* not aligned to sizeof(long) */
        res = (slen - i) % sizeof(long);
        while (res && old_buf[i] == new_buf[i]) {
        /* word at a time for speed */
            while (i < slen &&
                   (*(long *)(old_buf + i)) == (*(long *)(new_buf + i))) {
                zrun_len += sizeof(long);
            /* go over the rest */
            while (i < slen && old_buf[i] == new_buf[i]) {

        /* buffer unchanged */
        if (zrun_len == slen) {
            return 0;
        }

        /* skip last zero run */

        d += uleb128_encode_small(dst + d, zrun_len);

        nzrun_start = new_buf + i;
        /* not aligned to sizeof(long) */
        res = (slen - i) % sizeof(long);
        while (res && old_buf[i] != new_buf[i]) {

        /* word at a time for speed, use of 32-bit long okay */
            /* truncation to 32-bit long okay */
            unsigned long mask = (unsigned long)0x0101010101010101ULL;
                xor = *(unsigned long *)(old_buf + i)
                      ^ *(unsigned long *)(new_buf + i);
                if ((xor - mask) & ~xor & (mask << 7)) {
                    /* found the end of an nzrun within the current long */
                    while (old_buf[i] != new_buf[i]) {

                    nzrun_len += sizeof(long);
        d += uleb128_encode_small(dst + d, nzrun_len);
        if (d + nzrun_len > dlen) {
            return -1;
        }
        memcpy(dst + d, nzrun_start, nzrun_len);
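/*
 * Added note (not in the original source): the `(xor - mask) & ~xor &
 * (mask << 7)` test above is the classic SWAR zero-byte check.  For a
 * word x it is nonzero exactly when some byte of x is zero, i.e. when
 * old_buf and new_buf agree at some byte inside the current long, which
 * is where the nzrun ends.  A minimal standalone sketch follows; the
 * helper name is hypothetical.
 */
#if 0   /* illustration only, not compiled */
static inline bool any_byte_is_zero(unsigned long x)
{
    const unsigned long ones = (unsigned long)0x0101010101010101ULL;

    /*
     * A zero byte of x borrows in (x - ones) and has its high bit clear
     * in x, so it survives the & ~x & (ones << 7) filter.
     */
    return ((x - ones) & ~x & (ones << 7)) != 0;
}

/* any_byte_is_zero(0x11002233UL) is true: the second-lowest byte is zero. */
#endif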
int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
{
        if ((slen - i) < 2) {
            return -1;
        }

        ret = uleb128_decode_small(src + i, &count);
        if (ret < 0 || (i && !count)) {
        if ((slen - i) < 2) {
            return -1;
        }

        ret = uleb128_decode_small(src + i, &count);
        if (ret < 0 || !count) {
            return -1;
        }

        if (d + count > dlen || i + count > slen) {
            return -1;
        }

        memcpy(dst + d, src + i, count);
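/*
 * Usage sketch (added illustration, not part of the original file).  The
 * encoder emits a delta of new_page against old_page; the decoder applies
 * that delta to a destination that already holds the old data, as a
 * migration destination would.  The helper name and page_size parameter
 * are hypothetical; buffers must satisfy the alignment/size assertion in
 * xbzrle_encode_buffer().
 */
#if 0   /* illustration only, not compiled */
static bool xbzrle_roundtrip_example(const uint8_t *old_page,
                                     const uint8_t *new_page, int page_size)
{
    uint8_t *delta = g_malloc(page_size);
    uint8_t *out = g_malloc(page_size);
    bool ok = false;
    int enc, dec;

    /* start from the old data; zruns leave these bytes untouched */
    memcpy(out, old_page, page_size);

    /* encode returns the delta size, 0 for identical pages, -1 on overflow */
    enc = xbzrle_encode_buffer((uint8_t *)old_page, (uint8_t *)new_page,
                               page_size, delta, page_size);
    if (enc == 0) {
        ok = true;                      /* nothing to send */
    } else if (enc > 0) {
        dec = xbzrle_decode_buffer(delta, enc, out, page_size);
        ok = dec >= 0 && memcmp(out, new_page, page_size) == 0;
    }

    g_free(out);
    g_free(delta);
    return ok;
}
#endif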