2 * Copyright 2013, GitHub, Inc
3 * Copyright 2009-2013, Daniel Lemire, Cliff Moon,
4 * David McIntosh, Robert Becho, Google Inc. and Veronika Zenz
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
19 #include "git-compat-util.h"
24 static inline size_t min_size(size_t a
, size_t b
)
29 static inline size_t max_size(size_t a
, size_t b
)
34 static inline void buffer_grow(struct ewah_bitmap
*self
, size_t new_size
)
36 size_t rlw_offset
= (uint8_t *)self
->rlw
- (uint8_t *)self
->buffer
;
37 ALLOC_GROW(self
->buffer
, new_size
, self
->alloc_size
);
38 self
->rlw
= self
->buffer
+ (rlw_offset
/ sizeof(eword_t
));
41 static inline void buffer_push(struct ewah_bitmap
*self
, eword_t value
)
43 buffer_grow(self
, self
->buffer_size
+ 1);
44 self
->buffer
[self
->buffer_size
++] = value
;
47 static void buffer_push_rlw(struct ewah_bitmap
*self
, eword_t value
)
49 buffer_push(self
, value
);
50 self
->rlw
= self
->buffer
+ self
->buffer_size
- 1;
53 static size_t add_empty_words(struct ewah_bitmap
*self
, int v
, size_t number
)
56 eword_t runlen
, can_add
;
58 if (rlw_get_run_bit(self
->rlw
) != v
&& rlw_size(self
->rlw
) == 0) {
59 rlw_set_run_bit(self
->rlw
, v
);
60 } else if (rlw_get_literal_words(self
->rlw
) != 0 ||
61 rlw_get_run_bit(self
->rlw
) != v
) {
62 buffer_push_rlw(self
, 0);
63 if (v
) rlw_set_run_bit(self
->rlw
, v
);
67 runlen
= rlw_get_running_len(self
->rlw
);
68 can_add
= min_size(number
, RLW_LARGEST_RUNNING_COUNT
- runlen
);
70 rlw_set_running_len(self
->rlw
, runlen
+ can_add
);
73 while (number
>= RLW_LARGEST_RUNNING_COUNT
) {
74 buffer_push_rlw(self
, 0);
76 if (v
) rlw_set_run_bit(self
->rlw
, v
);
77 rlw_set_running_len(self
->rlw
, RLW_LARGEST_RUNNING_COUNT
);
78 number
-= RLW_LARGEST_RUNNING_COUNT
;
82 buffer_push_rlw(self
, 0);
85 if (v
) rlw_set_run_bit(self
->rlw
, v
);
86 rlw_set_running_len(self
->rlw
, number
);
92 size_t ewah_add_empty_words(struct ewah_bitmap
*self
, int v
, size_t number
)
97 self
->bit_size
+= number
* BITS_IN_EWORD
;
98 return add_empty_words(self
, v
, number
);
101 static size_t add_literal(struct ewah_bitmap
*self
, eword_t new_data
)
103 eword_t current_num
= rlw_get_literal_words(self
->rlw
);
105 if (current_num
>= RLW_LARGEST_LITERAL_COUNT
) {
106 buffer_push_rlw(self
, 0);
108 rlw_set_literal_words(self
->rlw
, 1);
109 buffer_push(self
, new_data
);
113 rlw_set_literal_words(self
->rlw
, current_num
+ 1);
116 assert(rlw_get_literal_words(self
->rlw
) == current_num
+ 1);
118 buffer_push(self
, new_data
);
122 void ewah_add_dirty_words(
123 struct ewah_bitmap
*self
, const eword_t
*buffer
,
124 size_t number
, int negate
)
126 size_t literals
, can_add
;
129 literals
= rlw_get_literal_words(self
->rlw
);
130 can_add
= min_size(number
, RLW_LARGEST_LITERAL_COUNT
- literals
);
132 rlw_set_literal_words(self
->rlw
, literals
+ can_add
);
134 buffer_grow(self
, self
->buffer_size
+ can_add
);
138 for (i
= 0; i
< can_add
; ++i
)
139 self
->buffer
[self
->buffer_size
++] = ~buffer
[i
];
141 memcpy(self
->buffer
+ self
->buffer_size
,
142 buffer
, can_add
* sizeof(eword_t
));
143 self
->buffer_size
+= can_add
;
146 self
->bit_size
+= can_add
* BITS_IN_EWORD
;
148 if (number
- can_add
== 0)
151 buffer_push_rlw(self
, 0);
157 static size_t add_empty_word(struct ewah_bitmap
*self
, int v
)
159 int no_literal
= (rlw_get_literal_words(self
->rlw
) == 0);
160 eword_t run_len
= rlw_get_running_len(self
->rlw
);
162 if (no_literal
&& run_len
== 0) {
163 rlw_set_run_bit(self
->rlw
, v
);
164 assert(rlw_get_run_bit(self
->rlw
) == v
);
167 if (no_literal
&& rlw_get_run_bit(self
->rlw
) == v
&&
168 run_len
< RLW_LARGEST_RUNNING_COUNT
) {
169 rlw_set_running_len(self
->rlw
, run_len
+ 1);
170 assert(rlw_get_running_len(self
->rlw
) == run_len
+ 1);
173 buffer_push_rlw(self
, 0);
175 assert(rlw_get_running_len(self
->rlw
) == 0);
176 assert(rlw_get_run_bit(self
->rlw
) == 0);
177 assert(rlw_get_literal_words(self
->rlw
) == 0);
179 rlw_set_run_bit(self
->rlw
, v
);
180 assert(rlw_get_run_bit(self
->rlw
) == v
);
182 rlw_set_running_len(self
->rlw
, 1);
183 assert(rlw_get_running_len(self
->rlw
) == 1);
184 assert(rlw_get_literal_words(self
->rlw
) == 0);
189 size_t ewah_add(struct ewah_bitmap
*self
, eword_t word
)
191 self
->bit_size
+= BITS_IN_EWORD
;
194 return add_empty_word(self
, 0);
196 if (word
== (eword_t
)(~0))
197 return add_empty_word(self
, 1);
199 return add_literal(self
, word
);
202 void ewah_set(struct ewah_bitmap
*self
, size_t i
)
205 DIV_ROUND_UP(i
+ 1, BITS_IN_EWORD
) -
206 DIV_ROUND_UP(self
->bit_size
, BITS_IN_EWORD
);
208 assert(i
>= self
->bit_size
);
210 self
->bit_size
= i
+ 1;
214 add_empty_words(self
, 0, dist
- 1);
216 add_literal(self
, (eword_t
)1 << (i
% BITS_IN_EWORD
));
220 if (rlw_get_literal_words(self
->rlw
) == 0) {
221 rlw_set_running_len(self
->rlw
,
222 rlw_get_running_len(self
->rlw
) - 1);
223 add_literal(self
, (eword_t
)1 << (i
% BITS_IN_EWORD
));
227 self
->buffer
[self
->buffer_size
- 1] |=
228 ((eword_t
)1 << (i
% BITS_IN_EWORD
));
230 /* check if we just completed a stream of 1s */
231 if (self
->buffer
[self
->buffer_size
- 1] == (eword_t
)(~0)) {
232 self
->buffer
[--self
->buffer_size
] = 0;
233 rlw_set_literal_words(self
->rlw
,
234 rlw_get_literal_words(self
->rlw
) - 1);
235 add_empty_word(self
, 1);
239 void ewah_each_bit(struct ewah_bitmap
*self
, void (*callback
)(size_t, void*), void *payload
)
245 while (pointer
< self
->buffer_size
) {
246 eword_t
*word
= &self
->buffer
[pointer
];
248 if (rlw_get_run_bit(word
)) {
249 size_t len
= rlw_get_running_len(word
) * BITS_IN_EWORD
;
250 for (k
= 0; k
< len
; ++k
, ++pos
)
251 callback(pos
, payload
);
253 pos
+= rlw_get_running_len(word
) * BITS_IN_EWORD
;
258 for (k
= 0; k
< rlw_get_literal_words(word
); ++k
) {
261 /* todo: zero count optimization */
262 for (c
= 0; c
< BITS_IN_EWORD
; ++c
, ++pos
) {
263 if ((self
->buffer
[pointer
] & ((eword_t
)1 << c
)) != 0)
264 callback(pos
, payload
);
273 * Clear all the bits in the bitmap. Does not free or resize
276 static void ewah_clear(struct ewah_bitmap
*self
)
278 self
->buffer_size
= 1;
281 self
->rlw
= self
->buffer
;
284 struct ewah_bitmap
*ewah_new(void)
286 struct ewah_bitmap
*self
;
288 self
= xmalloc(sizeof(struct ewah_bitmap
));
289 self
->alloc_size
= 32;
290 ALLOC_ARRAY(self
->buffer
, self
->alloc_size
);
296 void ewah_free(struct ewah_bitmap
*self
)
301 if (self
->alloc_size
)
307 static void read_new_rlw(struct ewah_iterator
*it
)
309 const eword_t
*word
= NULL
;
315 word
= &it
->buffer
[it
->pointer
];
317 it
->rl
= rlw_get_running_len(word
);
318 it
->lw
= rlw_get_literal_words(word
);
319 it
->b
= rlw_get_run_bit(word
);
321 if (it
->rl
|| it
->lw
)
324 if (it
->pointer
< it
->buffer_size
- 1) {
327 it
->pointer
= it
->buffer_size
;
333 int ewah_iterator_next(eword_t
*next
, struct ewah_iterator
*it
)
335 if (it
->pointer
>= it
->buffer_size
)
338 if (it
->compressed
< it
->rl
) {
340 *next
= it
->b
? (eword_t
)(~0) : 0;
342 assert(it
->literals
< it
->lw
);
347 assert(it
->pointer
< it
->buffer_size
);
349 *next
= it
->buffer
[it
->pointer
];
352 if (it
->compressed
== it
->rl
&& it
->literals
== it
->lw
) {
353 if (++it
->pointer
< it
->buffer_size
)
360 void ewah_iterator_init(struct ewah_iterator
*it
, struct ewah_bitmap
*parent
)
362 it
->buffer
= parent
->buffer
;
363 it
->buffer_size
= parent
->buffer_size
;
372 if (it
->pointer
< it
->buffer_size
)
377 struct ewah_bitmap
*ewah_i
,
378 struct ewah_bitmap
*ewah_j
,
379 struct ewah_bitmap
*out
)
381 struct rlw_iterator rlw_i
;
382 struct rlw_iterator rlw_j
;
385 rlwit_init(&rlw_i
, ewah_i
);
386 rlwit_init(&rlw_j
, ewah_j
);
388 while (rlwit_word_size(&rlw_i
) > 0 && rlwit_word_size(&rlw_j
) > 0) {
389 while (rlw_i
.rlw
.running_len
> 0 || rlw_j
.rlw
.running_len
> 0) {
390 struct rlw_iterator
*prey
, *predator
;
394 if (rlw_i
.rlw
.running_len
< rlw_j
.rlw
.running_len
) {
402 negate_words
= !!predator
->rlw
.running_bit
;
403 index
= rlwit_discharge(prey
, out
,
404 predator
->rlw
.running_len
, negate_words
);
406 ewah_add_empty_words(out
, negate_words
,
407 predator
->rlw
.running_len
- index
);
409 rlwit_discard_first_words(predator
,
410 predator
->rlw
.running_len
);
414 rlw_i
.rlw
.literal_words
,
415 rlw_j
.rlw
.literal_words
);
420 for (k
= 0; k
< literals
; ++k
) {
422 rlw_i
.buffer
[rlw_i
.literal_word_start
+ k
] ^
423 rlw_j
.buffer
[rlw_j
.literal_word_start
+ k
]
427 rlwit_discard_first_words(&rlw_i
, literals
);
428 rlwit_discard_first_words(&rlw_j
, literals
);
432 if (rlwit_word_size(&rlw_i
) > 0)
433 rlwit_discharge(&rlw_i
, out
, ~0, 0);
435 rlwit_discharge(&rlw_j
, out
, ~0, 0);
437 out
->bit_size
= max_size(ewah_i
->bit_size
, ewah_j
->bit_size
);
440 #define BITMAP_POOL_MAX 16
441 static struct ewah_bitmap
*bitmap_pool
[BITMAP_POOL_MAX
];
442 static size_t bitmap_pool_size
;
444 struct ewah_bitmap
*ewah_pool_new(void)
446 if (bitmap_pool_size
)
447 return bitmap_pool
[--bitmap_pool_size
];
452 void ewah_pool_free(struct ewah_bitmap
*self
)
457 if (bitmap_pool_size
== BITMAP_POOL_MAX
||
458 self
->alloc_size
== 0) {
464 bitmap_pool
[bitmap_pool_size
++] = self
;
467 uint32_t ewah_checksum(struct ewah_bitmap
*self
)
469 const uint8_t *p
= (uint8_t *)self
->buffer
;
470 uint32_t crc
= (uint32_t)self
->bit_size
;
471 size_t size
= self
->buffer_size
* sizeof(eword_t
);
474 crc
= (crc
<< 5) - crc
+ (uint32_t)*p
++;