2 * Copyright 2013, GitHub, Inc
3 * Copyright 2009-2013, Daniel Lemire, Cliff Moon,
4 * David McIntosh, Robert Becho, Google Inc. and Veronika Zenz
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
19 #include "git-compat-util.h"
23 static inline size_t min_size(size_t a
, size_t b
)
28 static inline size_t max_size(size_t a
, size_t b
)
33 static inline void buffer_grow(struct ewah_bitmap
*self
, size_t new_size
)
35 size_t rlw_offset
= (uint8_t *)self
->rlw
- (uint8_t *)self
->buffer
;
36 ALLOC_GROW(self
->buffer
, new_size
, self
->alloc_size
);
37 self
->rlw
= self
->buffer
+ (rlw_offset
/ sizeof(eword_t
));
40 static inline void buffer_push(struct ewah_bitmap
*self
, eword_t value
)
42 buffer_grow(self
, self
->buffer_size
+ 1);
43 self
->buffer
[self
->buffer_size
++] = value
;
46 static void buffer_push_rlw(struct ewah_bitmap
*self
, eword_t value
)
48 buffer_push(self
, value
);
49 self
->rlw
= self
->buffer
+ self
->buffer_size
- 1;
52 static size_t add_empty_words(struct ewah_bitmap
*self
, int v
, size_t number
)
55 eword_t runlen
, can_add
;
57 if (rlw_get_run_bit(self
->rlw
) != v
&& rlw_size(self
->rlw
) == 0) {
58 rlw_set_run_bit(self
->rlw
, v
);
59 } else if (rlw_get_literal_words(self
->rlw
) != 0 ||
60 rlw_get_run_bit(self
->rlw
) != v
) {
61 buffer_push_rlw(self
, 0);
62 if (v
) rlw_set_run_bit(self
->rlw
, v
);
66 runlen
= rlw_get_running_len(self
->rlw
);
67 can_add
= min_size(number
, RLW_LARGEST_RUNNING_COUNT
- runlen
);
69 rlw_set_running_len(self
->rlw
, runlen
+ can_add
);
72 while (number
>= RLW_LARGEST_RUNNING_COUNT
) {
73 buffer_push_rlw(self
, 0);
75 if (v
) rlw_set_run_bit(self
->rlw
, v
);
76 rlw_set_running_len(self
->rlw
, RLW_LARGEST_RUNNING_COUNT
);
77 number
-= RLW_LARGEST_RUNNING_COUNT
;
81 buffer_push_rlw(self
, 0);
84 if (v
) rlw_set_run_bit(self
->rlw
, v
);
85 rlw_set_running_len(self
->rlw
, number
);
91 size_t ewah_add_empty_words(struct ewah_bitmap
*self
, int v
, size_t number
)
96 self
->bit_size
+= number
* BITS_IN_EWORD
;
97 return add_empty_words(self
, v
, number
);
100 static size_t add_literal(struct ewah_bitmap
*self
, eword_t new_data
)
102 eword_t current_num
= rlw_get_literal_words(self
->rlw
);
104 if (current_num
>= RLW_LARGEST_LITERAL_COUNT
) {
105 buffer_push_rlw(self
, 0);
107 rlw_set_literal_words(self
->rlw
, 1);
108 buffer_push(self
, new_data
);
112 rlw_set_literal_words(self
->rlw
, current_num
+ 1);
115 assert(rlw_get_literal_words(self
->rlw
) == current_num
+ 1);
117 buffer_push(self
, new_data
);
121 void ewah_add_dirty_words(
122 struct ewah_bitmap
*self
, const eword_t
*buffer
,
123 size_t number
, int negate
)
125 size_t literals
, can_add
;
128 literals
= rlw_get_literal_words(self
->rlw
);
129 can_add
= min_size(number
, RLW_LARGEST_LITERAL_COUNT
- literals
);
131 rlw_set_literal_words(self
->rlw
, literals
+ can_add
);
133 buffer_grow(self
, self
->buffer_size
+ can_add
);
137 for (i
= 0; i
< can_add
; ++i
)
138 self
->buffer
[self
->buffer_size
++] = ~buffer
[i
];
140 memcpy(self
->buffer
+ self
->buffer_size
,
141 buffer
, can_add
* sizeof(eword_t
));
142 self
->buffer_size
+= can_add
;
145 self
->bit_size
+= can_add
* BITS_IN_EWORD
;
147 if (number
- can_add
== 0)
150 buffer_push_rlw(self
, 0);
156 static size_t add_empty_word(struct ewah_bitmap
*self
, int v
)
158 int no_literal
= (rlw_get_literal_words(self
->rlw
) == 0);
159 eword_t run_len
= rlw_get_running_len(self
->rlw
);
161 if (no_literal
&& run_len
== 0) {
162 rlw_set_run_bit(self
->rlw
, v
);
163 assert(rlw_get_run_bit(self
->rlw
) == v
);
166 if (no_literal
&& rlw_get_run_bit(self
->rlw
) == v
&&
167 run_len
< RLW_LARGEST_RUNNING_COUNT
) {
168 rlw_set_running_len(self
->rlw
, run_len
+ 1);
169 assert(rlw_get_running_len(self
->rlw
) == run_len
+ 1);
172 buffer_push_rlw(self
, 0);
174 assert(rlw_get_running_len(self
->rlw
) == 0);
175 assert(rlw_get_run_bit(self
->rlw
) == 0);
176 assert(rlw_get_literal_words(self
->rlw
) == 0);
178 rlw_set_run_bit(self
->rlw
, v
);
179 assert(rlw_get_run_bit(self
->rlw
) == v
);
181 rlw_set_running_len(self
->rlw
, 1);
182 assert(rlw_get_running_len(self
->rlw
) == 1);
183 assert(rlw_get_literal_words(self
->rlw
) == 0);
188 size_t ewah_add(struct ewah_bitmap
*self
, eword_t word
)
190 self
->bit_size
+= BITS_IN_EWORD
;
193 return add_empty_word(self
, 0);
195 if (word
== (eword_t
)(~0))
196 return add_empty_word(self
, 1);
198 return add_literal(self
, word
);
201 void ewah_set(struct ewah_bitmap
*self
, size_t i
)
204 DIV_ROUND_UP(i
+ 1, BITS_IN_EWORD
) -
205 DIV_ROUND_UP(self
->bit_size
, BITS_IN_EWORD
);
207 assert(i
>= self
->bit_size
);
209 self
->bit_size
= i
+ 1;
213 add_empty_words(self
, 0, dist
- 1);
215 add_literal(self
, (eword_t
)1 << (i
% BITS_IN_EWORD
));
219 if (rlw_get_literal_words(self
->rlw
) == 0) {
220 rlw_set_running_len(self
->rlw
,
221 rlw_get_running_len(self
->rlw
) - 1);
222 add_literal(self
, (eword_t
)1 << (i
% BITS_IN_EWORD
));
226 self
->buffer
[self
->buffer_size
- 1] |=
227 ((eword_t
)1 << (i
% BITS_IN_EWORD
));
229 /* check if we just completed a stream of 1s */
230 if (self
->buffer
[self
->buffer_size
- 1] == (eword_t
)(~0)) {
231 self
->buffer
[--self
->buffer_size
] = 0;
232 rlw_set_literal_words(self
->rlw
,
233 rlw_get_literal_words(self
->rlw
) - 1);
234 add_empty_word(self
, 1);
238 void ewah_each_bit(struct ewah_bitmap
*self
, void (*callback
)(size_t, void*), void *payload
)
244 while (pointer
< self
->buffer_size
) {
245 eword_t
*word
= &self
->buffer
[pointer
];
247 if (rlw_get_run_bit(word
)) {
248 size_t len
= rlw_get_running_len(word
) * BITS_IN_EWORD
;
249 for (k
= 0; k
< len
; ++k
, ++pos
)
250 callback(pos
, payload
);
252 pos
+= rlw_get_running_len(word
) * BITS_IN_EWORD
;
257 for (k
= 0; k
< rlw_get_literal_words(word
); ++k
) {
260 /* todo: zero count optimization */
261 for (c
= 0; c
< BITS_IN_EWORD
; ++c
, ++pos
) {
262 if ((self
->buffer
[pointer
] & ((eword_t
)1 << c
)) != 0)
263 callback(pos
, payload
);
272 * Clear all the bits in the bitmap. Does not free or resize
275 static void ewah_clear(struct ewah_bitmap
*self
)
277 self
->buffer_size
= 1;
280 self
->rlw
= self
->buffer
;
283 struct ewah_bitmap
*ewah_new(void)
285 struct ewah_bitmap
*self
;
287 self
= xmalloc(sizeof(struct ewah_bitmap
));
288 self
->alloc_size
= 32;
289 ALLOC_ARRAY(self
->buffer
, self
->alloc_size
);
295 void ewah_free(struct ewah_bitmap
*self
)
300 if (self
->alloc_size
)
306 static void read_new_rlw(struct ewah_iterator
*it
)
308 const eword_t
*word
= NULL
;
314 word
= &it
->buffer
[it
->pointer
];
316 it
->rl
= rlw_get_running_len(word
);
317 it
->lw
= rlw_get_literal_words(word
);
318 it
->b
= rlw_get_run_bit(word
);
320 if (it
->rl
|| it
->lw
)
323 if (it
->pointer
< it
->buffer_size
- 1) {
326 it
->pointer
= it
->buffer_size
;
332 int ewah_iterator_next(eword_t
*next
, struct ewah_iterator
*it
)
334 if (it
->pointer
>= it
->buffer_size
)
337 if (it
->compressed
< it
->rl
) {
339 *next
= it
->b
? (eword_t
)(~0) : 0;
341 assert(it
->literals
< it
->lw
);
346 assert(it
->pointer
< it
->buffer_size
);
348 *next
= it
->buffer
[it
->pointer
];
351 if (it
->compressed
== it
->rl
&& it
->literals
== it
->lw
) {
352 if (++it
->pointer
< it
->buffer_size
)
359 void ewah_iterator_init(struct ewah_iterator
*it
, struct ewah_bitmap
*parent
)
361 it
->buffer
= parent
->buffer
;
362 it
->buffer_size
= parent
->buffer_size
;
371 if (it
->pointer
< it
->buffer_size
)
376 struct ewah_bitmap
*ewah_i
,
377 struct ewah_bitmap
*ewah_j
,
378 struct ewah_bitmap
*out
)
380 struct rlw_iterator rlw_i
;
381 struct rlw_iterator rlw_j
;
384 rlwit_init(&rlw_i
, ewah_i
);
385 rlwit_init(&rlw_j
, ewah_j
);
387 while (rlwit_word_size(&rlw_i
) > 0 && rlwit_word_size(&rlw_j
) > 0) {
388 while (rlw_i
.rlw
.running_len
> 0 || rlw_j
.rlw
.running_len
> 0) {
389 struct rlw_iterator
*prey
, *predator
;
393 if (rlw_i
.rlw
.running_len
< rlw_j
.rlw
.running_len
) {
401 negate_words
= !!predator
->rlw
.running_bit
;
402 index
= rlwit_discharge(prey
, out
,
403 predator
->rlw
.running_len
, negate_words
);
405 ewah_add_empty_words(out
, negate_words
,
406 predator
->rlw
.running_len
- index
);
408 rlwit_discard_first_words(predator
,
409 predator
->rlw
.running_len
);
413 rlw_i
.rlw
.literal_words
,
414 rlw_j
.rlw
.literal_words
);
419 for (k
= 0; k
< literals
; ++k
) {
421 rlw_i
.buffer
[rlw_i
.literal_word_start
+ k
] ^
422 rlw_j
.buffer
[rlw_j
.literal_word_start
+ k
]
426 rlwit_discard_first_words(&rlw_i
, literals
);
427 rlwit_discard_first_words(&rlw_j
, literals
);
431 if (rlwit_word_size(&rlw_i
) > 0)
432 rlwit_discharge(&rlw_i
, out
, ~0, 0);
434 rlwit_discharge(&rlw_j
, out
, ~0, 0);
436 out
->bit_size
= max_size(ewah_i
->bit_size
, ewah_j
->bit_size
);
439 #define BITMAP_POOL_MAX 16
440 static struct ewah_bitmap
*bitmap_pool
[BITMAP_POOL_MAX
];
441 static size_t bitmap_pool_size
;
443 struct ewah_bitmap
*ewah_pool_new(void)
445 if (bitmap_pool_size
)
446 return bitmap_pool
[--bitmap_pool_size
];
451 void ewah_pool_free(struct ewah_bitmap
*self
)
456 if (bitmap_pool_size
== BITMAP_POOL_MAX
||
457 self
->alloc_size
== 0) {
463 bitmap_pool
[bitmap_pool_size
++] = self
;
466 uint32_t ewah_checksum(struct ewah_bitmap
*self
)
468 const uint8_t *p
= (uint8_t *)self
->buffer
;
469 uint32_t crc
= (uint32_t)self
->bit_size
;
470 size_t size
= self
->buffer_size
* sizeof(eword_t
);
473 crc
= (crc
<< 5) - crc
+ (uint32_t)*p
++;