2 * Copyright 2013, GitHub, Inc
3 * Copyright 2009-2013, Daniel Lemire, Cliff Moon,
4 * David McIntosh, Robert Becho, Google Inc. and Veronika Zenz
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 #include "git-compat-util.h"
24 static inline size_t min_size(size_t a
, size_t b
)
29 static inline size_t max_size(size_t a
, size_t b
)
34 static inline void buffer_grow(struct ewah_bitmap
*self
, size_t new_size
)
36 size_t rlw_offset
= (uint8_t *)self
->rlw
- (uint8_t *)self
->buffer
;
38 if (self
->alloc_size
>= new_size
)
41 self
->alloc_size
= new_size
;
42 self
->buffer
= ewah_realloc(self
->buffer
,
43 self
->alloc_size
* sizeof(eword_t
));
44 self
->rlw
= self
->buffer
+ (rlw_offset
/ sizeof(eword_t
));
47 static inline void buffer_push(struct ewah_bitmap
*self
, eword_t value
)
49 if (self
->buffer_size
+ 1 >= self
->alloc_size
)
50 buffer_grow(self
, self
->buffer_size
* 3 / 2);
52 self
->buffer
[self
->buffer_size
++] = value
;
55 static void buffer_push_rlw(struct ewah_bitmap
*self
, eword_t value
)
57 buffer_push(self
, value
);
58 self
->rlw
= self
->buffer
+ self
->buffer_size
- 1;
61 static size_t add_empty_words(struct ewah_bitmap
*self
, int v
, size_t number
)
64 eword_t runlen
, can_add
;
66 if (rlw_get_run_bit(self
->rlw
) != v
&& rlw_size(self
->rlw
) == 0) {
67 rlw_set_run_bit(self
->rlw
, v
);
68 } else if (rlw_get_literal_words(self
->rlw
) != 0 ||
69 rlw_get_run_bit(self
->rlw
) != v
) {
70 buffer_push_rlw(self
, 0);
71 if (v
) rlw_set_run_bit(self
->rlw
, v
);
75 runlen
= rlw_get_running_len(self
->rlw
);
76 can_add
= min_size(number
, RLW_LARGEST_RUNNING_COUNT
- runlen
);
78 rlw_set_running_len(self
->rlw
, runlen
+ can_add
);
81 while (number
>= RLW_LARGEST_RUNNING_COUNT
) {
82 buffer_push_rlw(self
, 0);
84 if (v
) rlw_set_run_bit(self
->rlw
, v
);
85 rlw_set_running_len(self
->rlw
, RLW_LARGEST_RUNNING_COUNT
);
86 number
-= RLW_LARGEST_RUNNING_COUNT
;
90 buffer_push_rlw(self
, 0);
93 if (v
) rlw_set_run_bit(self
->rlw
, v
);
94 rlw_set_running_len(self
->rlw
, number
);
100 size_t ewah_add_empty_words(struct ewah_bitmap
*self
, int v
, size_t number
)
105 self
->bit_size
+= number
* BITS_IN_WORD
;
106 return add_empty_words(self
, v
, number
);
109 static size_t add_literal(struct ewah_bitmap
*self
, eword_t new_data
)
111 eword_t current_num
= rlw_get_literal_words(self
->rlw
);
113 if (current_num
>= RLW_LARGEST_LITERAL_COUNT
) {
114 buffer_push_rlw(self
, 0);
116 rlw_set_literal_words(self
->rlw
, 1);
117 buffer_push(self
, new_data
);
121 rlw_set_literal_words(self
->rlw
, current_num
+ 1);
124 assert(rlw_get_literal_words(self
->rlw
) == current_num
+ 1);
126 buffer_push(self
, new_data
);
130 void ewah_add_dirty_words(
131 struct ewah_bitmap
*self
, const eword_t
*buffer
,
132 size_t number
, int negate
)
134 size_t literals
, can_add
;
137 literals
= rlw_get_literal_words(self
->rlw
);
138 can_add
= min_size(number
, RLW_LARGEST_LITERAL_COUNT
- literals
);
140 rlw_set_literal_words(self
->rlw
, literals
+ can_add
);
142 if (self
->buffer_size
+ can_add
>= self
->alloc_size
)
143 buffer_grow(self
, (self
->buffer_size
+ can_add
) * 3 / 2);
147 for (i
= 0; i
< can_add
; ++i
)
148 self
->buffer
[self
->buffer_size
++] = ~buffer
[i
];
150 memcpy(self
->buffer
+ self
->buffer_size
,
151 buffer
, can_add
* sizeof(eword_t
));
152 self
->buffer_size
+= can_add
;
155 self
->bit_size
+= can_add
* BITS_IN_WORD
;
157 if (number
- can_add
== 0)
160 buffer_push_rlw(self
, 0);
166 static size_t add_empty_word(struct ewah_bitmap
*self
, int v
)
168 int no_literal
= (rlw_get_literal_words(self
->rlw
) == 0);
169 eword_t run_len
= rlw_get_running_len(self
->rlw
);
171 if (no_literal
&& run_len
== 0) {
172 rlw_set_run_bit(self
->rlw
, v
);
173 assert(rlw_get_run_bit(self
->rlw
) == v
);
176 if (no_literal
&& rlw_get_run_bit(self
->rlw
) == v
&&
177 run_len
< RLW_LARGEST_RUNNING_COUNT
) {
178 rlw_set_running_len(self
->rlw
, run_len
+ 1);
179 assert(rlw_get_running_len(self
->rlw
) == run_len
+ 1);
182 buffer_push_rlw(self
, 0);
184 assert(rlw_get_running_len(self
->rlw
) == 0);
185 assert(rlw_get_run_bit(self
->rlw
) == 0);
186 assert(rlw_get_literal_words(self
->rlw
) == 0);
188 rlw_set_run_bit(self
->rlw
, v
);
189 assert(rlw_get_run_bit(self
->rlw
) == v
);
191 rlw_set_running_len(self
->rlw
, 1);
192 assert(rlw_get_running_len(self
->rlw
) == 1);
193 assert(rlw_get_literal_words(self
->rlw
) == 0);
198 size_t ewah_add(struct ewah_bitmap
*self
, eword_t word
)
200 self
->bit_size
+= BITS_IN_WORD
;
203 return add_empty_word(self
, 0);
205 if (word
== (eword_t
)(~0))
206 return add_empty_word(self
, 1);
208 return add_literal(self
, word
);
211 void ewah_set(struct ewah_bitmap
*self
, size_t i
)
214 (i
+ BITS_IN_WORD
) / BITS_IN_WORD
-
215 (self
->bit_size
+ BITS_IN_WORD
- 1) / BITS_IN_WORD
;
217 assert(i
>= self
->bit_size
);
219 self
->bit_size
= i
+ 1;
223 add_empty_words(self
, 0, dist
- 1);
225 add_literal(self
, (eword_t
)1 << (i
% BITS_IN_WORD
));
229 if (rlw_get_literal_words(self
->rlw
) == 0) {
230 rlw_set_running_len(self
->rlw
,
231 rlw_get_running_len(self
->rlw
) - 1);
232 add_literal(self
, (eword_t
)1 << (i
% BITS_IN_WORD
));
236 self
->buffer
[self
->buffer_size
- 1] |=
237 ((eword_t
)1 << (i
% BITS_IN_WORD
));
239 /* check if we just completed a stream of 1s */
240 if (self
->buffer
[self
->buffer_size
- 1] == (eword_t
)(~0)) {
241 self
->buffer
[--self
->buffer_size
] = 0;
242 rlw_set_literal_words(self
->rlw
,
243 rlw_get_literal_words(self
->rlw
) - 1);
244 add_empty_word(self
, 1);
248 void ewah_each_bit(struct ewah_bitmap
*self
, void (*callback
)(size_t, void*), void *payload
)
254 while (pointer
< self
->buffer_size
) {
255 eword_t
*word
= &self
->buffer
[pointer
];
257 if (rlw_get_run_bit(word
)) {
258 size_t len
= rlw_get_running_len(word
) * BITS_IN_WORD
;
259 for (k
= 0; k
< len
; ++k
, ++pos
)
260 callback(pos
, payload
);
262 pos
+= rlw_get_running_len(word
) * BITS_IN_WORD
;
267 for (k
= 0; k
< rlw_get_literal_words(word
); ++k
) {
270 /* todo: zero count optimization */
271 for (c
= 0; c
< BITS_IN_WORD
; ++c
, ++pos
) {
272 if ((self
->buffer
[pointer
] & ((eword_t
)1 << c
)) != 0)
273 callback(pos
, payload
);
281 struct ewah_bitmap
*ewah_new(void)
283 struct ewah_bitmap
*self
;
285 self
= ewah_malloc(sizeof(struct ewah_bitmap
));
289 self
->buffer
= ewah_malloc(32 * sizeof(eword_t
));
290 self
->alloc_size
= 32;
296 void ewah_clear(struct ewah_bitmap
*self
)
298 self
->buffer_size
= 1;
301 self
->rlw
= self
->buffer
;
304 void ewah_free(struct ewah_bitmap
*self
)
309 if (self
->alloc_size
)
315 static void read_new_rlw(struct ewah_iterator
*it
)
317 const eword_t
*word
= NULL
;
323 word
= &it
->buffer
[it
->pointer
];
325 it
->rl
= rlw_get_running_len(word
);
326 it
->lw
= rlw_get_literal_words(word
);
327 it
->b
= rlw_get_run_bit(word
);
329 if (it
->rl
|| it
->lw
)
332 if (it
->pointer
< it
->buffer_size
- 1) {
335 it
->pointer
= it
->buffer_size
;
341 int ewah_iterator_next(eword_t
*next
, struct ewah_iterator
*it
)
343 if (it
->pointer
>= it
->buffer_size
)
346 if (it
->compressed
< it
->rl
) {
348 *next
= it
->b
? (eword_t
)(~0) : 0;
350 assert(it
->literals
< it
->lw
);
355 assert(it
->pointer
< it
->buffer_size
);
357 *next
= it
->buffer
[it
->pointer
];
360 if (it
->compressed
== it
->rl
&& it
->literals
== it
->lw
) {
361 if (++it
->pointer
< it
->buffer_size
)
368 void ewah_iterator_init(struct ewah_iterator
*it
, struct ewah_bitmap
*parent
)
370 it
->buffer
= parent
->buffer
;
371 it
->buffer_size
= parent
->buffer_size
;
380 if (it
->pointer
< it
->buffer_size
)
384 void ewah_not(struct ewah_bitmap
*self
)
388 while (pointer
< self
->buffer_size
) {
389 eword_t
*word
= &self
->buffer
[pointer
];
392 rlw_xor_run_bit(word
);
395 literals
= rlw_get_literal_words(word
);
396 for (k
= 0; k
< literals
; ++k
) {
397 self
->buffer
[pointer
] = ~self
->buffer
[pointer
];
404 struct ewah_bitmap
*ewah_i
,
405 struct ewah_bitmap
*ewah_j
,
406 struct ewah_bitmap
*out
)
408 struct rlw_iterator rlw_i
;
409 struct rlw_iterator rlw_j
;
412 rlwit_init(&rlw_i
, ewah_i
);
413 rlwit_init(&rlw_j
, ewah_j
);
415 while (rlwit_word_size(&rlw_i
) > 0 && rlwit_word_size(&rlw_j
) > 0) {
416 while (rlw_i
.rlw
.running_len
> 0 || rlw_j
.rlw
.running_len
> 0) {
417 struct rlw_iterator
*prey
, *predator
;
421 if (rlw_i
.rlw
.running_len
< rlw_j
.rlw
.running_len
) {
429 negate_words
= !!predator
->rlw
.running_bit
;
430 index
= rlwit_discharge(prey
, out
,
431 predator
->rlw
.running_len
, negate_words
);
433 ewah_add_empty_words(out
, negate_words
,
434 predator
->rlw
.running_len
- index
);
436 rlwit_discard_first_words(predator
,
437 predator
->rlw
.running_len
);
441 rlw_i
.rlw
.literal_words
,
442 rlw_j
.rlw
.literal_words
);
447 for (k
= 0; k
< literals
; ++k
) {
449 rlw_i
.buffer
[rlw_i
.literal_word_start
+ k
] ^
450 rlw_j
.buffer
[rlw_j
.literal_word_start
+ k
]
454 rlwit_discard_first_words(&rlw_i
, literals
);
455 rlwit_discard_first_words(&rlw_j
, literals
);
459 if (rlwit_word_size(&rlw_i
) > 0)
460 rlwit_discharge(&rlw_i
, out
, ~0, 0);
462 rlwit_discharge(&rlw_j
, out
, ~0, 0);
464 out
->bit_size
= max_size(ewah_i
->bit_size
, ewah_j
->bit_size
);
468 struct ewah_bitmap
*ewah_i
,
469 struct ewah_bitmap
*ewah_j
,
470 struct ewah_bitmap
*out
)
472 struct rlw_iterator rlw_i
;
473 struct rlw_iterator rlw_j
;
476 rlwit_init(&rlw_i
, ewah_i
);
477 rlwit_init(&rlw_j
, ewah_j
);
479 while (rlwit_word_size(&rlw_i
) > 0 && rlwit_word_size(&rlw_j
) > 0) {
480 while (rlw_i
.rlw
.running_len
> 0 || rlw_j
.rlw
.running_len
> 0) {
481 struct rlw_iterator
*prey
, *predator
;
483 if (rlw_i
.rlw
.running_len
< rlw_j
.rlw
.running_len
) {
491 if (predator
->rlw
.running_bit
== 0) {
492 ewah_add_empty_words(out
, 0,
493 predator
->rlw
.running_len
);
494 rlwit_discard_first_words(prey
,
495 predator
->rlw
.running_len
);
496 rlwit_discard_first_words(predator
,
497 predator
->rlw
.running_len
);
499 size_t index
= rlwit_discharge(prey
, out
,
500 predator
->rlw
.running_len
, 0);
501 ewah_add_empty_words(out
, 0,
502 predator
->rlw
.running_len
- index
);
503 rlwit_discard_first_words(predator
,
504 predator
->rlw
.running_len
);
509 rlw_i
.rlw
.literal_words
,
510 rlw_j
.rlw
.literal_words
);
515 for (k
= 0; k
< literals
; ++k
) {
517 rlw_i
.buffer
[rlw_i
.literal_word_start
+ k
] &
518 rlw_j
.buffer
[rlw_j
.literal_word_start
+ k
]
522 rlwit_discard_first_words(&rlw_i
, literals
);
523 rlwit_discard_first_words(&rlw_j
, literals
);
527 if (rlwit_word_size(&rlw_i
) > 0)
528 rlwit_discharge_empty(&rlw_i
, out
);
530 rlwit_discharge_empty(&rlw_j
, out
);
532 out
->bit_size
= max_size(ewah_i
->bit_size
, ewah_j
->bit_size
);
536 struct ewah_bitmap
*ewah_i
,
537 struct ewah_bitmap
*ewah_j
,
538 struct ewah_bitmap
*out
)
540 struct rlw_iterator rlw_i
;
541 struct rlw_iterator rlw_j
;
544 rlwit_init(&rlw_i
, ewah_i
);
545 rlwit_init(&rlw_j
, ewah_j
);
547 while (rlwit_word_size(&rlw_i
) > 0 && rlwit_word_size(&rlw_j
) > 0) {
548 while (rlw_i
.rlw
.running_len
> 0 || rlw_j
.rlw
.running_len
> 0) {
549 struct rlw_iterator
*prey
, *predator
;
551 if (rlw_i
.rlw
.running_len
< rlw_j
.rlw
.running_len
) {
559 if ((predator
->rlw
.running_bit
&& prey
== &rlw_i
) ||
560 (!predator
->rlw
.running_bit
&& prey
!= &rlw_i
)) {
561 ewah_add_empty_words(out
, 0,
562 predator
->rlw
.running_len
);
563 rlwit_discard_first_words(prey
,
564 predator
->rlw
.running_len
);
565 rlwit_discard_first_words(predator
,
566 predator
->rlw
.running_len
);
571 negate_words
= (&rlw_i
!= prey
);
572 index
= rlwit_discharge(prey
, out
,
573 predator
->rlw
.running_len
, negate_words
);
574 ewah_add_empty_words(out
, negate_words
,
575 predator
->rlw
.running_len
- index
);
576 rlwit_discard_first_words(predator
,
577 predator
->rlw
.running_len
);
582 rlw_i
.rlw
.literal_words
,
583 rlw_j
.rlw
.literal_words
);
588 for (k
= 0; k
< literals
; ++k
) {
590 rlw_i
.buffer
[rlw_i
.literal_word_start
+ k
] &
591 ~(rlw_j
.buffer
[rlw_j
.literal_word_start
+ k
])
595 rlwit_discard_first_words(&rlw_i
, literals
);
596 rlwit_discard_first_words(&rlw_j
, literals
);
600 if (rlwit_word_size(&rlw_i
) > 0)
601 rlwit_discharge(&rlw_i
, out
, ~0, 0);
603 rlwit_discharge_empty(&rlw_j
, out
);
605 out
->bit_size
= max_size(ewah_i
->bit_size
, ewah_j
->bit_size
);
609 struct ewah_bitmap
*ewah_i
,
610 struct ewah_bitmap
*ewah_j
,
611 struct ewah_bitmap
*out
)
613 struct rlw_iterator rlw_i
;
614 struct rlw_iterator rlw_j
;
617 rlwit_init(&rlw_i
, ewah_i
);
618 rlwit_init(&rlw_j
, ewah_j
);
620 while (rlwit_word_size(&rlw_i
) > 0 && rlwit_word_size(&rlw_j
) > 0) {
621 while (rlw_i
.rlw
.running_len
> 0 || rlw_j
.rlw
.running_len
> 0) {
622 struct rlw_iterator
*prey
, *predator
;
624 if (rlw_i
.rlw
.running_len
< rlw_j
.rlw
.running_len
) {
632 if (predator
->rlw
.running_bit
) {
633 ewah_add_empty_words(out
, 0,
634 predator
->rlw
.running_len
);
635 rlwit_discard_first_words(prey
,
636 predator
->rlw
.running_len
);
637 rlwit_discard_first_words(predator
,
638 predator
->rlw
.running_len
);
640 size_t index
= rlwit_discharge(prey
, out
,
641 predator
->rlw
.running_len
, 0);
642 ewah_add_empty_words(out
, 0,
643 predator
->rlw
.running_len
- index
);
644 rlwit_discard_first_words(predator
,
645 predator
->rlw
.running_len
);
650 rlw_i
.rlw
.literal_words
,
651 rlw_j
.rlw
.literal_words
);
656 for (k
= 0; k
< literals
; ++k
) {
658 rlw_i
.buffer
[rlw_i
.literal_word_start
+ k
] |
659 rlw_j
.buffer
[rlw_j
.literal_word_start
+ k
]
663 rlwit_discard_first_words(&rlw_i
, literals
);
664 rlwit_discard_first_words(&rlw_j
, literals
);
668 if (rlwit_word_size(&rlw_i
) > 0)
669 rlwit_discharge(&rlw_i
, out
, ~0, 0);
671 rlwit_discharge(&rlw_j
, out
, ~0, 0);
673 out
->bit_size
= max_size(ewah_i
->bit_size
, ewah_j
->bit_size
);
677 #define BITMAP_POOL_MAX 16
678 static struct ewah_bitmap
*bitmap_pool
[BITMAP_POOL_MAX
];
679 static size_t bitmap_pool_size
;
681 struct ewah_bitmap
*ewah_pool_new(void)
683 if (bitmap_pool_size
)
684 return bitmap_pool
[--bitmap_pool_size
];
689 void ewah_pool_free(struct ewah_bitmap
*self
)
694 if (bitmap_pool_size
== BITMAP_POOL_MAX
||
695 self
->alloc_size
== 0) {
701 bitmap_pool
[bitmap_pool_size
++] = self
;
704 uint32_t ewah_checksum(struct ewah_bitmap
*self
)
706 const uint8_t *p
= (uint8_t *)self
->buffer
;
707 uint32_t crc
= (uint32_t)self
->bit_size
;
708 size_t size
= self
->buffer_size
* sizeof(eword_t
);
711 crc
= (crc
<< 5) - crc
+ (uint32_t)*p
++;