2 * Copyright 2013, GitHub, Inc
3 * Copyright 2009-2013, Daniel Lemire, Cliff Moon,
4 * David McIntosh, Robert Becho, Google Inc. and Veronika Zenz
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 #include "git-compat-util.h"
24 static inline size_t min_size(size_t a
, size_t b
)
29 static inline size_t max_size(size_t a
, size_t b
)
34 static inline void buffer_grow(struct ewah_bitmap
*self
, size_t new_size
)
36 size_t rlw_offset
= (uint8_t *)self
->rlw
- (uint8_t *)self
->buffer
;
38 if (self
->alloc_size
>= new_size
)
41 self
->alloc_size
= new_size
;
42 REALLOC_ARRAY(self
->buffer
, self
->alloc_size
);
43 self
->rlw
= self
->buffer
+ (rlw_offset
/ sizeof(eword_t
));
46 static inline void buffer_push(struct ewah_bitmap
*self
, eword_t value
)
48 if (self
->buffer_size
+ 1 >= self
->alloc_size
)
49 buffer_grow(self
, self
->buffer_size
* 3 / 2);
51 self
->buffer
[self
->buffer_size
++] = value
;
54 static void buffer_push_rlw(struct ewah_bitmap
*self
, eword_t value
)
56 buffer_push(self
, value
);
57 self
->rlw
= self
->buffer
+ self
->buffer_size
- 1;
60 static size_t add_empty_words(struct ewah_bitmap
*self
, int v
, size_t number
)
63 eword_t runlen
, can_add
;
65 if (rlw_get_run_bit(self
->rlw
) != v
&& rlw_size(self
->rlw
) == 0) {
66 rlw_set_run_bit(self
->rlw
, v
);
67 } else if (rlw_get_literal_words(self
->rlw
) != 0 ||
68 rlw_get_run_bit(self
->rlw
) != v
) {
69 buffer_push_rlw(self
, 0);
70 if (v
) rlw_set_run_bit(self
->rlw
, v
);
74 runlen
= rlw_get_running_len(self
->rlw
);
75 can_add
= min_size(number
, RLW_LARGEST_RUNNING_COUNT
- runlen
);
77 rlw_set_running_len(self
->rlw
, runlen
+ can_add
);
80 while (number
>= RLW_LARGEST_RUNNING_COUNT
) {
81 buffer_push_rlw(self
, 0);
83 if (v
) rlw_set_run_bit(self
->rlw
, v
);
84 rlw_set_running_len(self
->rlw
, RLW_LARGEST_RUNNING_COUNT
);
85 number
-= RLW_LARGEST_RUNNING_COUNT
;
89 buffer_push_rlw(self
, 0);
92 if (v
) rlw_set_run_bit(self
->rlw
, v
);
93 rlw_set_running_len(self
->rlw
, number
);
99 size_t ewah_add_empty_words(struct ewah_bitmap
*self
, int v
, size_t number
)
104 self
->bit_size
+= number
* BITS_IN_EWORD
;
105 return add_empty_words(self
, v
, number
);
108 static size_t add_literal(struct ewah_bitmap
*self
, eword_t new_data
)
110 eword_t current_num
= rlw_get_literal_words(self
->rlw
);
112 if (current_num
>= RLW_LARGEST_LITERAL_COUNT
) {
113 buffer_push_rlw(self
, 0);
115 rlw_set_literal_words(self
->rlw
, 1);
116 buffer_push(self
, new_data
);
120 rlw_set_literal_words(self
->rlw
, current_num
+ 1);
123 assert(rlw_get_literal_words(self
->rlw
) == current_num
+ 1);
125 buffer_push(self
, new_data
);
129 void ewah_add_dirty_words(
130 struct ewah_bitmap
*self
, const eword_t
*buffer
,
131 size_t number
, int negate
)
133 size_t literals
, can_add
;
136 literals
= rlw_get_literal_words(self
->rlw
);
137 can_add
= min_size(number
, RLW_LARGEST_LITERAL_COUNT
- literals
);
139 rlw_set_literal_words(self
->rlw
, literals
+ can_add
);
141 if (self
->buffer_size
+ can_add
>= self
->alloc_size
)
142 buffer_grow(self
, (self
->buffer_size
+ can_add
) * 3 / 2);
146 for (i
= 0; i
< can_add
; ++i
)
147 self
->buffer
[self
->buffer_size
++] = ~buffer
[i
];
149 memcpy(self
->buffer
+ self
->buffer_size
,
150 buffer
, can_add
* sizeof(eword_t
));
151 self
->buffer_size
+= can_add
;
154 self
->bit_size
+= can_add
* BITS_IN_EWORD
;
156 if (number
- can_add
== 0)
159 buffer_push_rlw(self
, 0);
165 static size_t add_empty_word(struct ewah_bitmap
*self
, int v
)
167 int no_literal
= (rlw_get_literal_words(self
->rlw
) == 0);
168 eword_t run_len
= rlw_get_running_len(self
->rlw
);
170 if (no_literal
&& run_len
== 0) {
171 rlw_set_run_bit(self
->rlw
, v
);
172 assert(rlw_get_run_bit(self
->rlw
) == v
);
175 if (no_literal
&& rlw_get_run_bit(self
->rlw
) == v
&&
176 run_len
< RLW_LARGEST_RUNNING_COUNT
) {
177 rlw_set_running_len(self
->rlw
, run_len
+ 1);
178 assert(rlw_get_running_len(self
->rlw
) == run_len
+ 1);
181 buffer_push_rlw(self
, 0);
183 assert(rlw_get_running_len(self
->rlw
) == 0);
184 assert(rlw_get_run_bit(self
->rlw
) == 0);
185 assert(rlw_get_literal_words(self
->rlw
) == 0);
187 rlw_set_run_bit(self
->rlw
, v
);
188 assert(rlw_get_run_bit(self
->rlw
) == v
);
190 rlw_set_running_len(self
->rlw
, 1);
191 assert(rlw_get_running_len(self
->rlw
) == 1);
192 assert(rlw_get_literal_words(self
->rlw
) == 0);
197 size_t ewah_add(struct ewah_bitmap
*self
, eword_t word
)
199 self
->bit_size
+= BITS_IN_EWORD
;
202 return add_empty_word(self
, 0);
204 if (word
== (eword_t
)(~0))
205 return add_empty_word(self
, 1);
207 return add_literal(self
, word
);
210 void ewah_set(struct ewah_bitmap
*self
, size_t i
)
213 (i
+ BITS_IN_EWORD
) / BITS_IN_EWORD
-
214 (self
->bit_size
+ BITS_IN_EWORD
- 1) / BITS_IN_EWORD
;
216 assert(i
>= self
->bit_size
);
218 self
->bit_size
= i
+ 1;
222 add_empty_words(self
, 0, dist
- 1);
224 add_literal(self
, (eword_t
)1 << (i
% BITS_IN_EWORD
));
228 if (rlw_get_literal_words(self
->rlw
) == 0) {
229 rlw_set_running_len(self
->rlw
,
230 rlw_get_running_len(self
->rlw
) - 1);
231 add_literal(self
, (eword_t
)1 << (i
% BITS_IN_EWORD
));
235 self
->buffer
[self
->buffer_size
- 1] |=
236 ((eword_t
)1 << (i
% BITS_IN_EWORD
));
238 /* check if we just completed a stream of 1s */
239 if (self
->buffer
[self
->buffer_size
- 1] == (eword_t
)(~0)) {
240 self
->buffer
[--self
->buffer_size
] = 0;
241 rlw_set_literal_words(self
->rlw
,
242 rlw_get_literal_words(self
->rlw
) - 1);
243 add_empty_word(self
, 1);
247 void ewah_each_bit(struct ewah_bitmap
*self
, void (*callback
)(size_t, void*), void *payload
)
253 while (pointer
< self
->buffer_size
) {
254 eword_t
*word
= &self
->buffer
[pointer
];
256 if (rlw_get_run_bit(word
)) {
257 size_t len
= rlw_get_running_len(word
) * BITS_IN_EWORD
;
258 for (k
= 0; k
< len
; ++k
, ++pos
)
259 callback(pos
, payload
);
261 pos
+= rlw_get_running_len(word
) * BITS_IN_EWORD
;
266 for (k
= 0; k
< rlw_get_literal_words(word
); ++k
) {
269 /* todo: zero count optimization */
270 for (c
= 0; c
< BITS_IN_EWORD
; ++c
, ++pos
) {
271 if ((self
->buffer
[pointer
] & ((eword_t
)1 << c
)) != 0)
272 callback(pos
, payload
);
280 struct ewah_bitmap
*ewah_new(void)
282 struct ewah_bitmap
*self
;
284 self
= xmalloc(sizeof(struct ewah_bitmap
));
285 self
->alloc_size
= 32;
286 ALLOC_ARRAY(self
->buffer
, self
->alloc_size
);
292 void ewah_clear(struct ewah_bitmap
*self
)
294 self
->buffer_size
= 1;
297 self
->rlw
= self
->buffer
;
300 void ewah_free(struct ewah_bitmap
*self
)
305 if (self
->alloc_size
)
311 static void read_new_rlw(struct ewah_iterator
*it
)
313 const eword_t
*word
= NULL
;
319 word
= &it
->buffer
[it
->pointer
];
321 it
->rl
= rlw_get_running_len(word
);
322 it
->lw
= rlw_get_literal_words(word
);
323 it
->b
= rlw_get_run_bit(word
);
325 if (it
->rl
|| it
->lw
)
328 if (it
->pointer
< it
->buffer_size
- 1) {
331 it
->pointer
= it
->buffer_size
;
337 int ewah_iterator_next(eword_t
*next
, struct ewah_iterator
*it
)
339 if (it
->pointer
>= it
->buffer_size
)
342 if (it
->compressed
< it
->rl
) {
344 *next
= it
->b
? (eword_t
)(~0) : 0;
346 assert(it
->literals
< it
->lw
);
351 assert(it
->pointer
< it
->buffer_size
);
353 *next
= it
->buffer
[it
->pointer
];
356 if (it
->compressed
== it
->rl
&& it
->literals
== it
->lw
) {
357 if (++it
->pointer
< it
->buffer_size
)
364 void ewah_iterator_init(struct ewah_iterator
*it
, struct ewah_bitmap
*parent
)
366 it
->buffer
= parent
->buffer
;
367 it
->buffer_size
= parent
->buffer_size
;
376 if (it
->pointer
< it
->buffer_size
)
380 void ewah_not(struct ewah_bitmap
*self
)
384 while (pointer
< self
->buffer_size
) {
385 eword_t
*word
= &self
->buffer
[pointer
];
388 rlw_xor_run_bit(word
);
391 literals
= rlw_get_literal_words(word
);
392 for (k
= 0; k
< literals
; ++k
) {
393 self
->buffer
[pointer
] = ~self
->buffer
[pointer
];
400 struct ewah_bitmap
*ewah_i
,
401 struct ewah_bitmap
*ewah_j
,
402 struct ewah_bitmap
*out
)
404 struct rlw_iterator rlw_i
;
405 struct rlw_iterator rlw_j
;
408 rlwit_init(&rlw_i
, ewah_i
);
409 rlwit_init(&rlw_j
, ewah_j
);
411 while (rlwit_word_size(&rlw_i
) > 0 && rlwit_word_size(&rlw_j
) > 0) {
412 while (rlw_i
.rlw
.running_len
> 0 || rlw_j
.rlw
.running_len
> 0) {
413 struct rlw_iterator
*prey
, *predator
;
417 if (rlw_i
.rlw
.running_len
< rlw_j
.rlw
.running_len
) {
425 negate_words
= !!predator
->rlw
.running_bit
;
426 index
= rlwit_discharge(prey
, out
,
427 predator
->rlw
.running_len
, negate_words
);
429 ewah_add_empty_words(out
, negate_words
,
430 predator
->rlw
.running_len
- index
);
432 rlwit_discard_first_words(predator
,
433 predator
->rlw
.running_len
);
437 rlw_i
.rlw
.literal_words
,
438 rlw_j
.rlw
.literal_words
);
443 for (k
= 0; k
< literals
; ++k
) {
445 rlw_i
.buffer
[rlw_i
.literal_word_start
+ k
] ^
446 rlw_j
.buffer
[rlw_j
.literal_word_start
+ k
]
450 rlwit_discard_first_words(&rlw_i
, literals
);
451 rlwit_discard_first_words(&rlw_j
, literals
);
455 if (rlwit_word_size(&rlw_i
) > 0)
456 rlwit_discharge(&rlw_i
, out
, ~0, 0);
458 rlwit_discharge(&rlw_j
, out
, ~0, 0);
460 out
->bit_size
= max_size(ewah_i
->bit_size
, ewah_j
->bit_size
);
464 struct ewah_bitmap
*ewah_i
,
465 struct ewah_bitmap
*ewah_j
,
466 struct ewah_bitmap
*out
)
468 struct rlw_iterator rlw_i
;
469 struct rlw_iterator rlw_j
;
472 rlwit_init(&rlw_i
, ewah_i
);
473 rlwit_init(&rlw_j
, ewah_j
);
475 while (rlwit_word_size(&rlw_i
) > 0 && rlwit_word_size(&rlw_j
) > 0) {
476 while (rlw_i
.rlw
.running_len
> 0 || rlw_j
.rlw
.running_len
> 0) {
477 struct rlw_iterator
*prey
, *predator
;
479 if (rlw_i
.rlw
.running_len
< rlw_j
.rlw
.running_len
) {
487 if (predator
->rlw
.running_bit
== 0) {
488 ewah_add_empty_words(out
, 0,
489 predator
->rlw
.running_len
);
490 rlwit_discard_first_words(prey
,
491 predator
->rlw
.running_len
);
492 rlwit_discard_first_words(predator
,
493 predator
->rlw
.running_len
);
495 size_t index
= rlwit_discharge(prey
, out
,
496 predator
->rlw
.running_len
, 0);
497 ewah_add_empty_words(out
, 0,
498 predator
->rlw
.running_len
- index
);
499 rlwit_discard_first_words(predator
,
500 predator
->rlw
.running_len
);
505 rlw_i
.rlw
.literal_words
,
506 rlw_j
.rlw
.literal_words
);
511 for (k
= 0; k
< literals
; ++k
) {
513 rlw_i
.buffer
[rlw_i
.literal_word_start
+ k
] &
514 rlw_j
.buffer
[rlw_j
.literal_word_start
+ k
]
518 rlwit_discard_first_words(&rlw_i
, literals
);
519 rlwit_discard_first_words(&rlw_j
, literals
);
523 if (rlwit_word_size(&rlw_i
) > 0)
524 rlwit_discharge_empty(&rlw_i
, out
);
526 rlwit_discharge_empty(&rlw_j
, out
);
528 out
->bit_size
= max_size(ewah_i
->bit_size
, ewah_j
->bit_size
);
532 struct ewah_bitmap
*ewah_i
,
533 struct ewah_bitmap
*ewah_j
,
534 struct ewah_bitmap
*out
)
536 struct rlw_iterator rlw_i
;
537 struct rlw_iterator rlw_j
;
540 rlwit_init(&rlw_i
, ewah_i
);
541 rlwit_init(&rlw_j
, ewah_j
);
543 while (rlwit_word_size(&rlw_i
) > 0 && rlwit_word_size(&rlw_j
) > 0) {
544 while (rlw_i
.rlw
.running_len
> 0 || rlw_j
.rlw
.running_len
> 0) {
545 struct rlw_iterator
*prey
, *predator
;
547 if (rlw_i
.rlw
.running_len
< rlw_j
.rlw
.running_len
) {
555 if ((predator
->rlw
.running_bit
&& prey
== &rlw_i
) ||
556 (!predator
->rlw
.running_bit
&& prey
!= &rlw_i
)) {
557 ewah_add_empty_words(out
, 0,
558 predator
->rlw
.running_len
);
559 rlwit_discard_first_words(prey
,
560 predator
->rlw
.running_len
);
561 rlwit_discard_first_words(predator
,
562 predator
->rlw
.running_len
);
567 negate_words
= (&rlw_i
!= prey
);
568 index
= rlwit_discharge(prey
, out
,
569 predator
->rlw
.running_len
, negate_words
);
570 ewah_add_empty_words(out
, negate_words
,
571 predator
->rlw
.running_len
- index
);
572 rlwit_discard_first_words(predator
,
573 predator
->rlw
.running_len
);
578 rlw_i
.rlw
.literal_words
,
579 rlw_j
.rlw
.literal_words
);
584 for (k
= 0; k
< literals
; ++k
) {
586 rlw_i
.buffer
[rlw_i
.literal_word_start
+ k
] &
587 ~(rlw_j
.buffer
[rlw_j
.literal_word_start
+ k
])
591 rlwit_discard_first_words(&rlw_i
, literals
);
592 rlwit_discard_first_words(&rlw_j
, literals
);
596 if (rlwit_word_size(&rlw_i
) > 0)
597 rlwit_discharge(&rlw_i
, out
, ~0, 0);
599 rlwit_discharge_empty(&rlw_j
, out
);
601 out
->bit_size
= max_size(ewah_i
->bit_size
, ewah_j
->bit_size
);
605 struct ewah_bitmap
*ewah_i
,
606 struct ewah_bitmap
*ewah_j
,
607 struct ewah_bitmap
*out
)
609 struct rlw_iterator rlw_i
;
610 struct rlw_iterator rlw_j
;
613 rlwit_init(&rlw_i
, ewah_i
);
614 rlwit_init(&rlw_j
, ewah_j
);
616 while (rlwit_word_size(&rlw_i
) > 0 && rlwit_word_size(&rlw_j
) > 0) {
617 while (rlw_i
.rlw
.running_len
> 0 || rlw_j
.rlw
.running_len
> 0) {
618 struct rlw_iterator
*prey
, *predator
;
620 if (rlw_i
.rlw
.running_len
< rlw_j
.rlw
.running_len
) {
628 if (predator
->rlw
.running_bit
) {
629 ewah_add_empty_words(out
, 0,
630 predator
->rlw
.running_len
);
631 rlwit_discard_first_words(prey
,
632 predator
->rlw
.running_len
);
633 rlwit_discard_first_words(predator
,
634 predator
->rlw
.running_len
);
636 size_t index
= rlwit_discharge(prey
, out
,
637 predator
->rlw
.running_len
, 0);
638 ewah_add_empty_words(out
, 0,
639 predator
->rlw
.running_len
- index
);
640 rlwit_discard_first_words(predator
,
641 predator
->rlw
.running_len
);
646 rlw_i
.rlw
.literal_words
,
647 rlw_j
.rlw
.literal_words
);
652 for (k
= 0; k
< literals
; ++k
) {
654 rlw_i
.buffer
[rlw_i
.literal_word_start
+ k
] |
655 rlw_j
.buffer
[rlw_j
.literal_word_start
+ k
]
659 rlwit_discard_first_words(&rlw_i
, literals
);
660 rlwit_discard_first_words(&rlw_j
, literals
);
664 if (rlwit_word_size(&rlw_i
) > 0)
665 rlwit_discharge(&rlw_i
, out
, ~0, 0);
667 rlwit_discharge(&rlw_j
, out
, ~0, 0);
669 out
->bit_size
= max_size(ewah_i
->bit_size
, ewah_j
->bit_size
);
673 #define BITMAP_POOL_MAX 16
674 static struct ewah_bitmap
*bitmap_pool
[BITMAP_POOL_MAX
];
675 static size_t bitmap_pool_size
;
677 struct ewah_bitmap
*ewah_pool_new(void)
679 if (bitmap_pool_size
)
680 return bitmap_pool
[--bitmap_pool_size
];
685 void ewah_pool_free(struct ewah_bitmap
*self
)
690 if (bitmap_pool_size
== BITMAP_POOL_MAX
||
691 self
->alloc_size
== 0) {
697 bitmap_pool
[bitmap_pool_size
++] = self
;
700 uint32_t ewah_checksum(struct ewah_bitmap
*self
)
702 const uint8_t *p
= (uint8_t *)self
->buffer
;
703 uint32_t crc
= (uint32_t)self
->bit_size
;
704 size_t size
= self
->buffer_size
* sizeof(eword_t
);
707 crc
= (crc
<< 5) - crc
+ (uint32_t)*p
++;