2 Copyright 2009 Kristian Nielsen
4 This file is part of BeeDB.
6 Foobar is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 2 of the License, or
9 (at your option) any later version.
11 Foobar is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Foobar. If not, see <http://www.gnu.org/licenses/>.
21 Copy Bitstrings of arbitrary bit alignment and length.
30 bitcopy_aligned(uint64_t *dst
, uint64_t *src
, uint64_t wordcount
)
32 inline_bitcopy_aligned(dst
, src
, wordcount
);
36 bitcopy_aligned_src(uint64_t *dst_base
, uint64_t dst_pos
, uint64_t *src
,
39 inline_bitcopy_aligned_src(dst_base
, dst_pos
, src
, numbits
);
43 bitcopy_aligned_dst(uint64_t *dst
, uint64_t *src_base
, uint64_t src_pos
,
46 return inline_bitcopy_aligned_dst(dst
, src_base
, src_pos
, numbits
);
50 For the little-endian version, bit positions count from least significant
51 bit up, so position 0 is 0/1, position 1 is 0/2, position 2 is 0/4, and so
54 This way, bit positions 0-7 occupy the first byte.
58 Copy bit string of size NUMBITS, at arbitrary bit alignment.
60 Size to copy must be > 0.
62 Any remaining bits in the last 64-bit word to be copied to destination are
66 bitcopy(uint64_t *dst_base
, uint64_t dst_pos
,
67 uint64_t *src_base
, uint64_t src_pos
,
70 uint64_t *dst
= dst_base
+ (dst_pos
>> 6);
71 uint64_t dst_word_pos
= dst_pos
& 0x3f;
72 if (dst_word_pos
== 0)
73 return inline_bitcopy_aligned_dst(dst
, src_base
, src_pos
, numbits
);
75 uint64_t *src
= src_base
+ (src_pos
>> 6);
76 uint64_t src_word_pos
= src_pos
& 0x3f;
77 if (src_word_pos
== 0)
78 return inline_bitcopy_aligned_src(dst_base
, dst_pos
, src
, numbits
);
81 Here we know that neither source nor destination are 64-bit aligned,
82 avoiding some potential undefined-shift-by-64-bit problems.
84 uint64_t dst_bits_in_first
= 64 - dst_word_pos
;
85 /* Preserve bits in first destination word that should not be overwritten. */
86 uint64_t d
= *dst
& (~(uint64_t)0 >> dst_bits_in_first
);
88 Special case of same alignment. In this case we do not want any shifting
89 (and cannot due to undefined shift-by64-bit).
91 if (dst_word_pos
== src_word_pos
)
94 uint64_t* dst_end
= dst
+ ((dst_word_pos
+ numbits
+ 0x3f) >> 6);
95 d
|= v
& (~(uint64_t)0 << dst_word_pos
);
105 Now check if there are sufficient bits in the first source word to fill
106 up the fractional first destination word. If so, take them, use them to
107 fill up the first destination word, and use the rest as the first part
108 of next destination word. If not, use as much as is there to prepare the
109 first part of the first destination word.
111 if (dst_word_pos
> src_word_pos
)
114 We can fill the first destination word completely from the first
115 source word (with bits to spare for the second destination word, if
119 uint64_t *src_end
= src
+ ((src_word_pos
+ numbits
+ 0x3f) >> 6);
121 /* Number of bits in last, fractional destination word (if any). */
122 uint64_t bits_remaining
= (dst_word_pos
+ numbits
) & 0x3f;
123 d
|= (v
>> src_word_pos
) << dst_word_pos
;
124 uint64_t shift1
= dst_word_pos
- src_word_pos
;
125 uint64_t shift2
= dst_bits_in_first
+ src_word_pos
;
128 while (src
< src_end
)
136 if (bits_remaining
> 0 && bits_remaining
<= shift1
)
139 else /* dst_word_pos < src_word_pos */
142 To fill up the first destination word, we need bits from both the
143 first and the second source word.
146 /* Number of bits in last, fractional destination word (if any). */
147 uint64_t bits_remaining
= (dst_word_pos
+ numbits
) & 0x3f;
148 /* dst_end points to just past the last whole destination word. */
149 uint64_t *dst_end
= dst
+ ((dst_word_pos
+ numbits
) >> 6);
151 d
|= (v
>> src_word_pos
) << dst_word_pos
;
152 uint64_t shift1
= src_word_pos
- dst_word_pos
;
153 uint64_t shift2
= 64 - shift1
;
155 while (dst
< dst_end
)
163 if (bits_remaining
> 0)
165 if (bits_remaining
> shift2
)
166 d
|= (*src
) << shift2
;
174 Hm, an alternative implementation using pack() and unpack().
176 I actually kind of like this one better, so benchmark and select this one if
177 performace is not significantly worse. It is soo much simpler.
180 bitcopy2(uint64_t *dst_base
, uint64_t dst_pos
,
181 uint64_t *src_base
, uint64_t src_pos
,
184 pack_overwriteptr
dst(dst_base
, dst_pos
);
185 uint64_t src_word_pos
= src_pos
& 0x3f;
186 uint64_t bits_in_first
= 64 - src_word_pos
;
187 uint64_t *src
= src_base
+ (src_pos
>> 6);
188 if (bits_in_first
>= numbits
)
190 /* Everything available in first source word. */
191 dst
.putbits(numbits
, *src
>> src_word_pos
);
195 dst
.putbits(bits_in_first
, (*src
++) >> src_word_pos
);
196 uint64_t end_bit_pos
= src_pos
+ numbits
;
197 uint64_t *src_end
= src_base
+ (end_bit_pos
>> 6);
198 while (src
< src_end
)
199 dst
.putbits(64, *src
++);
200 uint64_t remain_bits
= end_bit_pos
& 0x3f;
202 dst
.putbits(remain_bits
, *src
& (~(uint64_t)0 >> (64 - remain_bits
)));
208 bitcopy3(uint64_t *dst_base
, uint64_t dst_pos
,
209 uint64_t *src_base
, uint64_t src_pos
,
212 pack_overwriteptr
dst(dst_base
, dst_pos
);
213 uint64_t src_word_pos
= src_pos
& 0x3f;
214 uint64_t bits_in_first
= 64 - src_word_pos
;
215 uint64_t *src
= src_base
+ (src_pos
>> 6);
216 if (bits_in_first
>= numbits
)
218 /* Everything available in first source word. */
219 dst
.putbits(numbits
, *src
>> src_word_pos
);
223 dst
.putbits(bits_in_first
, (*src
++) >> src_word_pos
);
224 uint64_t end_bit_pos
= src_pos
+ numbits
;
225 uint64_t *src_end
= src_base
+ (end_bit_pos
>> 6);
226 while (src
< src_end
)
227 dst
.putbits64(*src
++);
228 uint64_t remain_bits
= end_bit_pos
& 0x3f;
230 dst
.putbits(remain_bits
, *src
& (~(uint64_t)0 >> (64 - remain_bits
)));