Fix direct (and complex) bitcopy() to actually work (at least tests
[beedb.git] / exp / bitcopy.cc
blobecfbeaec9b79c4ba25d78fb38636881e3ce0709f
1 /*
2 Copyright 2009 Kristian Nielsen
4 This file is part of BeeDB.
6 Foobar is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 2 of the License, or
9 (at your option) any later version.
11 Foobar is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Foobar. If not, see <http://www.gnu.org/licenses/>.
21 Copy Bitstrings of arbitrary bit alignment and length.
24 #include "beedb.h"
25 #include "bitcopy.h"
26 #include "packed.h"
29 void
30 bitcopy_aligned(uint64_t *dst, uint64_t *src, uint64_t wordcount)
32 inline_bitcopy_aligned(dst, src, wordcount);
35 void
36 bitcopy_aligned_src(uint64_t *dst_base, uint64_t dst_pos, uint64_t *src,
37 uint64_t numbits)
39 inline_bitcopy_aligned_src(dst_base, dst_pos, src, numbits);
42 void
43 bitcopy_aligned_dst(uint64_t *dst, uint64_t *src_base, uint64_t src_pos,
44 uint64_t numbits)
46 return inline_bitcopy_aligned_dst(dst, src_base, src_pos, numbits);
50 For the little-endian version, bit positions count from least significant
51 bit up, so position 0 is 0/1, position 1 is 0/2, position 2 is 0/4, and so
52 on.
54 This way, bit positions 0-7 occupy the first byte.
58 Copy bit string of size NUMBITS, at arbitrary bit alignment.
60 Size to copy must be > 0.
62 Any remaining bits in the last 64-bit word to be copied to destination are
63 undefined.
65 void
66 bitcopy(uint64_t *dst_base, uint64_t dst_pos,
67 uint64_t *src_base, uint64_t src_pos,
68 uint64_t numbits)
70 uint64_t *dst= dst_base + (dst_pos >> 6);
71 uint64_t dst_word_pos= dst_pos & 0x3f;
72 if (dst_word_pos == 0)
73 return inline_bitcopy_aligned_dst(dst, src_base, src_pos, numbits);
75 uint64_t *src= src_base + (src_pos >> 6);
76 uint64_t src_word_pos= src_pos & 0x3f;
77 if (src_word_pos == 0)
78 return inline_bitcopy_aligned_src(dst_base, dst_pos, src, numbits);
81 Here we know that neither source nor destination are 64-bit aligned,
82 avoiding some potential undefined-shift-by-64-bit problems.
84 uint64_t dst_bits_in_first= 64 - dst_word_pos;
85 /* Preserve bits in first destination word that should not be overwritten. */
86 uint64_t d= *dst & (~(uint64_t)0 >> dst_bits_in_first);
88 Special case of same alignment. In this case we do not want any shifting
89 (and cannot due to undefined shift-by64-bit).
91 if (dst_word_pos == src_word_pos)
93 uint64_t v= *src++;
94 uint64_t* dst_end= dst + ((dst_word_pos + numbits + 0x3f) >> 6);
95 d|= v & (~(uint64_t)0 << dst_word_pos);
96 *dst++= d;
97 while (dst < dst_end)
99 *dst++= *src++;
102 else
105 Now check if there are sufficient bits in the first source word to fill
106 up the fractional first destination word. If so, take them, use them to
107 fill up the first destination word, and use the rest as the first part
108 of next destination word. If not, use as much as is there to prepare the
109 first part of the first destination word.
111 if (dst_word_pos > src_word_pos)
114 We can fill the first destination word completely from the first
115 source word (with bits to spare for the second destination word, if
116 any).
119 uint64_t *src_end= src + ((src_word_pos + numbits + 0x3f) >> 6);
120 uint64_t v= *src++;
121 /* Number of bits in last, fractional destination word (if any). */
122 uint64_t bits_remaining= (dst_word_pos + numbits) & 0x3f;
123 d|= (v >> src_word_pos) << dst_word_pos;
124 uint64_t shift1= dst_word_pos - src_word_pos;
125 uint64_t shift2= dst_bits_in_first + src_word_pos;
126 *dst++= d;
128 while (src < src_end)
130 d= v >> shift2;
131 v= *src++;
132 d|= v << shift1;
133 *dst++= d;
136 if (bits_remaining > 0 && bits_remaining <= shift1)
137 *dst= v >> shift2;
139 else /* dst_word_pos < src_word_pos */
142 To fill up the first destination word, we need bits from both the
143 first and the second source word.
146 /* Number of bits in last, fractional destination word (if any). */
147 uint64_t bits_remaining= (dst_word_pos + numbits) & 0x3f;
148 /* dst_end points to just past the last whole destination word. */
149 uint64_t *dst_end= dst + ((dst_word_pos + numbits) >> 6);
150 uint64_t v= *src++;
151 d|= (v >> src_word_pos) << dst_word_pos;
152 uint64_t shift1= src_word_pos - dst_word_pos;
153 uint64_t shift2= 64 - shift1;
155 while (dst < dst_end)
157 v= *src++;
158 d|= v << shift2;
159 *dst++= d;
160 d= v >> shift1;
163 if (bits_remaining > 0)
165 if (bits_remaining > shift2)
166 d|= (*src) << shift2;
167 *dst= d;
174 Hm, an alternative implementation using pack() and unpack().
176 I actually kind of like this one better, so benchmark and select this one if
177 performace is not significantly worse. It is soo much simpler.
179 void
180 bitcopy2(uint64_t *dst_base, uint64_t dst_pos,
181 uint64_t *src_base, uint64_t src_pos,
182 uint64_t numbits)
184 pack_overwriteptr dst(dst_base, dst_pos);
185 uint64_t src_word_pos= src_pos & 0x3f;
186 uint64_t bits_in_first= 64 - src_word_pos;
187 uint64_t *src= src_base + (src_pos >> 6);
188 if (bits_in_first >= numbits)
190 /* Everything available in first source word. */
191 dst.putbits(numbits, *src >> src_word_pos);
193 else
195 dst.putbits(bits_in_first, (*src++) >> src_word_pos);
196 uint64_t end_bit_pos= src_pos + numbits;
197 uint64_t *src_end= src_base + (end_bit_pos >> 6);
198 while (src < src_end)
199 dst.putbits(64, *src++);
200 uint64_t remain_bits= end_bit_pos & 0x3f;
201 if (remain_bits > 0)
202 dst.putbits(remain_bits, *src & (~(uint64_t)0 >> (64 - remain_bits)));
204 dst.flush();