From 0714a7ad8de07ffa2d7b9da3df982e8374891fb1 Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Fri, 6 Feb 2009 23:34:15 +0100 Subject: [PATCH] Fix direct (and complex) bitcopy() to actually work (at least tests don't fail). Now ready for benchmarking to see if extra complexity over bitcopy2() is worth it. --- exp/bitcopy.cc | 87 ++++++++++++++++++++++++++++++++--------------------- test/Makefile | 4 +-- test/beedb_tester.h | 16 ++++++++++ test/bitop.cc | 12 +++++--- 4 files changed, 78 insertions(+), 41 deletions(-) diff --git a/exp/bitcopy.cc b/exp/bitcopy.cc index 775d83c..ecfbeae 100644 --- a/exp/bitcopy.cc +++ b/exp/bitcopy.cc @@ -54,19 +54,6 @@ bitcopy_aligned_dst(uint64_t *dst, uint64_t *src_base, uint64_t src_pos, This way, bit positions 0-7 occupy the first byte. */ -/* ToDo: use non-temporal stores. */ - -/* - ToDo: Also need to handle the special case where the bitstring to be - copied fits entirely into one destination word, with bits to spare at - either end. - - Alternatively, decide that trailing fractional word is always - undefined. This is likely to be sufficient anyway, it does not appear - common/likely to need to copy bits into a memory region that just fits (what - data would be of known, >64 bit size?). -*/ - /* Copy bit string of size NUMBITS, at arbitrary bit alignment. @@ -80,7 +67,6 @@ bitcopy(uint64_t *dst_base, uint64_t dst_pos, uint64_t *src_base, uint64_t src_pos, uint64_t numbits) { -#if 0 // ToDo uint64_t *dst= dst_base + (dst_pos >> 6); uint64_t dst_word_pos= dst_pos & 0x3f; if (dst_word_pos == 0) @@ -95,17 +81,17 @@ bitcopy(uint64_t *dst_base, uint64_t dst_pos, Here we know that neither source nor destination are 64-bit aligned, avoiding some potential undefined-shift-by-64-bit problems. */ - uint64_t src_bits_in_first= 64 - src_word_pos; uint64_t dst_bits_in_first= 64 - dst_word_pos; - /* Preserve */ + /* Preserve bits in first destination word that should not be overwritten. */ uint64_t d= *dst & (~(uint64_t)0 >> dst_bits_in_first); - uint64_t v= *src++; /* Special case of same alignment. In this case we do not want any shifting (and cannot due to undefined shift-by64-bit). */ if (dst_word_pos == src_word_pos) { + uint64_t v= *src++; + uint64_t* dst_end= dst + ((dst_word_pos + numbits + 0x3f) >> 6); d|= v & (~(uint64_t)0 << dst_word_pos); *dst++= d; while (dst < dst_end) @@ -124,31 +110,64 @@ bitcopy(uint64_t *dst_base, uint64_t dst_pos, */ if (dst_word_pos > src_word_pos) { + /* + We can fill the first destination word completely from the first + source word (with bits to spare for the second destination word, if + any). + */ + + uint64_t *src_end= src + ((src_word_pos + numbits + 0x3f) >> 6); + uint64_t v= *src++; + /* Number of bits in last, fractional destination word (if any). */ + uint64_t bits_remaining= (dst_word_pos + numbits) & 0x3f; d|= (v >> src_word_pos) << dst_word_pos; + uint64_t shift1= dst_word_pos - src_word_pos; + uint64_t shift2= dst_bits_in_first + src_word_pos; *dst++= d; - shift1= dst_word_pos - src_word_pos; - shift2= dst_bits_in_first + src_word_pos; - d= v >> (dst_bits_in_first + src_word_pos); + + while (src < src_end) + { + d= v >> shift2; + v= *src++; + d|= v << shift1; + *dst++= d; + } + + if (bits_remaining > 0 && bits_remaining <= shift1) + *dst= v >> shift2; } else /* dst_word_pos < src_word_pos */ { + /* + To fill up the first destination word, we need bits from both the + first and the second source word. + */ + + /* Number of bits in last, fractional destination word (if any). */ + uint64_t bits_remaining= (dst_word_pos + numbits) & 0x3f; + /* dst_end points to just past the last whole destination word. */ + uint64_t *dst_end= dst + ((dst_word_pos + numbits) >> 6); + uint64_t v= *src++; d|= (v >> src_word_pos) << dst_word_pos; - shift1= src_bits_in_first + dst_word_pos; - shift2= src_word_pos - dst_word_pos; - } - - while (dst < dst_end) - { - v= *src++; - d|= v << shift1; - *dst++= d; - d= v >> shift2; - + uint64_t shift1= src_word_pos - dst_word_pos; + uint64_t shift2= 64 - shift1; + + while (dst < dst_end) + { + v= *src++; + d|= v << shift2; + *dst++= d; + d= v >> shift1; + } + + if (bits_remaining > 0) + { + if (bits_remaining > shift2) + d|= (*src) << shift2; + *dst= d; + } } } - - /* ToDo: Handle any last fractional destination word. */ -#endif } /* diff --git a/test/Makefile b/test/Makefile index eb8e6d4..af4a102 100644 --- a/test/Makefile +++ b/test/Makefile @@ -2,8 +2,8 @@ all: bitop # ToDo: Clean up this mess (need to go to automake I think). bitop: bitop.cc beedb_tester.cc ../exp/bitcopy.cc \ - ../exp/packed.h \ + beedb_tester.h ../exp/packed.h \ ../include/beedb.h \ ../include/port/format_macros.h - g++ -I../include -I../include/port -I../exp bitop.cc \ + g++ -g -I../include -I../include/port -I../exp bitop.cc \ beedb_tester.cc ../exp/bitcopy.cc -o bitop diff --git a/test/beedb_tester.h b/test/beedb_tester.h index 008a213..1fe33dc 100644 --- a/test/beedb_tester.h +++ b/test/beedb_tester.h @@ -25,6 +25,7 @@ class beedb_tester { public: beedb_tester(int n); void run(void (*test)(beedb_tester *)); + template void run(void (*test)(t, beedb_tester *), t arg); void ok(); void ok(const char *format, ...); void nok(); @@ -36,6 +37,21 @@ class beedb_tester { }; /* + This needs to be inline here, the joys of C++ templates + + (C++ needs the definition to be able to generate versions for each + argument type, there is no automatic boxing in C++). +*/ +template +void +beedb_tester::run(void (*test)(t, beedb_tester *), t arg) +{ + current_test++; + (*test)(arg, this); +} + + +/* Simple way to include the file name and line number into a "not ok" message. Idea is to use as t->nok(NOK_IDENT "foobar not found (%d)", x); diff --git a/test/bitop.cc b/test/bitop.cc index 200e4c5..bfb75ca 100644 --- a/test/bitop.cc +++ b/test/bitop.cc @@ -119,9 +119,10 @@ test2(beedb_tester *t) t->ok(); } -/* Test bitcopy2(). */ +/* Test bitcopy() (multiple versions). */ void -test3(beedb_tester *t) +test3(void (*copier)(uint64_t *, uint64_t, uint64_t *, uint64_t, uint64_t), + beedb_tester *t) { const int SZ= 64; uint64_t buf[SZ/8]; @@ -142,7 +143,7 @@ test3(beedb_tester *t) { unsigned char *check = (i ? zeros : ones); memcpy(buf, check, SZ); - bitcopy2(buf, start*8, misc_buf, (start/2)*8, len*8); + (*copier)(buf, start*8, misc_buf, (start/2)*8, len*8); if (start > 0) { if (0 != memcmp((unsigned char *)buf, check, start)) @@ -169,10 +170,11 @@ test3(beedb_tester *t) int main(int argc, char *argv) { - beedb_tester t(3); + beedb_tester t(4); t.run(test1); t.run(test2); - t.run(test3); + t.run(test3, bitcopy2); + t.run(test3, bitcopy); return 0; } -- 2.11.4.GIT