1 /** @file sortable-serialise.cc
2 * @brief Serialise floating point values to string which sort the same way.
4 /* Copyright (C) 2007,2009,2015 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 #ifndef XAPIAN_UNITTEST
24 # include <xapian/queryparser.h>
25 // Only enable exceptions for the testsuite - we don't want them in a library
26 // build as these functions are marked not to throw exceptions in the API
28 # define UNITTEST_ASSERT(X)
30 # include "omassert.h"
31 # define UNITTEST_ASSERT(X) Assert(X)
43 # error Code currently assumes FLT_RADIX == 2
47 // Disable warning about negating an unsigned type, which we do deliberately.
48 # pragma warning(disable:4146)
52 Xapian::sortable_serialise_(double value
, char * buf
) XAPIAN_NOEXCEPT
58 if (value
< -DBL_MAX
) return 0;
60 mantissa
= frexp(value
, &exponent
);
62 /* Deal with zero specially.
64 * IEEE representation of doubles uses 11 bits for the exponent, with a
65 * bias of 1023. We bias this by subtracting 8, and non-IEEE
66 * representations may allow higher exponents, so allow exponents down to
67 * -2039 - if smaller exponents are possible anywhere, we underflow such
70 if (mantissa
== 0.0 || exponent
< -2039) {
75 bool negative
= (mantissa
< 0);
76 if (negative
) mantissa
= -mantissa
;
78 // Infinity, or extremely large non-IEEE representation.
79 if (value
> DBL_MAX
|| exponent
> 2055) {
81 // This can only happen with a non-IEEE representation, because
82 // we've already tested for value < -DBL_MAX
85 memset(buf
, '\xff', 9);
92 // [ 7 | 6 | 5 | 4 3 2 1 0]
95 // Sm stores the sign of the mantissa: 1 = positive or zero, 0 = negative.
96 // Se stores the sign of the exponent: Sm for positive/zero, !Sm for neg.
97 // Le stores the length of the exponent: !Se for 3 bits, Se for 11 bits.
98 unsigned char next
= (negative
? 0 : 0xe0);
100 // Bias the exponent by 8 so that more small integers get short encodings.
102 bool exponent_negative
= (exponent
< 0);
103 if (exponent_negative
) {
104 exponent
= -exponent
;
110 /* We store the exponent in 3 or 11 bits. If the number is negative, we
111 * flip all the bits of the exponent, since larger negative numbers should
114 * If the exponent is negative, we flip the bits of the exponent, since
115 * larger negative exponents should sort first (unless the number is
116 * negative, in which case they should sort later).
118 UNITTEST_ASSERT(exponent
>= 0);
121 next
|= static_cast<unsigned char>(exponent
<< 2);
122 if (negative
^ exponent_negative
) next
^= 0x1c;
124 UNITTEST_ASSERT((exponent
>> 11) == 0);
125 // Put the top 5 bits of the exponent into the lower 5 bits of the
127 next
|= static_cast<unsigned char>(exponent
>> 6);
128 if (negative
^ exponent_negative
) next
^= 0x1f;
130 // And the lower 6 bits of the exponent go into the upper 6 bits
131 // of the second byte:
132 next
= static_cast<unsigned char>(exponent
) << 2;
133 if (negative
^ exponent_negative
) next
^= 0xfc;
136 // Convert the 52 (or 53) bits of the mantissa into two 32-bit words.
137 mantissa
*= 1 << (negative
? 26 : 27);
138 unsigned word1
= static_cast<unsigned>(mantissa
);
140 unsigned word2
= static_cast<unsigned>(mantissa
* 4294967296.0); // 1<<32
141 // If the number is positive, the first bit will always be set because 0.5
142 // <= mantissa < 1, unless mantissa is zero, which we handle specially
143 // above). If the number is negative, we negate the mantissa instead of
144 // flipping all the bits, so in the case of 0.5, the first bit isn't set
145 // so we need to store it explicitly. But for the cost of one extra
146 // leading bit, we can save several trailing 0xff bytes in lots of common
148 UNITTEST_ASSERT(negative
|| (word1
& (1<<26)));
150 // We negate the mantissa for negative numbers, so that the sort order
151 // is reversed (since larger negative numbers should come first).
153 if (word2
!= 0) ++word1
;
158 next
|= static_cast<unsigned char>(word1
>> 24);
160 buf
[len
++] = char(word1
>> 16);
161 buf
[len
++] = char(word1
>> 8);
162 buf
[len
++] = char(word1
);
164 buf
[len
++] = char(word2
>> 24);
165 buf
[len
++] = char(word2
>> 16);
166 buf
[len
++] = char(word2
>> 8);
167 buf
[len
++] = char(word2
);
169 // Finally, we can chop off any trailing zero bytes.
170 while (len
> 0 && buf
[len
- 1] == '\0') {
177 /// Get a number from the character at a given position in a string, returning
178 /// 0 if the string isn't long enough.
179 static inline unsigned char
180 numfromstr(const std::string
& str
, std::string::size_type pos
)
182 return (pos
< str
.size()) ? static_cast<unsigned char>(str
[pos
]) : '\0';
186 Xapian::sortable_unserialise(const std::string
& value
) XAPIAN_NOEXCEPT
189 if (value
.size() == 1 && value
[0] == '\x80') return 0.0;
191 // Positive infinity.
192 if (value
.size() == 9 &&
193 memcmp(value
.data(), "\xff\xff\xff\xff\xff\xff\xff\xff\xff", 9) == 0) {
195 // INFINITY is C99. Oddly, it's of type "float" so sanity check in
196 // case it doesn't cast to double as infinity (apparently some
197 // implementations have this problem).
198 if (double(INFINITY
) > HUGE_VAL
) return INFINITY
;
203 // Negative infinity.
206 if (double(INFINITY
) > HUGE_VAL
) return -INFINITY
;
211 unsigned char first
= numfromstr(value
, 0);
214 first
^= static_cast<unsigned char>(first
& 0xc0) >> 1;
215 bool negative
= !(first
& 0x80);
216 bool exponent_negative
= (first
& 0x40);
217 bool explen
= !(first
& 0x20);
218 int exponent
= first
& 0x1f;
221 if (negative
^ exponent_negative
) exponent
^= 0x07;
223 first
= numfromstr(value
, ++i
);
225 exponent
|= (first
>> 2);
226 if (negative
^ exponent_negative
) exponent
^= 0x07ff;
230 word1
= (unsigned(first
& 0x03) << 24);
231 word1
|= numfromstr(value
, ++i
) << 16;
232 word1
|= numfromstr(value
, ++i
) << 8;
233 word1
|= numfromstr(value
, ++i
);
236 if (i
< value
.size()) {
237 word2
= numfromstr(value
, ++i
) << 24;
238 word2
|= numfromstr(value
, ++i
) << 16;
239 word2
|= numfromstr(value
, ++i
) << 8;
240 word2
|= numfromstr(value
, ++i
);
245 if (word2
!= 0) ++word1
;
247 UNITTEST_ASSERT((word1
& 0xf0000000) != 0);
250 if (!negative
) word1
|= 1<<26;
253 if (word2
) mantissa
= word2
/ 4294967296.0; // 1<<32
255 mantissa
/= 1 << (negative
? 26 : 27);
257 if (exponent_negative
) exponent
= -exponent
;
260 if (negative
) mantissa
= -mantissa
;
262 // We use scalbn() since it's equivalent to ldexp() when FLT_RADIX == 2
263 // (which we currently assume), except that ldexp() will set errno if the
264 // result overflows or underflows, which isn't really desirable here.
265 return scalbn(mantissa
, exponent
);