Line data Source code
1 : #ifndef FASTFLOAT_DIGIT_COMPARISON_H
2 : #define FASTFLOAT_DIGIT_COMPARISON_H
3 :
4 : #include <algorithm>
5 : #include <cstdint>
6 : #include <cstring>
7 : #include <iterator>
8 :
9 : #include "float_common.h"
10 : #include "bigint.h"
11 : #include "ascii_number.h"
12 :
13 : namespace fast_float {
14 :
15 : // 1e0 to 1e19
16 : constexpr static uint64_t powers_of_ten_uint64[] = {
17 : 1UL, 10UL, 100UL, 1000UL, 10000UL, 100000UL, 1000000UL, 10000000UL, 100000000UL,
18 : 1000000000UL, 10000000000UL, 100000000000UL, 1000000000000UL, 10000000000000UL,
19 : 100000000000000UL, 1000000000000000UL, 10000000000000000UL, 100000000000000000UL,
20 : 1000000000000000000UL, 10000000000000000000UL};
21 :
22 : // calculate the exponent, in scientific notation, of the number.
23 : // this algorithm is not even close to optimized, but it has no practical
24 : // effect on performance: in order to have a faster algorithm, we'd need
25 : // to slow down performance for faster algorithms, and this is still fast.
26 : template <typename UC>
27 : fastfloat_really_inline FASTFLOAT_CONSTEXPR14
28 : int32_t scientific_exponent(parsed_number_string_t<UC> & num) noexcept {
29 0 : uint64_t mantissa = num.mantissa;
30 0 : int32_t exponent = int32_t(num.exponent);
31 0 : while (mantissa >= 10000) {
32 0 : mantissa /= 10000;
33 0 : exponent += 4;
34 : }
35 0 : while (mantissa >= 100) {
36 0 : mantissa /= 100;
37 0 : exponent += 2;
38 : }
39 0 : while (mantissa >= 10) {
40 0 : mantissa /= 10;
41 0 : exponent += 1;
42 : }
43 0 : return exponent;
44 : }
45 :
46 : // this converts a native floating-point number to an extended-precision float.
47 : template <typename T>
48 : fastfloat_really_inline FASTFLOAT_CONSTEXPR20
49 : adjusted_mantissa to_extended(T value) noexcept {
50 : using equiv_uint = typename binary_format<T>::equiv_uint;
51 0 : constexpr equiv_uint exponent_mask = binary_format<T>::exponent_mask();
52 0 : constexpr equiv_uint mantissa_mask = binary_format<T>::mantissa_mask();
53 0 : constexpr equiv_uint hidden_bit_mask = binary_format<T>::hidden_bit_mask();
54 :
55 0 : adjusted_mantissa am;
56 0 : int32_t bias = binary_format<T>::mantissa_explicit_bits() - binary_format<T>::minimum_exponent();
57 : equiv_uint bits;
58 : #if FASTFLOAT_HAS_BIT_CAST
59 : bits = std::bit_cast<equiv_uint>(value);
60 : #else
61 0 : ::memcpy(&bits, &value, sizeof(T));
62 : #endif
63 0 : if ((bits & exponent_mask) == 0) {
64 : // denormal
65 0 : am.power2 = 1 - bias;
66 0 : am.mantissa = bits & mantissa_mask;
67 : } else {
68 : // normal
69 0 : am.power2 = int32_t((bits & exponent_mask) >> binary_format<T>::mantissa_explicit_bits());
70 0 : am.power2 -= bias;
71 0 : am.mantissa = (bits & mantissa_mask) | hidden_bit_mask;
72 : }
73 :
74 0 : return am;
75 : }
76 :
77 : // get the extended precision value of the halfway point between b and b+u.
78 : // we are given a native float that represents b, so we need to adjust it
79 : // halfway between b and b+u.
80 : template <typename T>
81 : fastfloat_really_inline FASTFLOAT_CONSTEXPR20
82 : adjusted_mantissa to_extended_halfway(T value) noexcept {
83 0 : adjusted_mantissa am = to_extended(value);
84 0 : am.mantissa <<= 1;
85 0 : am.mantissa += 1;
86 0 : am.power2 -= 1;
87 0 : return am;
88 : }
89 :
90 : // round an extended-precision float to the nearest machine float.
91 : template <typename T, typename callback>
92 : fastfloat_really_inline FASTFLOAT_CONSTEXPR14
93 : void round(adjusted_mantissa& am, callback cb) noexcept {
94 0 : int32_t mantissa_shift = 64 - binary_format<T>::mantissa_explicit_bits() - 1;
95 0 : if (-am.power2 >= mantissa_shift) {
96 : // have a denormal float
97 0 : int32_t shift = -am.power2 + 1;
98 0 : cb(am, std::min<int32_t>(shift, 64));
99 : // check for round-up: if rounding-nearest carried us to the hidden bit.
100 0 : am.power2 = (am.mantissa < (uint64_t(1) << binary_format<T>::mantissa_explicit_bits())) ? 0 : 1;
101 0 : return;
102 : }
103 :
104 : // have a normal float, use the default shift.
105 0 : cb(am, mantissa_shift);
106 :
107 : // check for carry
108 0 : if (am.mantissa >= (uint64_t(2) << binary_format<T>::mantissa_explicit_bits())) {
109 0 : am.mantissa = (uint64_t(1) << binary_format<T>::mantissa_explicit_bits());
110 0 : am.power2++;
111 : }
112 :
113 : // check for infinite: we could have carried to an infinite power
114 0 : am.mantissa &= ~(uint64_t(1) << binary_format<T>::mantissa_explicit_bits());
115 0 : if (am.power2 >= binary_format<T>::infinite_power()) {
116 0 : am.power2 = binary_format<T>::infinite_power();
117 0 : am.mantissa = 0;
118 : }
119 : }
120 :
121 : template <typename callback>
122 : fastfloat_really_inline FASTFLOAT_CONSTEXPR14
123 : void round_nearest_tie_even(adjusted_mantissa& am, int32_t shift, callback cb) noexcept {
124 0 : const uint64_t mask
125 : = (shift == 64)
126 : ? UINT64_MAX
127 0 : : (uint64_t(1) << shift) - 1;
128 0 : const uint64_t halfway
129 : = (shift == 0)
130 : ? 0
131 0 : : uint64_t(1) << (shift - 1);
132 0 : uint64_t truncated_bits = am.mantissa & mask;
133 0 : bool is_above = truncated_bits > halfway;
134 0 : bool is_halfway = truncated_bits == halfway;
135 :
136 : // shift digits into position
137 0 : if (shift == 64) {
138 0 : am.mantissa = 0;
139 : } else {
140 0 : am.mantissa >>= shift;
141 : }
142 0 : am.power2 += shift;
143 :
144 0 : bool is_odd = (am.mantissa & 1) == 1;
145 0 : am.mantissa += uint64_t(cb(is_odd, is_halfway, is_above));
146 0 : }
147 :
148 : fastfloat_really_inline FASTFLOAT_CONSTEXPR14
149 : void round_down(adjusted_mantissa& am, int32_t shift) noexcept {
150 0 : if (shift == 64) {
151 0 : am.mantissa = 0;
152 : } else {
153 0 : am.mantissa >>= shift;
154 : }
155 0 : am.power2 += shift;
156 0 : }
157 : template <typename UC>
158 : fastfloat_really_inline FASTFLOAT_CONSTEXPR20
159 : void skip_zeros(UC const * & first, UC const * last) noexcept {
160 : uint64_t val;
161 0 : while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len<UC>()) {
162 0 : ::memcpy(&val, first, sizeof(uint64_t));
163 0 : if (val != int_cmp_zeros<UC>()) {
164 0 : break;
165 : }
166 0 : first += int_cmp_len<UC>();
167 : }
168 0 : while (first != last) {
169 0 : if (*first != UC('0')) {
170 0 : break;
171 : }
172 0 : first++;
173 : }
174 0 : }
175 :
176 : // determine if any non-zero digits were truncated.
177 : // all characters must be valid digits.
178 : template <typename UC>
179 : fastfloat_really_inline FASTFLOAT_CONSTEXPR20
180 : bool is_truncated(UC const * first, UC const * last) noexcept {
181 : // do 8-bit optimizations, can just compare to 8 literal 0s.
182 : uint64_t val;
183 0 : while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len<UC>()) {
184 0 : ::memcpy(&val, first, sizeof(uint64_t));
185 0 : if (val != int_cmp_zeros<UC>()) {
186 0 : return true;
187 : }
188 0 : first += int_cmp_len<UC>();
189 : }
190 0 : while (first != last) {
191 0 : if (*first != UC('0')) {
192 0 : return true;
193 : }
194 0 : ++first;
195 : }
196 0 : return false;
197 : }
198 : template <typename UC>
199 : fastfloat_really_inline FASTFLOAT_CONSTEXPR20
200 : bool is_truncated(span<const UC> s) noexcept {
201 0 : return is_truncated(s.ptr, s.ptr + s.len());
202 : }
203 :
204 :
205 : template <typename UC>
206 : fastfloat_really_inline FASTFLOAT_CONSTEXPR20
207 : void parse_eight_digits(const UC*& p, limb& value, size_t& counter, size_t& count) noexcept {
208 0 : value = value * 100000000 + parse_eight_digits_unrolled(p);
209 0 : p += 8;
210 0 : counter += 8;
211 0 : count += 8;
212 0 : }
213 :
214 : template <typename UC>
215 : fastfloat_really_inline FASTFLOAT_CONSTEXPR14
216 : void parse_one_digit(UC const *& p, limb& value, size_t& counter, size_t& count) noexcept {
217 0 : value = value * 10 + limb(*p - UC('0'));
218 0 : p++;
219 0 : counter++;
220 0 : count++;
221 0 : }
222 :
223 : fastfloat_really_inline FASTFLOAT_CONSTEXPR20
224 : void add_native(bigint& big, limb power, limb value) noexcept {
225 0 : big.mul(power);
226 0 : big.add(value);
227 0 : }
228 :
229 : fastfloat_really_inline FASTFLOAT_CONSTEXPR20
230 : void round_up_bigint(bigint& big, size_t& count) noexcept {
231 : // need to round-up the digits, but need to avoid rounding
232 : // ....9999 to ...10000, which could cause a false halfway point.
233 : add_native(big, 10, 1);
234 0 : count++;
235 0 : }
236 :
237 : // parse the significant digits into a big integer
238 : template <typename UC>
239 : inline FASTFLOAT_CONSTEXPR20
240 0 : void parse_mantissa(bigint& result, parsed_number_string_t<UC>& num, size_t max_digits, size_t& digits) noexcept {
241 : // try to minimize the number of big integer and scalar multiplication.
242 : // therefore, try to parse 8 digits at a time, and multiply by the largest
243 : // scalar value (9 or 19 digits) for each step.
244 0 : size_t counter = 0;
245 0 : digits = 0;
246 0 : limb value = 0;
247 : #ifdef FASTFLOAT_64BIT_LIMB
248 0 : size_t step = 19;
249 : #else
250 : size_t step = 9;
251 : #endif
252 :
253 : // process all integer digits.
254 0 : UC const * p = num.integer.ptr;
255 0 : UC const * pend = p + num.integer.len();
256 : skip_zeros(p, pend);
257 : // process all digits, in increments of step per loop
258 0 : while (p != pend) {
259 0 : while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) {
260 : parse_eight_digits(p, value, counter, digits);
261 : }
262 0 : while (counter < step && p != pend && digits < max_digits) {
263 : parse_one_digit(p, value, counter, digits);
264 : }
265 0 : if (digits == max_digits) {
266 : // add the temporary value, then check if we've truncated any digits
267 0 : add_native(result, limb(powers_of_ten_uint64[counter]), value);
268 0 : bool truncated = is_truncated(p, pend);
269 0 : if (num.fraction.ptr != nullptr) {
270 0 : truncated |= is_truncated(num.fraction);
271 : }
272 0 : if (truncated) {
273 : round_up_bigint(result, digits);
274 : }
275 0 : return;
276 : } else {
277 0 : add_native(result, limb(powers_of_ten_uint64[counter]), value);
278 0 : counter = 0;
279 0 : value = 0;
280 : }
281 : }
282 :
283 : // add our fraction digits, if they're available.
284 0 : if (num.fraction.ptr != nullptr) {
285 0 : p = num.fraction.ptr;
286 0 : pend = p + num.fraction.len();
287 0 : if (digits == 0) {
288 : skip_zeros(p, pend);
289 : }
290 : // process all digits, in increments of step per loop
291 0 : while (p != pend) {
292 0 : while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) {
293 : parse_eight_digits(p, value, counter, digits);
294 : }
295 0 : while (counter < step && p != pend && digits < max_digits) {
296 : parse_one_digit(p, value, counter, digits);
297 : }
298 0 : if (digits == max_digits) {
299 : // add the temporary value, then check if we've truncated any digits
300 0 : add_native(result, limb(powers_of_ten_uint64[counter]), value);
301 0 : bool truncated = is_truncated(p, pend);
302 0 : if (truncated) {
303 : round_up_bigint(result, digits);
304 : }
305 0 : return;
306 : } else {
307 0 : add_native(result, limb(powers_of_ten_uint64[counter]), value);
308 0 : counter = 0;
309 0 : value = 0;
310 : }
311 : }
312 : }
313 :
314 0 : if (counter != 0) {
315 0 : add_native(result, limb(powers_of_ten_uint64[counter]), value);
316 : }
317 : }
318 :
319 : template <typename T>
320 : inline FASTFLOAT_CONSTEXPR20
321 0 : adjusted_mantissa positive_digit_comp(bigint& bigmant, int32_t exponent) noexcept {
322 0 : FASTFLOAT_ASSERT(bigmant.pow10(uint32_t(exponent)));
323 0 : adjusted_mantissa answer;
324 : bool truncated;
325 0 : answer.mantissa = bigmant.hi64(truncated);
326 0 : int bias = binary_format<T>::mantissa_explicit_bits() - binary_format<T>::minimum_exponent();
327 0 : answer.power2 = bigmant.bit_length() - 64 + bias;
328 :
329 0 : round<T>(answer, [truncated](adjusted_mantissa& a, int32_t shift) {
330 0 : round_nearest_tie_even(a, shift, [truncated](bool is_odd, bool is_halfway, bool is_above) -> bool {
331 0 : return is_above || (is_halfway && truncated) || (is_odd && is_halfway);
332 : });
333 : });
334 :
335 0 : return answer;
336 : }
337 :
338 : // the scaling here is quite simple: we have, for the real digits `m * 10^e`,
339 : // and for the theoretical digits `n * 2^f`. Since `e` is always negative,
340 : // to scale them identically, we do `n * 2^f * 5^-f`, so we now have `m * 2^e`.
341 : // we then need to scale by `2^(f- e)`, and then the two significant digits
342 : // are of the same magnitude.
343 : template <typename T>
344 : inline FASTFLOAT_CONSTEXPR20
345 0 : adjusted_mantissa negative_digit_comp(bigint& bigmant, adjusted_mantissa am, int32_t exponent) noexcept {
346 0 : bigint& real_digits = bigmant;
347 0 : int32_t real_exp = exponent;
348 :
349 : // get the value of `b`, rounded down, and get a bigint representation of b+h
350 0 : adjusted_mantissa am_b = am;
351 : // gcc7 buf: use a lambda to remove the noexcept qualifier bug with -Wnoexcept-type.
352 0 : round<T>(am_b, [](adjusted_mantissa&a, int32_t shift) { round_down(a, shift); });
353 : T b;
354 0 : to_float(false, am_b, b);
355 0 : adjusted_mantissa theor = to_extended_halfway(b);
356 0 : bigint theor_digits(theor.mantissa);
357 0 : int32_t theor_exp = theor.power2;
358 :
359 : // scale real digits and theor digits to be same power.
360 0 : int32_t pow2_exp = theor_exp - real_exp;
361 0 : uint32_t pow5_exp = uint32_t(-real_exp);
362 0 : if (pow5_exp != 0) {
363 0 : FASTFLOAT_ASSERT(theor_digits.pow5(pow5_exp));
364 : }
365 0 : if (pow2_exp > 0) {
366 0 : FASTFLOAT_ASSERT(theor_digits.pow2(uint32_t(pow2_exp)));
367 0 : } else if (pow2_exp < 0) {
368 0 : FASTFLOAT_ASSERT(real_digits.pow2(uint32_t(-pow2_exp)));
369 : }
370 :
371 : // compare digits, and use it to director rounding
372 0 : int ord = real_digits.compare(theor_digits);
373 0 : adjusted_mantissa answer = am;
374 0 : round<T>(answer, [ord](adjusted_mantissa& a, int32_t shift) {
375 0 : round_nearest_tie_even(a, shift, [ord](bool is_odd, bool _, bool __) -> bool {
376 : (void)_; // not needed, since we've done our comparison
377 : (void)__; // not needed, since we've done our comparison
378 0 : if (ord > 0) {
379 0 : return true;
380 0 : } else if (ord < 0) {
381 0 : return false;
382 : } else {
383 0 : return is_odd;
384 : }
385 : });
386 : });
387 :
388 0 : return answer;
389 : }
390 :
391 : // parse the significant digits as a big integer to unambiguously round the
392 : // the significant digits. here, we are trying to determine how to round
393 : // an extended float representation close to `b+h`, halfway between `b`
394 : // (the float rounded-down) and `b+u`, the next positive float. this
395 : // algorithm is always correct, and uses one of two approaches. when
396 : // the exponent is positive relative to the significant digits (such as
397 : // 1234), we create a big-integer representation, get the high 64-bits,
398 : // determine if any lower bits are truncated, and use that to direct
399 : // rounding. in case of a negative exponent relative to the significant
400 : // digits (such as 1.2345), we create a theoretical representation of
401 : // `b` as a big-integer type, scaled to the same binary exponent as
402 : // the actual digits. we then compare the big integer representations
403 : // of both, and use that to direct rounding.
404 : template <typename T, typename UC>
405 : inline FASTFLOAT_CONSTEXPR20
406 0 : adjusted_mantissa digit_comp(parsed_number_string_t<UC>& num, adjusted_mantissa am) noexcept {
407 : // remove the invalid exponent bias
408 0 : am.power2 -= invalid_am_bias;
409 :
410 0 : int32_t sci_exp = scientific_exponent(num);
411 0 : size_t max_digits = binary_format<T>::max_digits();
412 0 : size_t digits = 0;
413 0 : bigint bigmant;
414 0 : parse_mantissa(bigmant, num, max_digits, digits);
415 : // can't underflow, since digits is at most max_digits.
416 0 : int32_t exponent = sci_exp + 1 - int32_t(digits);
417 0 : if (exponent >= 0) {
418 0 : return positive_digit_comp<T>(bigmant, exponent);
419 : } else {
420 0 : return negative_digit_comp<T>(bigmant, am, exponent);
421 : }
422 : }
423 :
424 : } // namespace fast_float
425 :
426 : #endif
|