Line data Source code
1 : #ifndef FASTFLOAT_DIGIT_COMPARISON_H
2 : #define FASTFLOAT_DIGIT_COMPARISON_H
3 :
4 : #include <algorithm>
5 : #include <cstdint>
6 : #include <cstring>
7 : #include <iterator>
8 :
9 : #include "float_common.h"
10 : #include "bigint.h"
11 : #include "ascii_number.h"
12 :
13 : namespace fast_float {
14 :
15 : // 1e0 to 1e19
16 : constexpr static uint64_t powers_of_ten_uint64[] = {1UL,
17 : 10UL,
18 : 100UL,
19 : 1000UL,
20 : 10000UL,
21 : 100000UL,
22 : 1000000UL,
23 : 10000000UL,
24 : 100000000UL,
25 : 1000000000UL,
26 : 10000000000UL,
27 : 100000000000UL,
28 : 1000000000000UL,
29 : 10000000000000UL,
30 : 100000000000000UL,
31 : 1000000000000000UL,
32 : 10000000000000000UL,
33 : 100000000000000000UL,
34 : 1000000000000000000UL,
35 : 10000000000000000000UL};
36 :
37 : // calculate the exponent, in scientific notation, of the number.
38 : // this algorithm is not even close to optimized, but it has no practical
39 : // effect on performance: in order to have a faster algorithm, we'd need
40 : // to slow down performance for faster algorithms, and this is still fast.
41 : template <typename UC>
42 : fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int32_t
43 : scientific_exponent(parsed_number_string_t<UC> &num) noexcept {
44 0 : uint64_t mantissa = num.mantissa;
45 0 : int32_t exponent = int32_t(num.exponent);
46 0 : while (mantissa >= 10000) {
47 0 : mantissa /= 10000;
48 0 : exponent += 4;
49 : }
50 0 : while (mantissa >= 100) {
51 0 : mantissa /= 100;
52 0 : exponent += 2;
53 : }
54 0 : while (mantissa >= 10) {
55 0 : mantissa /= 10;
56 0 : exponent += 1;
57 : }
58 0 : return exponent;
59 : }
60 :
61 : // this converts a native floating-point number to an extended-precision float.
62 : template <typename T>
63 : fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
64 : to_extended(T value) noexcept {
65 : using equiv_uint = typename binary_format<T>::equiv_uint;
66 0 : constexpr equiv_uint exponent_mask = binary_format<T>::exponent_mask();
67 0 : constexpr equiv_uint mantissa_mask = binary_format<T>::mantissa_mask();
68 0 : constexpr equiv_uint hidden_bit_mask = binary_format<T>::hidden_bit_mask();
69 :
70 0 : adjusted_mantissa am;
71 0 : int32_t bias = binary_format<T>::mantissa_explicit_bits() -
72 : binary_format<T>::minimum_exponent();
73 : equiv_uint bits;
74 : #if FASTFLOAT_HAS_BIT_CAST
75 : bits = std::bit_cast<equiv_uint>(value);
76 : #else
77 0 : ::memcpy(&bits, &value, sizeof(T));
78 : #endif
79 0 : if ((bits & exponent_mask) == 0) {
80 : // denormal
81 0 : am.power2 = 1 - bias;
82 0 : am.mantissa = bits & mantissa_mask;
83 : } else {
84 : // normal
85 0 : am.power2 = int32_t((bits & exponent_mask) >>
86 0 : binary_format<T>::mantissa_explicit_bits());
87 0 : am.power2 -= bias;
88 0 : am.mantissa = (bits & mantissa_mask) | hidden_bit_mask;
89 : }
90 :
91 0 : return am;
92 : }
93 :
94 : // get the extended precision value of the halfway point between b and b+u.
95 : // we are given a native float that represents b, so we need to adjust it
96 : // halfway between b and b+u.
97 : template <typename T>
98 : fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
99 : to_extended_halfway(T value) noexcept {
100 0 : adjusted_mantissa am = to_extended(value);
101 0 : am.mantissa <<= 1;
102 0 : am.mantissa += 1;
103 0 : am.power2 -= 1;
104 0 : return am;
105 : }
106 :
107 : // round an extended-precision float to the nearest machine float.
108 : template <typename T, typename callback>
109 : fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void round(adjusted_mantissa &am,
110 : callback cb) noexcept {
111 0 : int32_t mantissa_shift = 64 - binary_format<T>::mantissa_explicit_bits() - 1;
112 0 : if (-am.power2 >= mantissa_shift) {
113 : // have a denormal float
114 0 : int32_t shift = -am.power2 + 1;
115 0 : cb(am, std::min<int32_t>(shift, 64));
116 : // check for round-up: if rounding-nearest carried us to the hidden bit.
117 0 : am.power2 = (am.mantissa <
118 0 : (uint64_t(1) << binary_format<T>::mantissa_explicit_bits()))
119 0 : ? 0
120 : : 1;
121 0 : return;
122 : }
123 :
124 : // have a normal float, use the default shift.
125 0 : cb(am, mantissa_shift);
126 :
127 : // check for carry
128 0 : if (am.mantissa >=
129 0 : (uint64_t(2) << binary_format<T>::mantissa_explicit_bits())) {
130 0 : am.mantissa = (uint64_t(1) << binary_format<T>::mantissa_explicit_bits());
131 0 : am.power2++;
132 : }
133 :
134 : // check for infinite: we could have carried to an infinite power
135 0 : am.mantissa &= ~(uint64_t(1) << binary_format<T>::mantissa_explicit_bits());
136 0 : if (am.power2 >= binary_format<T>::infinite_power()) {
137 0 : am.power2 = binary_format<T>::infinite_power();
138 0 : am.mantissa = 0;
139 : }
140 : }
141 :
142 : template <typename callback>
143 : fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void
144 : round_nearest_tie_even(adjusted_mantissa &am, int32_t shift,
145 : callback cb) noexcept {
146 0 : const uint64_t mask = (shift == 64) ? UINT64_MAX : (uint64_t(1) << shift) - 1;
147 0 : const uint64_t halfway = (shift == 0) ? 0 : uint64_t(1) << (shift - 1);
148 0 : uint64_t truncated_bits = am.mantissa & mask;
149 0 : bool is_above = truncated_bits > halfway;
150 0 : bool is_halfway = truncated_bits == halfway;
151 :
152 : // shift digits into position
153 0 : if (shift == 64) {
154 0 : am.mantissa = 0;
155 : } else {
156 0 : am.mantissa >>= shift;
157 : }
158 0 : am.power2 += shift;
159 :
160 0 : bool is_odd = (am.mantissa & 1) == 1;
161 0 : am.mantissa += uint64_t(cb(is_odd, is_halfway, is_above));
162 0 : }
163 :
164 : fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void
165 : round_down(adjusted_mantissa &am, int32_t shift) noexcept {
166 0 : if (shift == 64) {
167 0 : am.mantissa = 0;
168 : } else {
169 0 : am.mantissa >>= shift;
170 : }
171 0 : am.power2 += shift;
172 0 : }
173 : template <typename UC>
174 : fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
175 : skip_zeros(UC const *&first, UC const *last) noexcept {
176 : uint64_t val;
177 0 : while (!cpp20_and_in_constexpr() &&
178 0 : std::distance(first, last) >= int_cmp_len<UC>()) {
179 0 : ::memcpy(&val, first, sizeof(uint64_t));
180 0 : if (val != int_cmp_zeros<UC>()) {
181 0 : break;
182 : }
183 0 : first += int_cmp_len<UC>();
184 : }
185 0 : while (first != last) {
186 0 : if (*first != UC('0')) {
187 0 : break;
188 : }
189 0 : first++;
190 : }
191 0 : }
192 :
193 : // determine if any non-zero digits were truncated.
194 : // all characters must be valid digits.
195 : template <typename UC>
196 : fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
197 : is_truncated(UC const *first, UC const *last) noexcept {
198 : // do 8-bit optimizations, can just compare to 8 literal 0s.
199 : uint64_t val;
200 0 : while (!cpp20_and_in_constexpr() &&
201 0 : std::distance(first, last) >= int_cmp_len<UC>()) {
202 0 : ::memcpy(&val, first, sizeof(uint64_t));
203 0 : if (val != int_cmp_zeros<UC>()) {
204 0 : return true;
205 : }
206 0 : first += int_cmp_len<UC>();
207 : }
208 0 : while (first != last) {
209 0 : if (*first != UC('0')) {
210 0 : return true;
211 : }
212 0 : ++first;
213 : }
214 0 : return false;
215 : }
216 : template <typename UC>
217 : fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
218 : is_truncated(span<const UC> s) noexcept {
219 0 : return is_truncated(s.ptr, s.ptr + s.len());
220 : }
221 :
222 : template <typename UC>
223 : fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
224 : parse_eight_digits(const UC *&p, limb &value, size_t &counter,
225 : size_t &count) noexcept {
226 0 : value = value * 100000000 + parse_eight_digits_unrolled(p);
227 0 : p += 8;
228 0 : counter += 8;
229 0 : count += 8;
230 0 : }
231 :
232 : template <typename UC>
233 : fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void
234 : parse_one_digit(UC const *&p, limb &value, size_t &counter,
235 : size_t &count) noexcept {
236 0 : value = value * 10 + limb(*p - UC('0'));
237 0 : p++;
238 0 : counter++;
239 0 : count++;
240 0 : }
241 :
242 : fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
243 : add_native(bigint &big, limb power, limb value) noexcept {
244 0 : big.mul(power);
245 0 : big.add(value);
246 0 : }
247 :
248 : fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
249 : round_up_bigint(bigint &big, size_t &count) noexcept {
250 : // need to round-up the digits, but need to avoid rounding
251 : // ....9999 to ...10000, which could cause a false halfway point.
252 : add_native(big, 10, 1);
253 0 : count++;
254 0 : }
255 :
256 : // parse the significant digits into a big integer
257 : template <typename UC>
258 : inline FASTFLOAT_CONSTEXPR20 void
259 0 : parse_mantissa(bigint &result, parsed_number_string_t<UC> &num,
260 : size_t max_digits, size_t &digits) noexcept {
261 : // try to minimize the number of big integer and scalar multiplication.
262 : // therefore, try to parse 8 digits at a time, and multiply by the largest
263 : // scalar value (9 or 19 digits) for each step.
264 0 : size_t counter = 0;
265 0 : digits = 0;
266 0 : limb value = 0;
267 : #ifdef FASTFLOAT_64BIT_LIMB
268 0 : size_t step = 19;
269 : #else
270 : size_t step = 9;
271 : #endif
272 :
273 : // process all integer digits.
274 0 : UC const *p = num.integer.ptr;
275 0 : UC const *pend = p + num.integer.len();
276 : skip_zeros(p, pend);
277 : // process all digits, in increments of step per loop
278 0 : while (p != pend) {
279 0 : while ((std::distance(p, pend) >= 8) && (step - counter >= 8) &&
280 0 : (max_digits - digits >= 8)) {
281 : parse_eight_digits(p, value, counter, digits);
282 : }
283 0 : while (counter < step && p != pend && digits < max_digits) {
284 : parse_one_digit(p, value, counter, digits);
285 : }
286 0 : if (digits == max_digits) {
287 : // add the temporary value, then check if we've truncated any digits
288 0 : add_native(result, limb(powers_of_ten_uint64[counter]), value);
289 0 : bool truncated = is_truncated(p, pend);
290 0 : if (num.fraction.ptr != nullptr) {
291 0 : truncated |= is_truncated(num.fraction);
292 : }
293 0 : if (truncated) {
294 : round_up_bigint(result, digits);
295 : }
296 0 : return;
297 : } else {
298 0 : add_native(result, limb(powers_of_ten_uint64[counter]), value);
299 0 : counter = 0;
300 0 : value = 0;
301 : }
302 : }
303 :
304 : // add our fraction digits, if they're available.
305 0 : if (num.fraction.ptr != nullptr) {
306 0 : p = num.fraction.ptr;
307 0 : pend = p + num.fraction.len();
308 0 : if (digits == 0) {
309 : skip_zeros(p, pend);
310 : }
311 : // process all digits, in increments of step per loop
312 0 : while (p != pend) {
313 0 : while ((std::distance(p, pend) >= 8) && (step - counter >= 8) &&
314 0 : (max_digits - digits >= 8)) {
315 : parse_eight_digits(p, value, counter, digits);
316 : }
317 0 : while (counter < step && p != pend && digits < max_digits) {
318 : parse_one_digit(p, value, counter, digits);
319 : }
320 0 : if (digits == max_digits) {
321 : // add the temporary value, then check if we've truncated any digits
322 0 : add_native(result, limb(powers_of_ten_uint64[counter]), value);
323 0 : bool truncated = is_truncated(p, pend);
324 0 : if (truncated) {
325 : round_up_bigint(result, digits);
326 : }
327 0 : return;
328 : } else {
329 0 : add_native(result, limb(powers_of_ten_uint64[counter]), value);
330 0 : counter = 0;
331 0 : value = 0;
332 : }
333 : }
334 : }
335 :
336 0 : if (counter != 0) {
337 0 : add_native(result, limb(powers_of_ten_uint64[counter]), value);
338 : }
339 : }
340 :
341 : template <typename T>
342 : inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
343 0 : positive_digit_comp(bigint &bigmant, int32_t exponent) noexcept {
344 0 : FASTFLOAT_ASSERT(bigmant.pow10(uint32_t(exponent)));
345 0 : adjusted_mantissa answer;
346 : bool truncated;
347 0 : answer.mantissa = bigmant.hi64(truncated);
348 0 : int bias = binary_format<T>::mantissa_explicit_bits() -
349 : binary_format<T>::minimum_exponent();
350 0 : answer.power2 = bigmant.bit_length() - 64 + bias;
351 :
352 0 : round<T>(answer, [truncated](adjusted_mantissa &a, int32_t shift) {
353 0 : round_nearest_tie_even(
354 : a, shift,
355 0 : [truncated](bool is_odd, bool is_halfway, bool is_above) -> bool {
356 0 : return is_above || (is_halfway && truncated) ||
357 0 : (is_odd && is_halfway);
358 : });
359 : });
360 :
361 0 : return answer;
362 : }
363 :
364 : // the scaling here is quite simple: we have, for the real digits `m * 10^e`,
365 : // and for the theoretical digits `n * 2^f`. Since `e` is always negative,
366 : // to scale them identically, we do `n * 2^f * 5^-f`, so we now have `m * 2^e`.
367 : // we then need to scale by `2^(f- e)`, and then the two significant digits
368 : // are of the same magnitude.
369 : template <typename T>
370 0 : inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp(
371 : bigint &bigmant, adjusted_mantissa am, int32_t exponent) noexcept {
372 0 : bigint &real_digits = bigmant;
373 0 : int32_t real_exp = exponent;
374 :
375 : // get the value of `b`, rounded down, and get a bigint representation of b+h
376 0 : adjusted_mantissa am_b = am;
377 : // gcc7 buf: use a lambda to remove the noexcept qualifier bug with
378 : // -Wnoexcept-type.
379 0 : round<T>(am_b,
380 0 : [](adjusted_mantissa &a, int32_t shift) { round_down(a, shift); });
381 : T b;
382 0 : to_float(false, am_b, b);
383 0 : adjusted_mantissa theor = to_extended_halfway(b);
384 0 : bigint theor_digits(theor.mantissa);
385 0 : int32_t theor_exp = theor.power2;
386 :
387 : // scale real digits and theor digits to be same power.
388 0 : int32_t pow2_exp = theor_exp - real_exp;
389 0 : uint32_t pow5_exp = uint32_t(-real_exp);
390 0 : if (pow5_exp != 0) {
391 0 : FASTFLOAT_ASSERT(theor_digits.pow5(pow5_exp));
392 : }
393 0 : if (pow2_exp > 0) {
394 0 : FASTFLOAT_ASSERT(theor_digits.pow2(uint32_t(pow2_exp)));
395 0 : } else if (pow2_exp < 0) {
396 0 : FASTFLOAT_ASSERT(real_digits.pow2(uint32_t(-pow2_exp)));
397 : }
398 :
399 : // compare digits, and use it to director rounding
400 0 : int ord = real_digits.compare(theor_digits);
401 0 : adjusted_mantissa answer = am;
402 0 : round<T>(answer, [ord](adjusted_mantissa &a, int32_t shift) {
403 0 : round_nearest_tie_even(
404 0 : a, shift, [ord](bool is_odd, bool _, bool __) -> bool {
405 : (void)_; // not needed, since we've done our comparison
406 : (void)__; // not needed, since we've done our comparison
407 0 : if (ord > 0) {
408 0 : return true;
409 0 : } else if (ord < 0) {
410 0 : return false;
411 : } else {
412 0 : return is_odd;
413 : }
414 : });
415 : });
416 :
417 0 : return answer;
418 : }
419 :
420 : // parse the significant digits as a big integer to unambiguously round the
421 : // the significant digits. here, we are trying to determine how to round
422 : // an extended float representation close to `b+h`, halfway between `b`
423 : // (the float rounded-down) and `b+u`, the next positive float. this
424 : // algorithm is always correct, and uses one of two approaches. when
425 : // the exponent is positive relative to the significant digits (such as
426 : // 1234), we create a big-integer representation, get the high 64-bits,
427 : // determine if any lower bits are truncated, and use that to direct
428 : // rounding. in case of a negative exponent relative to the significant
429 : // digits (such as 1.2345), we create a theoretical representation of
430 : // `b` as a big-integer type, scaled to the same binary exponent as
431 : // the actual digits. we then compare the big integer representations
432 : // of both, and use that to direct rounding.
433 : template <typename T, typename UC>
434 : inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
435 0 : digit_comp(parsed_number_string_t<UC> &num, adjusted_mantissa am) noexcept {
436 : // remove the invalid exponent bias
437 0 : am.power2 -= invalid_am_bias;
438 :
439 0 : int32_t sci_exp = scientific_exponent(num);
440 0 : size_t max_digits = binary_format<T>::max_digits();
441 0 : size_t digits = 0;
442 0 : bigint bigmant;
443 0 : parse_mantissa(bigmant, num, max_digits, digits);
444 : // can't underflow, since digits is at most max_digits.
445 0 : int32_t exponent = sci_exp + 1 - int32_t(digits);
446 0 : if (exponent >= 0) {
447 0 : return positive_digit_comp<T>(bigmant, exponent);
448 : } else {
449 0 : return negative_digit_comp<T>(bigmant, am, exponent);
450 : }
451 : }
452 :
453 : } // namespace fast_float
454 :
455 : #endif
|