LCOV - gdal_filtered.info - gcore/gdal_minmax

LCOV - code coverage report

Current view:	top level - gcore - gdal_minmax_element.hpp (source / functions)		Hit	Total	Coverage
Test:	gdal_filtered.info	Lines:	342	342	100.0 %
Date:	2026-01-11 15:50:51	Functions:	378	481	78.6 %

          Line data    Source code

       1             : /******************************************************************************
       2             :  * Project:  GDAL Core
       3             :  * Purpose:  Utility functions to find minimum and maximum values in a buffer
       4             :  * Author:   Even Rouault, <even dot rouault at spatialys.com>
       5             :  *
       6             :  ******************************************************************************
       7             :  * Copyright (c) 2024, Even Rouault <even dot rouault at spatialys.com>
       8             :  *
       9             :  * SPDX-License-Identifier: MIT
      10             :  ****************************************************************************/
      11             : 
      12             : #ifndef GDAL_MINMAX_ELEMENT_INCLUDED
      13             : #define GDAL_MINMAX_ELEMENT_INCLUDED
      14             : 
      15             : // NOTE: This header requires C++17
      16             : 
      17             : // This file may be vendored by other applications than GDAL
      18             : // WARNING: if modifying this file, please also update the upstream GDAL version
      19             : // at https://github.com/OSGeo/gdal/blob/master/gcore/gdal_minmax_element.hpp
      20             : 
      21             : #include <algorithm>
      22             : #include <cassert>
      23             : #include <cmath>
      24             : #include <cstdint>
      25             : #include <limits>
      26             : #include <type_traits>
      27             : #include <utility>
      28             : 
      29             : #include "gdal.h"
      30             : 
      31             : // Just to please cppcheck
      32             : #ifndef GDAL_COMPUTE_VERSION
      33             : #define GDAL_COMPUTE_VERSION(maj, min, rev)                                    \
      34             :     ((maj)*1000000 + (min)*10000 + (rev)*100)
      35             : #endif
      36             : 
      37             : #ifdef GDAL_COMPILATION
      38             : #include "cpl_float.h"
      39             : #define GDAL_MINMAXELT_NS gdal
      40             : #elif !defined(GDAL_MINMAXELT_NS)
      41             : #error "Please define the GDAL_MINMAXELT_NS macro to define the namespace"
      42             : #elif GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
      43             : #include "cpl_float.h"
      44             : #endif
      45             : 
      46             : #ifdef USE_NEON_OPTIMIZATIONS
      47             : #include "include_sse2neon.h"
      48             : #define GDAL_MINMAX_ELEMENT_USE_SSE2
      49             : #else
      50             : #if defined(__x86_64) || defined(_M_X64)
      51             : #define GDAL_MINMAX_ELEMENT_USE_SSE2
      52             : #endif
      53             : #ifdef GDAL_MINMAX_ELEMENT_USE_SSE2
      54             : // SSE2 header
      55             : #include <emmintrin.h>
      56             : #if defined(__SSE4_1__) || defined(__AVX__)
      57             : #include <smmintrin.h>
      58             : #endif
      59             : #endif
      60             : #endif
      61             : 
      62             : #include "gdal_priv_templates.hpp"
      63             : 
      64             : namespace GDAL_MINMAXELT_NS
      65             : {
      66             : namespace detail
      67             : {
      68             : 
      69             : template <class T> struct is_floating_point
      70             : {
      71             :     static constexpr bool value = std::is_floating_point_v<T>
      72             : #if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
      73             :                                   || std::is_same_v<T, GFloat16>
      74             : #endif
      75             :         ;
      76             : };
      77             : 
      78             : template <class T>
      79             : constexpr bool is_floating_point_v = is_floating_point<T>::value;
      80             : 
      81             : /************************************************************************/
      82             : /*                            compScalar()                              */
      83             : /************************************************************************/
      84             : 
      85      486462 : template <class T, bool IS_MAX> inline static bool compScalar(T x, T y)
      86             : {
      87             :     if constexpr (IS_MAX)
      88      195129 :         return x > y;
      89             :     else
      90      291333 :         return x < y;
      91             : }
      92             : 
      93       14834 : template <typename T> inline static bool IsNan(T x)
      94             : {
      95             :     // We need to write `using std::isnan` instead of directly using
      96             :     // `std::isnan` because `std::isnan` only supports the types
      97             :     // `float` and `double`. The `isnan` for `cpl::Float16` is found in the
      98             :     // `cpl` namespace via argument-dependent lookup
      99             :     // <https://en.cppreference.com/w/cpp/language/adl>.
     100             :     using std::isnan;
     101       14834 :     return isnan(x);
     102             : }
     103             : 
     104     6883345 : template <class T> inline static bool compEqual(T x, T y)
     105             : {
     106     6883345 :     return x == y;
     107             : }
     108             : 
     109             : // On Intel/Neon, we do comparisons on uint16_t instead of casting to float for
     110             : // faster execution.
     111             : #if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0) &&                      \
     112             :     defined(GDAL_MINMAX_ELEMENT_USE_SSE2)
     113       10520 : template <> bool IsNan<GFloat16>(GFloat16 x)
     114             : {
     115             :     uint16_t iX;
     116       10520 :     memcpy(&iX, &x, sizeof(x));
     117             :     // Check that the 5 bits of the exponent are set and the mantissa is not zero.
     118       10520 :     return (iX & 0x7fff) > 0x7c00;
     119             : }
     120             : 
     121        1794 : template <> bool compEqual<GFloat16>(GFloat16 x, GFloat16 y)
     122             : {
     123             :     uint16_t iX, iY;
     124        1794 :     memcpy(&iX, &x, sizeof(x));
     125        1794 :     memcpy(&iY, &y, sizeof(y));
     126             :     // Given our usage where y cannot be NaN we can skip the IsNan tests
     127        1794 :     assert(!IsNan(y));
     128        3580 :     return (iX == iY ||
     129             :             // Also check +0 == -0
     130        3580 :             ((iX | iY) == (1 << 15)));
     131             : }
     132             : 
     133        4346 : template <> bool compScalar<GFloat16, true>(GFloat16 x, GFloat16 y)
     134             : {
     135             :     uint16_t iX, iY;
     136        4346 :     memcpy(&iX, &x, sizeof(x));
     137        4346 :     memcpy(&iY, &y, sizeof(y));
     138             :     bool ret;
     139        4346 :     if (IsNan(x) || IsNan(y))
     140             :     {
     141         813 :         ret = false;
     142             :     }
     143        3533 :     else if (!(iX >> 15))
     144             :     {
     145             :         // +0 considered > -0. We don't really care
     146         163 :         ret = (iY >> 15) || iX > iY;
     147             :     }
     148             :     else
     149             :     {
     150        3370 :         ret = (iY >> 15) && iX < iY;
     151             :     }
     152        4346 :     return ret;
     153             : }
     154             : 
     155        2253 : template <> bool compScalar<GFloat16, false>(GFloat16 x, GFloat16 y)
     156             : {
     157        2253 :     return compScalar<GFloat16, true>(y, x);
     158             : }
     159             : #endif
     160             : 
     161             : /************************************************************************/
     162             : /*                 extremum_element_with_nan_generic()                  */
     163             : /************************************************************************/
     164             : 
     165             : template <class T, bool IS_MAX>
     166             : inline size_t extremum_element_with_nan_generic(const T *v, size_t size)
     167             : {
     168             :     if (size == 0)
     169             :         return 0;
     170             :     size_t idx_of_extremum = 0;
     171             :     auto extremum = v[0];
     172             :     bool extremum_is_nan = IsNan(extremum);
     173             :     size_t i = 1;
     174             :     for (; i < size; ++i)
     175             :     {
     176             :         if (compScalar<T, IS_MAX>(v[i], extremum) ||
     177             :             (extremum_is_nan && !IsNan(v[i])))
     178             :         {
     179             :             extremum = v[i];
     180             :             idx_of_extremum = i;
     181             :             extremum_is_nan = false;
     182             :         }
     183             :     }
     184             :     return idx_of_extremum;
     185             : }
     186             : 
     187             : /************************************************************************/
     188             : /*                 extremum_element_with_nan_generic()                  */
     189             : /************************************************************************/
     190             : 
     191             : template <class T, bool IS_MAX>
     192             : inline size_t extremum_element_with_nan_generic(const T *v, size_t size,
     193             :                                                 T noDataValue)
     194             : {
     195             :     if (IsNan(noDataValue))
     196             :         return extremum_element_with_nan_generic<T, IS_MAX>(v, size);
     197             :     if (size == 0)
     198             :         return 0;
     199             :     size_t idx_of_extremum = 0;
     200             :     auto extremum = v[0];
     201             :     bool extremum_is_nan_or_nodata =
     202             :         IsNan(extremum) || compEqual(extremum, noDataValue);
     203             :     size_t i = 1;
     204             :     for (; i < size; ++i)
     205             :     {
     206             :         if (!compEqual(v[i], noDataValue) &&
     207             :             (compScalar<T, IS_MAX>(v[i], extremum) ||
     208             :              (extremum_is_nan_or_nodata && !IsNan(v[i]))))
     209             :         {
     210             :             extremum = v[i];
     211             :             idx_of_extremum = i;
     212             :             extremum_is_nan_or_nodata = false;
     213             :         }
     214             :     }
     215             :     return idx_of_extremum;
     216             : }
     217             : 
     218             : /************************************************************************/
     219             : /*                       extremum_element_generic()                     */
     220             : /************************************************************************/
     221             : 
     222             : template <class T, bool IS_MAX>
     223             : inline size_t extremum_element_generic(const T *buffer, size_t size,
     224             :                                        bool bHasNoData, T noDataValue)
     225             : {
     226             :     if (bHasNoData)
     227             :     {
     228             :         if constexpr (is_floating_point_v<T>)
     229             :         {
     230             :             if (IsNan(noDataValue))
     231             :             {
     232             :                 if constexpr (IS_MAX)
     233             :                 {
     234             :                     return std::max_element(buffer, buffer + size,
     235             :                                             [](T a, T b) {
     236             :                                                 return IsNan(b)   ? false
     237             :                                                        : IsNan(a) ? true
     238             :                                                                   : a < b;
     239             :                                             }) -
     240             :                            buffer;
     241             :                 }
     242             :                 else
     243             :                 {
     244             :                     return std::min_element(buffer, buffer + size,
     245             :                                             [](T a, T b) {
     246             :                                                 return IsNan(b)   ? true
     247             :                                                        : IsNan(a) ? false
     248             :                                                                   : a < b;
     249             :                                             }) -
     250             :                            buffer;
     251             :                 }
     252             :             }
     253             :             else
     254             :             {
     255             :                 if constexpr (IS_MAX)
     256             :                 {
     257             :                     return std::max_element(buffer, buffer + size,
     258             :                                             [noDataValue](T a, T b)
     259             :                                             {
     260             :                                                 return IsNan(b)   ? false
     261             :                                                        : IsNan(a) ? true
     262             :                                                        : (b == noDataValue)
     263             :                                                            ? false
     264             :                                                        : (a == noDataValue)
     265             :                                                            ? true
     266             :                                                            : a < b;
     267             :                                             }) -
     268             :                            buffer;
     269             :                 }
     270             :                 else
     271             :                 {
     272             :                     return std::min_element(buffer, buffer + size,
     273             :                                             [noDataValue](T a, T b)
     274             :                                             {
     275             :                                                 return IsNan(b)   ? true
     276             :                                                        : IsNan(a) ? false
     277             :                                                        : (b == noDataValue)
     278             :                                                            ? true
     279             :                                                        : (a == noDataValue)
     280             :                                                            ? false
     281             :                                                            : a < b;
     282             :                                             }) -
     283             :                            buffer;
     284             :                 }
     285             :             }
     286             :         }
     287             :         else
     288             :         {
     289             :             if constexpr (IS_MAX)
     290             :             {
     291             :                 return std::max_element(buffer, buffer + size,
     292             :                                         [noDataValue](T a, T b) {
     293             :                                             return (b == noDataValue)   ? false
     294             :                                                    : (a == noDataValue) ? true
     295             :                                                                         : a < b;
     296             :                                         }) -
     297             :                        buffer;
     298             :             }
     299             :             else
     300             :             {
     301             :                 return std::min_element(buffer, buffer + size,
     302             :                                         [noDataValue](T a, T b) {
     303             :                                             return (b == noDataValue)   ? true
     304             :                                                    : (a == noDataValue) ? false
     305             :                                                                         : a < b;
     306             :                                         }) -
     307             :                        buffer;
     308             :             }
     309             :         }
     310             :     }
     311             :     else
     312             :     {
     313             :         if constexpr (is_floating_point_v<T>)
     314             :         {
     315             :             if constexpr (IS_MAX)
     316             :             {
     317             :                 return std::max_element(buffer, buffer + size,
     318             :                                         [](T a, T b) {
     319             :                                             return IsNan(b)   ? false
     320             :                                                    : IsNan(a) ? true
     321             :                                                               : a < b;
     322             :                                         }) -
     323             :                        buffer;
     324             :             }
     325             :             else
     326             :             {
     327             :                 return std::min_element(buffer, buffer + size,
     328             :                                         [](T a, T b) {
     329             :                                             return IsNan(b)   ? true
     330             :                                                    : IsNan(a) ? false
     331             :                                                               : a < b;
     332             :                                         }) -
     333             :                        buffer;
     334             :             }
     335             :         }
     336             :         else
     337             :         {
     338             :             if constexpr (IS_MAX)
     339             :             {
     340             :                 return std::max_element(buffer, buffer + size) - buffer;
     341             :             }
     342             :             else
     343             :             {
     344             :                 return std::min_element(buffer, buffer + size) - buffer;
     345             :             }
     346             :         }
     347             :     }
     348             : }
     349             : 
     350             : #ifdef GDAL_MINMAX_ELEMENT_USE_SSE2
     351             : 
     352             : /************************************************************************/
     353             : /*                     extremum_element_sse2()                          */
     354             : /************************************************************************/
     355             : 
     356         115 : static inline int8_t Shift8(uint8_t x)
     357             : {
     358         115 :     return static_cast<int8_t>(x + std::numeric_limits<int8_t>::min());
     359             : }
     360             : 
     361          62 : static inline int16_t Shift16(uint16_t x)
     362             : {
     363          62 :     return static_cast<int16_t>(x + std::numeric_limits<int16_t>::min());
     364             : }
     365             : 
     366             : CPL_NOSANITIZE_UNSIGNED_INT_OVERFLOW
     367         672 : static inline int32_t Shift32(uint32_t x)
     368             : {
     369         672 :     x += static_cast<uint32_t>(std::numeric_limits<int32_t>::min());
     370             :     int32_t ret;
     371         672 :     memcpy(&ret, &x, sizeof(x));
     372         672 :     return ret;
     373             : }
     374             : 
     375             : CPL_NOSANITIZE_UNSIGNED_INT_OVERFLOW
     376         130 : static inline int64_t Shift64(uint64_t x)
     377             : {
     378         130 :     x += static_cast<uint64_t>(std::numeric_limits<int64_t>::min());
     379             :     int64_t ret;
     380         130 :     memcpy(&ret, &x, sizeof(x));
     381         130 :     return ret;
     382             : }
     383             : 
     384             : // Return a _mm128[i|d] register with all its elements set to x
     385       33689 : template <class T> static inline auto set1(T x)
     386             : {
     387             :     if constexpr (std::is_same_v<T, uint8_t>)
     388         230 :         return _mm_set1_epi8(Shift8(x));
     389             :     else if constexpr (std::is_same_v<T, int8_t>)
     390         470 :         return _mm_set1_epi8(x);
     391             :     else if constexpr (std::is_same_v<T, uint16_t>)
     392         124 :         return _mm_set1_epi16(Shift16(x));
     393             :     else if constexpr (std::is_same_v<T, int16_t>)
     394        9150 :         return _mm_set1_epi16(x);
     395             :     else if constexpr (std::is_same_v<T, uint32_t>)
     396        1344 :         return _mm_set1_epi32(Shift32(x));
     397             :     else if constexpr (std::is_same_v<T, int32_t>)
     398        3160 :         return _mm_set1_epi32(x);
     399             :     else if constexpr (std::is_same_v<T, uint64_t>)
     400         260 :         return _mm_set1_epi64x(Shift64(x));
     401             :     else if constexpr (std::is_same_v<T, int64_t>)
     402         189 :         return _mm_set1_epi64x(x);
     403             : #if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
     404             :     else if constexpr (std::is_same_v<T, GFloat16>)
     405             :     {
     406             :         int16_t iX;
     407         137 :         memcpy(&iX, &x, sizeof(x));
     408         274 :         return _mm_set1_epi16(iX);
     409             :     }
     410             : #endif
     411             :     else if constexpr (std::is_same_v<T, float>)
     412        2729 :         return _mm_set1_ps(x);
     413             :     else
     414       21685 :         return _mm_set1_pd(x);
     415             : }
     416             : 
     417             : // Return a _mm128[i|d] register with all its elements set to x
     418       38338 : template <class T> static inline auto set1_unshifted(T x)
     419             : {
     420             :     if constexpr (std::is_same_v<T, uint8_t>)
     421             :     {
     422             :         int8_t xSigned;
     423         124 :         memcpy(&xSigned, &x, sizeof(xSigned));
     424         248 :         return _mm_set1_epi8(xSigned);
     425             :     }
     426             :     else if constexpr (std::is_same_v<T, int8_t>)
     427         928 :         return _mm_set1_epi8(x);
     428             :     else if constexpr (std::is_same_v<T, uint16_t>)
     429             :     {
     430             :         int16_t xSigned;
     431          63 :         memcpy(&xSigned, &x, sizeof(xSigned));
     432         126 :         return _mm_set1_epi16(xSigned);
     433             :     }
     434             :     else if constexpr (std::is_same_v<T, int16_t>)
     435       19264 :         return _mm_set1_epi16(x);
     436             :     else if constexpr (std::is_same_v<T, uint32_t>)
     437             :     {
     438             :         int32_t xSigned;
     439         930 :         memcpy(&xSigned, &x, sizeof(xSigned));
     440        1860 :         return _mm_set1_epi32(xSigned);
     441             :     }
     442             :     else if constexpr (std::is_same_v<T, int32_t>)
     443        5279 :         return _mm_set1_epi32(x);
     444             :     else if constexpr (std::is_same_v<T, uint64_t>)
     445             :     {
     446             :         int64_t xSigned;
     447         121 :         memcpy(&xSigned, &x, sizeof(xSigned));
     448         242 :         return _mm_set1_epi64x(xSigned);
     449             :     }
     450             :     else if constexpr (std::is_same_v<T, int64_t>)
     451         147 :         return _mm_set1_epi64x(x);
     452             : #if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
     453             :     else if constexpr (std::is_same_v<T, GFloat16>)
     454             :     {
     455             :         int16_t iX;
     456         161 :         memcpy(&iX, &x, sizeof(x));
     457         322 :         return _mm_set1_epi16(iX);
     458             :     }
     459             : #endif
     460             :     else if constexpr (std::is_same_v<T, float>)
     461        3933 :         return _mm_set1_ps(x);
     462             :     else
     463       17484 :         return _mm_set1_pd(x);
     464             : }
     465             : 
     466             : // Load as many values of type T at a _mm128[i|d] register can contain from x
     467   145541938 : template <class T> static inline auto loadv(const T *x)
     468             : {
     469             :     if constexpr (std::is_same_v<T, float>)
     470    20045636 :         return _mm_loadu_ps(x);
     471             :     else if constexpr (std::is_same_v<T, double>)
     472    40287388 :         return _mm_loadu_pd(x);
     473             :     else
     474    85208914 :         return _mm_loadu_si128(reinterpret_cast<const __m128i *>(x));
     475             : }
     476             : 
     477    15000144 : inline __m128i IsNanGFloat16(__m128i x)
     478             : {
     479             :     // (iX & 0x7fff) > 0x7c00
     480    45000332 :     return _mm_cmpgt_epi16(_mm_and_si128(x, _mm_set1_epi16(0x7fff)),
     481    15000144 :                            _mm_set1_epi16(0x7c00));
     482             : }
     483             : 
     484             : template <class T, class SSE_T>
     485    82662930 : static inline SSE_T blendv(SSE_T a, SSE_T b, SSE_T mask)
     486             : {
     487             :     if constexpr (std::is_same_v<T, float>)
     488             :     {
     489             : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
     490             :         return _mm_blendv_ps(a, b, mask);
     491             : #else
     492    30000180 :         return _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, b));
     493             : #endif
     494             :     }
     495             :     else if constexpr (std::is_same_v<T, double>)
     496             :     {
     497             : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
     498             :         return _mm_blendv_pd(a, b, mask);
     499             : #else
     500    60064020 :         return _mm_or_pd(_mm_andnot_pd(mask, a), _mm_and_pd(mask, b));
     501             : #endif
     502             :     }
     503             :     else
     504             :     {
     505             : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
     506             :         return _mm_blendv_epi8(a, b, mask);
     507             : #else
     508   157924760 :         return _mm_or_si128(_mm_andnot_si128(mask, a), _mm_and_si128(mask, b));
     509             : #endif
     510             :     }
     511             : }
     512             : 
     513    10000058 : inline __m128i cmpgt_ph(__m128i x, __m128i y)
     514             : {
     515             : #ifdef slow
     516             :     GFloat16 vx[8], vy[8];
     517             :     int16_t res[8];
     518             :     _mm_storeu_si128(reinterpret_cast<__m128i *>(vx), x);
     519             :     _mm_storeu_si128(reinterpret_cast<__m128i *>(vy), y);
     520             :     for (int i = 0; i < 8; ++i)
     521             :     {
     522             :         res[i] = vx[i] > vy[i] ? -1 : 0;
     523             :     }
     524             :     return _mm_loadu_si128(reinterpret_cast<const __m128i *>(res));
     525             : #else
     526    10000058 :     const auto x_is_negative = _mm_srai_epi16(x, 15);
     527    10000058 :     const auto y_is_negative = _mm_srai_epi16(y, 15);
     528    40000252 :     return _mm_andnot_si128(
     529             :         // only x can be NaN given how we use this method
     530             :         IsNanGFloat16(x),
     531             :         blendv<int16_t>(_mm_or_si128(y_is_negative, _mm_cmpgt_epi16(x, y)),
     532             :                         _mm_and_si128(y_is_negative, _mm_cmpgt_epi16(y, x)),
     533    10000058 :                         x_is_negative));
     534             : #endif
     535             : }
     536             : 
     537    40007320 : inline __m128i cmpgt_epi64(__m128i x, __m128i y)
     538             : {
     539             : #if defined(__SSE4_2__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
     540             :     return _mm_cmpgt_epi64(x, y);
     541             : #else
     542   120021960 :     auto tmp = _mm_and_si128(_mm_sub_epi64(y, x), _mm_cmpeq_epi32(x, y));
     543    40007320 :     tmp = _mm_or_si128(tmp, _mm_cmpgt_epi32(x, y));
     544             :     // Replicate the 2 odd-indexed (hi) 32-bit word into the 2 even-indexed (lo)
     545             :     // ones
     546    40007320 :     return _mm_shuffle_epi32(tmp, _MM_SHUFFLE(3, 3, 1, 1));
     547             : #endif
     548             : }
     549             : 
     550             : // Return a __m128i register with bits set when x[i] < y[i] when !IS_MAX
     551             : // or x[i] > y[i] when IS_MAX
     552             : template <class T, bool IS_MAX, class SSE_T>
     553   145541848 : static inline __m128i comp(SSE_T x, SSE_T y)
     554             : {
     555             :     if constexpr (IS_MAX)
     556             :     {
     557             :         if constexpr (std::is_same_v<T, uint8_t>)
     558     3750472 :             return _mm_cmpgt_epi8(
     559             :                 _mm_add_epi8(x,
     560     1250154 :                              _mm_set1_epi8(std::numeric_limits<int8_t>::min())),
     561     1250154 :                 y);
     562             :         else if constexpr (std::is_same_v<T, int8_t>)
     563     1250422 :             return _mm_cmpgt_epi8(x, y);
     564             :         else if constexpr (std::is_same_v<T, uint16_t>)
     565     7500064 :             return _mm_cmpgt_epi16(
     566             :                 _mm_add_epi16(
     567     2500018 :                     x, _mm_set1_epi16(std::numeric_limits<int16_t>::min())),
     568     2500018 :                 y);
     569             :         else if constexpr (std::is_same_v<T, int16_t>)
     570     2523946 :             return _mm_cmpgt_epi16(x, y);
     571             :         else if constexpr (std::is_same_v<T, uint32_t>)
     572    15010920 :             return _mm_cmpgt_epi32(
     573             :                 _mm_add_epi32(
     574             :                     x, _mm_set1_epi32(std::numeric_limits<int32_t>::min())),
     575     5003630 :                 y);
     576             :         else if constexpr (std::is_same_v<T, int32_t>)
     577     5072498 :             return _mm_cmpgt_epi32(x, y);
     578             :         else if constexpr (std::is_same_v<T, uint64_t>)
     579             :         {
     580    20002736 :             return cmpgt_epi64(
     581             :                 _mm_add_epi64(
     582    10001358 :                     x, _mm_set1_epi64x(std::numeric_limits<int64_t>::min())),
     583    10001358 :                 y);
     584             :         }
     585             :         else if constexpr (std::is_same_v<T, int64_t>)
     586             :         {
     587    10002158 :             return cmpgt_epi64(x, y);
     588             :         }
     589             : #if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
     590             :         else if constexpr (std::is_same_v<T, GFloat16>)
     591             :         {
     592     5000024 :             return cmpgt_ph(x, y);
     593             :         }
     594             : #endif
     595             :         else if constexpr (std::is_same_v<T, float>)
     596    20045576 :             return _mm_castps_si128(_mm_cmpgt_ps(x, y));
     597             :         else
     598    40292264 :             return _mm_castpd_si128(_mm_cmpgt_pd(x, y));
     599             :     }
     600             :     else
     601             :     {
     602             :         if constexpr (std::is_same_v<T, uint8_t>)
     603     3750508 :             return _mm_cmplt_epi8(
     604             :                 _mm_add_epi8(x,
     605     1250166 :                              _mm_set1_epi8(std::numeric_limits<int8_t>::min())),
     606     1250166 :                 y);
     607             :         else if constexpr (std::is_same_v<T, int8_t>)
     608     1250434 :             return _mm_cmplt_epi8(x, y);
     609             :         else if constexpr (std::is_same_v<T, uint16_t>)
     610     7500148 :             return _mm_cmplt_epi16(
     611             :                 _mm_add_epi16(
     612     2500046 :                     x, _mm_set1_epi16(std::numeric_limits<int16_t>::min())),
     613     2500046 :                 y);
     614             :         else if constexpr (std::is_same_v<T, int16_t>)
     615     2523974 :             return _mm_cmplt_epi16(x, y);
     616             :         else if constexpr (std::is_same_v<T, uint32_t>)
     617    15011100 :             return _mm_cmplt_epi32(
     618             :                 _mm_add_epi32(
     619             :                     x, _mm_set1_epi32(std::numeric_limits<int32_t>::min())),
     620     5003690 :                 y);
     621             :         else if constexpr (std::is_same_v<T, int32_t>)
     622     5072558 :             return _mm_cmplt_epi32(x, y);
     623             :         else if constexpr (std::is_same_v<T, uint64_t>)
     624             :         {
     625    20002984 :             return cmpgt_epi64(
     626             :                 y, _mm_add_epi64(x, _mm_set1_epi64x(
     627    20002984 :                                         std::numeric_limits<int64_t>::min())));
     628             :         }
     629             :         else if constexpr (std::is_same_v<T, int64_t>)
     630             :         {
     631    10002282 :             return cmpgt_epi64(y, x);
     632             :         }
     633             : #if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
     634             :         else if constexpr (std::is_same_v<T, GFloat16>)
     635             :         {
     636     5000024 :             return cmpgt_ph(y, x);
     637             :         }
     638             : #endif
     639             :         else if constexpr (std::is_same_v<T, float>)
     640    20045696 :             return _mm_castps_si128(_mm_cmplt_ps(x, y));
     641             :         else
     642    40282512 :             return _mm_castpd_si128(_mm_cmplt_pd(x, y));
     643             :     }
     644             : }
     645             : 
     646             : template <class T, class SSE_T> static inline SSE_T compeq(SSE_T a, SSE_T b);
     647             : 
     648     1250018 : template <> __m128i compeq<uint8_t>(__m128i a, __m128i b)
     649             : {
     650     1250018 :     return _mm_cmpeq_epi8(a, b);
     651             : }
     652             : 
     653     1250002 : template <> __m128i compeq<int8_t>(__m128i a, __m128i b)
     654             : {
     655     1250002 :     return _mm_cmpeq_epi8(a, b);
     656             : }
     657             : 
     658     2500018 : template <> __m128i compeq<uint16_t>(__m128i a, __m128i b)
     659             : {
     660     2500018 :     return _mm_cmpeq_epi16(a, b);
     661             : }
     662             : 
     663     2544114 : template <> __m128i compeq<int16_t>(__m128i a, __m128i b)
     664             : {
     665     2544114 :     return _mm_cmpeq_epi16(a, b);
     666             : }
     667             : 
     668     5000058 : template <> __m128i compeq<uint32_t>(__m128i a, __m128i b)
     669             : {
     670     5000058 :     return _mm_cmpeq_epi32(a, b);
     671             : }
     672             : 
     673     5096898 : template <> __m128i compeq<int32_t>(__m128i a, __m128i b)
     674             : {
     675     5096898 :     return _mm_cmpeq_epi32(a, b);
     676             : }
     677             : 
     678    20000328 : template <> __m128i compeq<uint64_t>(__m128i a, __m128i b)
     679             : {
     680             : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
     681             :     return _mm_cmpeq_epi64(a, b);
     682             : #else
     683    20000328 :     auto tmp = _mm_cmpeq_epi32(a, b);
     684             :     // The shuffle swaps hi-lo 32-bit words
     685    40000656 :     return _mm_and_si128(tmp, _mm_shuffle_epi32(tmp, _MM_SHUFFLE(2, 3, 0, 1)));
     686             : #endif
     687             : }
     688             : 
     689             : template <>
     690             : #if defined(__INTEL_CLANG_COMPILER) &&                                         \
     691             :     !(defined(__SSE4_1__) || defined(__AVX__))
     692             : // ICC 2024 has a bug with the following code when -fiopenmp is enabled.
     693             : // Disabling inlining works around it...
     694             : __attribute__((noinline))
     695             : #endif
     696             : __m128i
     697    10000170 : compeq<int64_t>(__m128i a, __m128i b)
     698             : {
     699    10000170 :     return compeq<uint64_t>(a, b);
     700             : }
     701             : 
     702    10000040 : template <> __m128 compeq<float>(__m128 a, __m128 b)
     703             : {
     704    10000040 :     return _mm_cmpeq_ps(a, b);
     705             : }
     706             : 
     707    20021340 : template <> __m128d compeq<double>(__m128d a, __m128d b)
     708             : {
     709    20021340 :     return _mm_cmpeq_pd(a, b);
     710             : }
     711             : 
     712             : #if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
     713     5000036 : template <> __m128i compeq<GFloat16>(__m128i a, __m128i b)
     714             : {
     715             :     // !isnan(a) && !isnan(b) && (a == b || (a|b) == 0x8000)
     716    30000236 :     return _mm_andnot_si128(
     717             :         IsNanGFloat16(a),  // b cannot be NaN given how we use this method
     718             :         _mm_or_si128(_mm_cmpeq_epi16(a, b),
     719             :                      _mm_cmpeq_epi16(
     720             :                          _mm_or_si128(a, b),
     721    10000082 :                          _mm_set1_epi16(std::numeric_limits<int16_t>::min()))));
     722             : }
     723             : #endif
     724             : 
     725             : // Using SSE2
     726             : template <class T, bool IS_MAX, bool HAS_NODATA>
     727             : #if defined(__GNUC__)
     728             : __attribute__((noinline))
     729             : #endif
     730             : size_t
     731       13966 : extremum_element_sse2(const T *v, size_t size, T noDataValue)
     732             : {
     733             :     static_assert(std::is_same_v<T, uint8_t> || std::is_same_v<T, int8_t> ||
     734             :                   std::is_same_v<T, uint16_t> || std::is_same_v<T, int16_t> ||
     735             :                   std::is_same_v<T, uint32_t> || std::is_same_v<T, int32_t> ||
     736             :                   std::is_same_v<T, uint64_t> || std::is_same_v<T, int64_t> ||
     737             :                   is_floating_point_v<T>);
     738       13966 :     if (size == 0)
     739          22 :         return 0;
     740       13944 :     size_t idx_of_extremum = 0;
     741       13944 :     T extremum = v[0];
     742       13944 :     [[maybe_unused]] bool extremum_is_invalid = false;
     743             :     if constexpr (is_floating_point_v<T>)
     744             :     {
     745             :         if constexpr (HAS_NODATA)
     746             :         {
     747        5346 :             if (IsNan(noDataValue))
     748             :             {
     749           6 :                 return extremum_element_sse2<T, IS_MAX, false>(
     750           4 :                     v, size, static_cast<T>(0));
     751             :             }
     752             :         }
     753        8228 :         extremum_is_invalid = IsNan(extremum);
     754             :     }
     755             :     if constexpr (HAS_NODATA)
     756             :     {
     757        8793 :         if (compEqual(extremum, noDataValue))
     758        4235 :             extremum_is_invalid = true;
     759             :     }
     760       13940 :     size_t i = 1;
     761             : 
     762       13940 :     constexpr size_t VALS_PER_REG = sizeof(set1(extremum)) / sizeof(extremum);
     763       13940 :     constexpr int LOOP_UNROLLING = 4;
     764             :     // If changing the value, then we need to adjust the number of sse_valX
     765             :     // loading in the loop.
     766             :     static_assert(LOOP_UNROLLING == 4);
     767       13940 :     constexpr size_t VALS_PER_ITER = VALS_PER_REG * LOOP_UNROLLING;
     768             : 
     769     8394542 :     const auto update = [v, noDataValue, &extremum, &idx_of_extremum,
     770             :                          &extremum_is_invalid](size_t idx)
     771             :     {
     772             :         if constexpr (HAS_NODATA)
     773             :         {
     774     6876346 :             if (compEqual(v[idx], noDataValue))
     775     6649356 :                 return;
     776      226992 :             if (extremum_is_invalid)
     777             :             {
     778             :                 if constexpr (is_floating_point_v<T>)
     779             :                 {
     780          32 :                     if (IsNan(v[idx]))
     781           6 :                         return;
     782             :                 }
     783          50 :                 extremum = v[idx];
     784          50 :                 idx_of_extremum = idx;
     785          50 :                 extremum_is_invalid = false;
     786          50 :                 return;
     787             :             }
     788             :         }
     789             :         else
     790             :         {
     791      263872 :             CPL_IGNORE_RET_VAL(noDataValue);
     792             :         }
     793      490808 :         if (compScalar<T, IS_MAX>(v[idx], extremum))
     794             :         {
     795      155513 :             extremum = v[idx];
     796      155513 :             idx_of_extremum = idx;
     797      155513 :             extremum_is_invalid = false;
     798             :         }
     799             :         else if constexpr (is_floating_point_v<T>)
     800             :         {
     801      101031 :             if (extremum_is_invalid && !IsNan(v[idx]))
     802             :             {
     803          28 :                 extremum = v[idx];
     804          28 :                 idx_of_extremum = idx;
     805          28 :                 extremum_is_invalid = false;
     806             :             }
     807             :         }
     808             :     };
     809             : 
     810      200419 :     for (; i < VALS_PER_ITER && i < size; ++i)
     811             :     {
     812      186479 :         update(i);
     813             :     }
     814             : 
     815       13940 :     [[maybe_unused]] auto sse_neutral = set1_unshifted(static_cast<T>(0));
     816       13940 :     [[maybe_unused]] auto sse_nodata = set1_unshifted(noDataValue);
     817             :     if constexpr (HAS_NODATA || is_floating_point_v<T>)
     818             :     {
     819     6633076 :         for (; i < size && extremum_is_invalid; ++i)
     820             :         {
     821     6621399 :             update(i);
     822             :         }
     823       11679 :         if (!extremum_is_invalid)
     824             :         {
     825        7624 :             for (; i < size && (i % VALS_PER_ITER) != 0; ++i)
     826             :             {
     827         148 :                 update(i);
     828             :             }
     829        7476 :             sse_neutral = set1_unshifted(extremum);
     830             :         }
     831             :     }
     832             : 
     833       13940 :     auto sse_extremum = set1(extremum);
     834             : 
     835       13940 :     [[maybe_unused]] size_t hits = 0;
     836       13940 :     const auto sse_iter_count = (size / VALS_PER_ITER) * VALS_PER_ITER;
     837    36398833 :     for (; i < sse_iter_count; i += VALS_PER_ITER)
     838             :     {
     839             :         // A bit of loop unrolling to save 3/4 of slow movemask operations.
     840    36385490 :         auto sse_val0 = loadv(v + i + 0 * VALS_PER_REG);
     841    36385490 :         auto sse_val1 = loadv(v + i + 1 * VALS_PER_REG);
     842    36385490 :         auto sse_val2 = loadv(v + i + 2 * VALS_PER_REG);
     843    36385490 :         auto sse_val3 = loadv(v + i + 3 * VALS_PER_REG);
     844             : 
     845             :         if constexpr (HAS_NODATA)
     846             :         {
     847             :             // Replace all components that are at the nodata value by a
     848             :             // neutral value (current minimum)
     849    18165722 :             const auto replaceNoDataByNeutral =
     850   145325664 :                 [sse_neutral, sse_nodata](auto sse_val)
     851             :             {
     852    72662852 :                 const auto eq_nodata = compeq<T>(sse_val, sse_nodata);
     853    72662852 :                 return blendv<T>(sse_val, sse_neutral, eq_nodata);
     854             :             };
     855             : 
     856    18165722 :             sse_val0 = replaceNoDataByNeutral(sse_val0);
     857    18165722 :             sse_val1 = replaceNoDataByNeutral(sse_val1);
     858    18165722 :             sse_val2 = replaceNoDataByNeutral(sse_val2);
     859    18165722 :             sse_val3 = replaceNoDataByNeutral(sse_val3);
     860             :         }
     861             : 
     862   145541936 :         if (_mm_movemask_epi8(_mm_or_si128(
     863             :                 _mm_or_si128(comp<T, IS_MAX>(sse_val0, sse_extremum),
     864             :                              comp<T, IS_MAX>(sse_val1, sse_extremum)),
     865             :                 _mm_or_si128(comp<T, IS_MAX>(sse_val2, sse_extremum),
     866    36385490 :                              comp<T, IS_MAX>(sse_val3, sse_extremum)))) != 0)
     867             :         {
     868             :             if constexpr (!std::is_same_v<T, int8_t> &&
     869             :                           !std::is_same_v<T, uint8_t>)
     870             :             {
     871             :                 // The above tests excluding int8_t/uint8_t is due to the fact
     872             :                 // with those small ranges of values we will quickly converge
     873             :                 // to the minimum, so no need to do the below "smart" test.
     874             : 
     875       20240 :                 if (++hits == size / 16)
     876             :                 {
     877             :                     // If we have an almost sorted array, then using this code path
     878             :                     // will hurt performance. Arbitrary give up if we get here
     879             :                     // more than 1. / 16 of the size of the array.
     880             :                     // fprintf(stderr, "going to non-vector path\n");
     881         573 :                     break;
     882             :                 }
     883             :             }
     884      250173 :             for (size_t j = 0; j < VALS_PER_ITER; j++)
     885             :             {
     886      230424 :                 update(i + j);
     887             :             }
     888             : 
     889       19749 :             sse_extremum = set1(extremum);
     890             :             if constexpr (HAS_NODATA)
     891             :             {
     892        2982 :                 sse_neutral = set1_unshifted(extremum);
     893             :             }
     894             :         }
     895             :     }
     896      115712 :     for (; i < size; ++i)
     897             :     {
     898      101772 :         update(i);
     899             :     }
     900       13940 :     return idx_of_extremum;
     901             : }
     902             : 
     903             : /************************************************************************/
     904             : /*                         extremum_element()                           */
     905             : /************************************************************************/
     906             : 
     907             : template <class T, bool IS_MAX>
     908        8808 : inline size_t extremum_element(const T *buffer, size_t size, T noDataValue)
     909             : {
     910        8808 :     return extremum_element_sse2<T, IS_MAX, true>(buffer, size, noDataValue);
     911             : }
     912             : 
     913             : template <class T, bool IS_MAX>
     914        5154 : inline size_t extremum_element(const T *buffer, size_t size)
     915             : {
     916        5177 :     return extremum_element_sse2<T, IS_MAX, false>(buffer, size,
     917        5177 :                                                    static_cast<T>(0));
     918             : }
     919             : 
     920             : #else
     921             : 
     922             : template <class T, bool IS_MAX>
     923             : inline size_t extremum_element(const T *buffer, size_t size, T noDataValue)
     924             : {
     925             :     if constexpr (is_floating_point_v<T>)
     926             :         return extremum_element_with_nan_generic<T, IS_MAX>(buffer, size,
     927             :                                                             noDataValue);
     928             :     else
     929             :         return extremum_element_generic<T, IS_MAX>(buffer, size, true,
     930             :                                                    noDataValue);
     931             : }
     932             : 
     933             : template <class T, bool IS_MAX>
     934             : inline size_t extremum_element(const T *buffer, size_t size)
     935             : {
     936             :     if constexpr (is_floating_point_v<T>)
     937             :         return extremum_element_with_nan_generic<T, IS_MAX>(buffer, size);
     938             :     else
     939             :         return extremum_element_generic<T, IS_MAX>(buffer, size, false,
     940             :                                                    static_cast<T>(0));
     941             : }
     942             : 
     943             : #endif
     944             : 
     945             : /************************************************************************/
     946             : /*                            extremum_element()                        */
     947             : /************************************************************************/
     948             : 
     949             : template <class T, bool IS_MAX>
     950       13962 : inline size_t extremum_element(const T *buffer, size_t size, bool bHasNoData,
     951             :                                T noDataValue)
     952             : {
     953             : #if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0) &&                      \
     954             :     !defined(GDAL_MINMAX_ELEMENT_USE_SSE2)
     955             :     if constexpr (std::is_same_v<T, GFloat16>)
     956             :     {
     957             :         if (bHasNoData)
     958             :             return extremum_element_with_nan_generic<T, IS_MAX>(buffer, size,
     959             :                                                                 noDataValue);
     960             :         else
     961             :             return extremum_element_with_nan_generic<T, IS_MAX>(buffer, size);
     962             :     }
     963             :     else
     964             : #endif
     965       13962 :         if (bHasNoData)
     966        8808 :         return extremum_element<T, IS_MAX>(buffer, size, noDataValue);
     967             :     else
     968        5154 :         return extremum_element<T, IS_MAX>(buffer, size);
     969             : }
     970             : 
     971             : template <class T, class NODATAType>
     972        4458 : inline bool IsValueExactAs([[maybe_unused]] NODATAType noDataValue)
     973             : {
     974             :     if constexpr (std::is_same_v<T, NODATAType>)
     975        2753 :         return true;
     976             :     else
     977        1705 :         return GDALIsValueExactAs<T>(static_cast<double>(noDataValue));
     978             : }
     979             : 
     980             : template <bool IS_MAX, class NODATAType>
     981         226 : size_t extremum_element(const void *buffer, size_t nElts, GDALDataType eDT,
     982             :                         bool bHasNoData, NODATAType noDataValue)
     983             : {
     984         226 :     switch (eDT)
     985             :     {
     986             : #if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 7, 0)
     987          15 :         case GDT_Int8:
     988             :         {
     989             :             using T = int8_t;
     990          15 :             bHasNoData = bHasNoData && IsValueExactAs<T>(noDataValue);
     991          15 :             return extremum_element<T, IS_MAX>(
     992             :                 static_cast<const T *>(buffer), nElts, bHasNoData,
     993          15 :                 bHasNoData ? static_cast<T>(noDataValue) : 0);
     994             :         }
     995             : #endif
     996          35 :         case GDT_Byte:
     997             :         {
     998             :             using T = uint8_t;
     999          35 :             bHasNoData = bHasNoData && IsValueExactAs<T>(noDataValue);
    1000          35 :             return extremum_element<T, IS_MAX>(
    1001             :                 static_cast<const T *>(buffer), nElts, bHasNoData,
    1002          35 :                 bHasNoData ? static_cast<T>(noDataValue) : 0);
    1003             :         }
    1004          15 :         case GDT_Int16:
    1005             :         {
    1006             :             using T = int16_t;
    1007          15 :             bHasNoData = bHasNoData && IsValueExactAs<T>(noDataValue);
    1008          15 :             return extremum_element<T, IS_MAX>(
    1009             :                 static_cast<const T *>(buffer), nElts, bHasNoData,
    1010          15 :                 bHasNoData ? static_cast<T>(noDataValue) : 0);
    1011             :         }
    1012          15 :         case GDT_UInt16:
    1013             :         {
    1014             :             using T = uint16_t;
    1015          15 :             bHasNoData = bHasNoData && IsValueExactAs<T>(noDataValue);
    1016          15 :             return extremum_element<T, IS_MAX>(
    1017             :                 static_cast<const T *>(buffer), nElts, bHasNoData,
    1018          15 :                 bHasNoData ? static_cast<T>(noDataValue) : 0);
    1019             :         }
    1020          15 :         case GDT_Int32:
    1021             :         {
    1022             :             using T = int32_t;
    1023          15 :             bHasNoData = bHasNoData && IsValueExactAs<T>(noDataValue);
    1024          15 :             return extremum_element<T, IS_MAX>(
    1025             :                 static_cast<const T *>(buffer), nElts, bHasNoData,
    1026          15 :                 bHasNoData ? static_cast<T>(noDataValue) : 0);
    1027             :         }
    1028          15 :         case GDT_UInt32:
    1029             :         {
    1030             :             using T = uint32_t;
    1031          15 :             bHasNoData = bHasNoData && IsValueExactAs<T>(noDataValue);
    1032          15 :             return extremum_element<T, IS_MAX>(
    1033             :                 static_cast<const T *>(buffer), nElts, bHasNoData,
    1034          15 :                 bHasNoData ? static_cast<T>(noDataValue) : 0);
    1035             :         }
    1036             : #if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 5, 0)
    1037          15 :         case GDT_Int64:
    1038             :         {
    1039             :             using T = int64_t;
    1040          15 :             bHasNoData = bHasNoData && IsValueExactAs<T>(noDataValue);
    1041          15 :             return extremum_element<T, IS_MAX>(
    1042             :                 static_cast<const T *>(buffer), nElts, bHasNoData,
    1043          15 :                 bHasNoData ? static_cast<T>(noDataValue) : 0);
    1044             :         }
    1045          15 :         case GDT_UInt64:
    1046             :         {
    1047             :             using T = uint64_t;
    1048          15 :             bHasNoData = bHasNoData && IsValueExactAs<T>(noDataValue);
    1049          15 :             return extremum_element<T, IS_MAX>(
    1050             :                 static_cast<const T *>(buffer), nElts, bHasNoData,
    1051          15 :                 bHasNoData ? static_cast<T>(noDataValue) : 0);
    1052             :         }
    1053             : #endif
    1054             : #if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
    1055          30 :         case GDT_Float16:
    1056             :         {
    1057             :             using T = GFloat16;
    1058          30 :             bHasNoData = bHasNoData && IsValueExactAs<T>(noDataValue);
    1059          30 :             return extremum_element<T, IS_MAX>(
    1060             :                 static_cast<const T *>(buffer), nElts, bHasNoData,
    1061          30 :                 bHasNoData ? static_cast<T>(noDataValue) : static_cast<T>(0));
    1062             :         }
    1063             : #endif
    1064          29 :         case GDT_Float32:
    1065             :         {
    1066             :             using T = float;
    1067          29 :             bHasNoData = bHasNoData && IsValueExactAs<T>(noDataValue);
    1068          29 :             return extremum_element<T, IS_MAX>(
    1069             :                 static_cast<const T *>(buffer), nElts, bHasNoData,
    1070          29 :                 bHasNoData ? static_cast<T>(noDataValue) : 0);
    1071             :         }
    1072          25 :         case GDT_Float64:
    1073             :         {
    1074             :             using T = double;
    1075          25 :             bHasNoData = bHasNoData && IsValueExactAs<T>(noDataValue);
    1076          25 :             return extremum_element<T, IS_MAX>(
    1077             :                 static_cast<const T *>(buffer), nElts, bHasNoData,
    1078          25 :                 bHasNoData ? static_cast<T>(noDataValue) : 0);
    1079             :         }
    1080           2 :         case GDT_CInt16:
    1081             :         case GDT_CInt32:
    1082             : #if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
    1083             :         case GDT_CFloat16:
    1084             : #endif
    1085             :         case GDT_CFloat32:
    1086             :         case GDT_CFloat64:
    1087             :         case GDT_Unknown:
    1088             :         case GDT_TypeCount:
    1089           2 :             break;
    1090             :     }
    1091           2 :     CPLError(CE_Failure, CPLE_NotSupported,
    1092             :              "%s not supported for this data type.", __FUNCTION__);
    1093           2 :     return 0;
    1094             : }
    1095             : 
    1096             : }  // namespace detail
    1097             : 
    1098             : /************************************************************************/
    1099             : /*                            max_element()                             */
    1100             : /************************************************************************/
    1101             : 
    1102             : /** Return the index of the element where the maximum value is hit.
    1103             :  *
    1104             :  * If it is hit in several locations, it is not specified which one will be
    1105             :  * returned.
    1106             :  *
    1107             :  * @param buffer Vector of nElts elements of type eDT.
    1108             :  * @param nElts Number of elements in buffer.
    1109             :  * @param eDT Data type of the elements of buffer.
    1110             :  * @param bHasNoData Whether noDataValue is valid.
    1111             :  * @param noDataValue Nodata value, only taken into account if bHasNoData == true
    1112             :  *
    1113             :  * @since GDAL 3.11
    1114             :  */
    1115             : template <class NODATAType>
    1116          97 : inline size_t max_element(const void *buffer, size_t nElts, GDALDataType eDT,
    1117             :                           bool bHasNoData, NODATAType noDataValue)
    1118             : {
    1119          97 :     return detail::extremum_element<true>(buffer, nElts, eDT, bHasNoData,
    1120          97 :                                           noDataValue);
    1121             : }
    1122             : 
    1123             : /************************************************************************/
    1124             : /*                            min_element()                             */
    1125             : /************************************************************************/
    1126             : 
    1127             : /** Return the index of the element where the minimum value is hit.
    1128             :  *
    1129             :  * If it is hit in several locations, it is not specified which one will be
    1130             :  * returned.
    1131             :  *
    1132             :  * @param buffer Vector of nElts elements of type eDT.
    1133             :  * @param nElts Number of elements in buffer.
    1134             :  * @param eDT Data type of the elements of buffer.
    1135             :  * @param bHasNoData Whether noDataValue is valid.
    1136             :  * @param noDataValue Nodata value, only taken into account if bHasNoData == true
    1137             :  *
    1138             :  * @since GDAL 3.11
    1139             :  */
    1140             : template <class NODATAType>
    1141         129 : inline size_t min_element(const void *buffer, size_t nElts, GDALDataType eDT,
    1142             :                           bool bHasNoData, NODATAType noDataValue)
    1143             : {
    1144         129 :     return detail::extremum_element<false>(buffer, nElts, eDT, bHasNoData,
    1145         129 :                                            noDataValue);
    1146             : }
    1147             : 
    1148             : namespace detail
    1149             : {
    1150             : 
    1151             : #ifdef NOT_EFFICIENT
    1152             : 
    1153             : /************************************************************************/
    1154             : /*                         minmax_element()                             */
    1155             : /************************************************************************/
    1156             : 
    1157             : template <class T>
    1158             : std::pair<size_t, size_t> minmax_element(const T *v, size_t size, T noDataValue)
    1159             : {
    1160             :     static_assert(!(std::is_floating_point_v<T>));
    1161             : #if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
    1162             :     static_assert(!std::is_same_v<T, GFloat16>);
    1163             : #endif
    1164             :     if (size == 0)
    1165             :         return std::pair(0, 0);
    1166             :     size_t idx_of_min = 0;
    1167             :     size_t idx_of_max = 0;
    1168             :     T vmin = v[0];
    1169             :     T vmax = v[0];
    1170             :     bool extremum_is_nodata = vmin == noDataValue;
    1171             :     size_t i = 1;
    1172             :     for (; i < size; ++i)
    1173             :     {
    1174             :         if (v[i] != noDataValue && (v[i] < vmin || extremum_is_nodata))
    1175             :         {
    1176             :             vmin = v[i];
    1177             :             idx_of_min = i;
    1178             :             extremum_is_nodata = false;
    1179             :         }
    1180             :         if (v[i] != noDataValue && (v[i] > vmax || extremum_is_nodata))
    1181             :         {
    1182             :             vmax = v[i];
    1183             :             idx_of_max = i;
    1184             :             extremum_is_nodata = false;
    1185             :         }
    1186             :     }
    1187             :     return std::pair(idx_of_min, idx_of_max);
    1188             : }
    1189             : 
    1190             : template <class T>
    1191             : std::pair<size_t, size_t> minmax_element(const T *v, size_t size)
    1192             : {
    1193             :     static_assert(!(std::is_floating_point_v<T>));
    1194             : #if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
    1195             :     static_assert(!std::is_same_v<T, GFloat16>);
    1196             : #endif
    1197             :     if (size == 0)
    1198             :         return std::pair(0, 0);
    1199             :     size_t idx_of_min = 0;
    1200             :     size_t idx_of_max = 0;
    1201             :     T vmin = v[0];
    1202             :     T vmax = v[0];
    1203             :     size_t i = 1;
    1204             :     for (; i < size; ++i)
    1205             :     {
    1206             :         if (v[i] < vmin)
    1207             :         {
    1208             :             vmin = v[i];
    1209             :             idx_of_min = i;
    1210             :         }
    1211             :         if (v[i] > vmax)
    1212             :         {
    1213             :             vmax = v[i];
    1214             :             idx_of_max = i;
    1215             :         }
    1216             :     }
    1217             :     return std::pair(idx_of_min, idx_of_max);
    1218             : }
    1219             : 
    1220             : template <class T>
    1221             : inline std::pair<size_t, size_t> minmax_element_with_nan(const T *v,
    1222             :                                                          size_t size)
    1223             : {
    1224             :     if (size == 0)
    1225             :         return std::pair(0, 0);
    1226             :     size_t idx_of_min = 0;
    1227             :     size_t idx_of_max = 0;
    1228             :     T vmin = v[0];
    1229             :     T vmax = v[0];
    1230             :     size_t i = 1;
    1231             :     if (IsNan(v[0]))
    1232             :     {
    1233             :         for (; i < size; ++i)
    1234             :         {
    1235             :             if (!IsNan(v[i]))
    1236             :             {
    1237             :                 vmin = v[i];
    1238             :                 idx_of_min = i;
    1239             :                 vmax = v[i];
    1240             :                 idx_of_max = i;
    1241             :                 break;
    1242             :             }
    1243             :         }
    1244             :     }
    1245             :     for (; i < size; ++i)
    1246             :     {
    1247             :         if (v[i] < vmin)
    1248             :         {
    1249             :             vmin = v[i];
    1250             :             idx_of_min = i;
    1251             :         }
    1252             :         if (v[i] > vmax)
    1253             :         {
    1254             :             vmax = v[i];
    1255             :             idx_of_max = i;
    1256             :         }
    1257             :     }
    1258             :     return std::pair(idx_of_min, idx_of_max);
    1259             : }
    1260             : 
    1261             : template <>
    1262             : std::pair<size_t, size_t> minmax_element<float>(const float *v, size_t size)
    1263             : {
    1264             :     return minmax_element_with_nan<float>(v, size);
    1265             : }
    1266             : 
    1267             : template <>
    1268             : std::pair<size_t, size_t> minmax_element<double>(const double *v, size_t size)
    1269             : {
    1270             :     return minmax_element_with_nan<double>(v, size);
    1271             : }
    1272             : 
    1273             : template <class T>
    1274             : inline std::pair<size_t, size_t> minmax_element(const T *buffer, size_t size,
    1275             :                                                 bool bHasNoData, T noDataValue)
    1276             : {
    1277             :     if (bHasNoData)
    1278             :     {
    1279             :         return minmax_element<T>(buffer, size, noDataValue);
    1280             :     }
    1281             :     else
    1282             :     {
    1283             :         return minmax_element<T>(buffer, size);
    1284             :     }
    1285             : }
    1286             : #else
    1287             : 
    1288             : /************************************************************************/
    1289             : /*                         minmax_element()                             */
    1290             : /************************************************************************/
    1291             : 
    1292             : template <class T>
    1293        6869 : inline std::pair<size_t, size_t> minmax_element(const T *buffer, size_t size,
    1294             :                                                 bool bHasNoData, T noDataValue)
    1295             : {
    1296             : #ifdef NOT_EFFICIENT
    1297             :     if (bHasNoData)
    1298             :     {
    1299             :         return minmax_element<T>(buffer, size, noDataValue);
    1300             :     }
    1301             :     else
    1302             :     {
    1303             :         return minmax_element<T>(buffer, size);
    1304             :         //auto [imin, imax] = std::minmax_element(buffer, buffer + size);
    1305             :         //return std::pair(imin - buffer, imax - buffer);
    1306             :     }
    1307             : #else
    1308             : 
    1309             : #if !defined(GDAL_MINMAX_ELEMENT_USE_SSE2)
    1310             :     if constexpr (!std::is_floating_point_v<T>
    1311             : #if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
    1312             :                   && !std::is_same_v<T, GFloat16>
    1313             : #endif
    1314             :     )
    1315             :     {
    1316             :         if (!bHasNoData)
    1317             :         {
    1318             :             auto [min_iter, max_iter] =
    1319             :                 std::minmax_element(buffer, buffer + size);
    1320             :             return std::pair(min_iter - buffer, max_iter - buffer);
    1321             :         }
    1322             :     }
    1323             : #endif
    1324             : 
    1325             :     // Using separately min and max is more efficient than computing them
    1326             :     // within the same loop
    1327             :     return std::pair(
    1328       20607 :         extremum_element<T, false>(buffer, size, bHasNoData, noDataValue),
    1329        6869 :         extremum_element<T, true>(buffer, size, bHasNoData, noDataValue));
    1330             : 
    1331             : #endif
    1332             : }
    1333             : #endif
    1334             : 
    1335             : }  // namespace detail
    1336             : 
    1337             : /************************************************************************/
    1338             : /*                          minmax_element()                            */
    1339             : /************************************************************************/
    1340             : 
    1341             : /** Return the index of the elements where the minimum and maximum values are hit.
    1342             :  *
    1343             :  * If they are hit in several locations, it is not specified which one will be
    1344             :  * returned (contrary to std::minmax_element).
    1345             :  *
    1346             :  * @param buffer Vector of nElts elements of type eDT.
    1347             :  * @param nElts Number of elements in buffer.
    1348             :  * @param eDT Data type of the elements of buffer.
    1349             :  * @param bHasNoData Whether noDataValue is valid.
    1350             :  * @param noDataValue Nodata value, only taken into account if bHasNoData == true
    1351             :  *
    1352             :  * @since GDAL 3.11
    1353             :  */
    1354             : template <class NODATAType>
    1355             : inline std::pair<size_t, size_t>
    1356        6870 : minmax_element(const void *buffer, size_t nElts, GDALDataType eDT,
    1357             :                bool bHasNoData, NODATAType noDataValue)
    1358             : {
    1359        6870 :     switch (eDT)
    1360             :     {
    1361             : #if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 7, 0)
    1362         106 :         case GDT_Int8:
    1363             :         {
    1364             :             using T = int8_t;
    1365         106 :             bHasNoData = bHasNoData && detail::IsValueExactAs<T>(noDataValue);
    1366         106 :             return detail::minmax_element<T>(
    1367             :                 static_cast<const T *>(buffer), nElts, bHasNoData,
    1368         106 :                 bHasNoData ? static_cast<T>(noDataValue) : 0);
    1369             :         }
    1370             : #endif
    1371           5 :         case GDT_Byte:
    1372             :         {
    1373             :             using T = uint8_t;
    1374           5 :             bHasNoData = bHasNoData && detail::IsValueExactAs<T>(noDataValue);
    1375           5 :             return detail::minmax_element<T>(
    1376             :                 static_cast<const T *>(buffer), nElts, bHasNoData,
    1377           5 :                 bHasNoData ? static_cast<T>(noDataValue) : 0);
    1378             :         }
    1379        1399 :         case GDT_Int16:
    1380             :         {
    1381             :             using T = int16_t;
    1382        1399 :             bHasNoData = bHasNoData && detail::IsValueExactAs<T>(noDataValue);
    1383        1399 :             return detail::minmax_element<T>(
    1384             :                 static_cast<const T *>(buffer), nElts, bHasNoData,
    1385        1399 :                 bHasNoData ? static_cast<T>(noDataValue) : 0);
    1386             :         }
    1387           1 :         case GDT_UInt16:
    1388             :         {
    1389             :             using T = uint16_t;
    1390           1 :             bHasNoData = bHasNoData && detail::IsValueExactAs<T>(noDataValue);
    1391           1 :             return detail::minmax_element<T>(
    1392             :                 static_cast<const T *>(buffer), nElts, bHasNoData,
    1393           1 :                 bHasNoData ? static_cast<T>(noDataValue) : 0);
    1394             :         }
    1395        1068 :         case GDT_Int32:
    1396             :         {
    1397             :             using T = int32_t;
    1398        1068 :             bHasNoData = bHasNoData && detail::IsValueExactAs<T>(noDataValue);
    1399        1068 :             return detail::minmax_element<T>(
    1400             :                 static_cast<const T *>(buffer), nElts, bHasNoData,
    1401        1068 :                 bHasNoData ? static_cast<T>(noDataValue) : 0);
    1402             :         }
    1403         177 :         case GDT_UInt32:
    1404             :         {
    1405             :             using T = uint32_t;
    1406         177 :             bHasNoData = bHasNoData && detail::IsValueExactAs<T>(noDataValue);
    1407         177 :             return detail::minmax_element<T>(
    1408             :                 static_cast<const T *>(buffer), nElts, bHasNoData,
    1409         177 :                 bHasNoData ? static_cast<T>(noDataValue) : 0);
    1410             :         }
    1411             : #if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 5, 0)
    1412          21 :         case GDT_Int64:
    1413             :         {
    1414             :             using T = int64_t;
    1415          21 :             bHasNoData = bHasNoData && detail::IsValueExactAs<T>(noDataValue);
    1416          21 :             return detail::minmax_element<T>(
    1417             :                 static_cast<const T *>(buffer), nElts, bHasNoData,
    1418          21 :                 bHasNoData ? static_cast<T>(noDataValue) : 0);
    1419             :         }
    1420          17 :         case GDT_UInt64:
    1421             :         {
    1422             :             using T = uint64_t;
    1423          17 :             bHasNoData = bHasNoData && detail::IsValueExactAs<T>(noDataValue);
    1424          17 :             return detail::minmax_element<T>(
    1425             :                 static_cast<const T *>(buffer), nElts, bHasNoData,
    1426          17 :                 bHasNoData ? static_cast<T>(noDataValue) : 0);
    1427             :         }
    1428             : #endif
    1429             : #if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
    1430           5 :         case GDT_Float16:
    1431             :         {
    1432             :             using T = GFloat16;
    1433           5 :             bHasNoData = bHasNoData && detail::IsValueExactAs<T>(noDataValue);
    1434           5 :             return detail::minmax_element<T>(
    1435             :                 static_cast<const T *>(buffer), nElts, bHasNoData,
    1436           5 :                 bHasNoData ? static_cast<T>(noDataValue) : static_cast<T>(0));
    1437             :         }
    1438             : #endif
    1439         634 :         case GDT_Float32:
    1440             :         {
    1441             :             using T = float;
    1442         634 :             bHasNoData = bHasNoData && detail::IsValueExactAs<T>(noDataValue);
    1443         634 :             return detail::minmax_element<T>(
    1444             :                 static_cast<const T *>(buffer), nElts, bHasNoData,
    1445         634 :                 bHasNoData ? static_cast<T>(noDataValue) : 0);
    1446             :         }
    1447        3436 :         case GDT_Float64:
    1448             :         {
    1449             :             using T = double;
    1450        3436 :             bHasNoData = bHasNoData && detail::IsValueExactAs<T>(noDataValue);
    1451        3436 :             return detail::minmax_element<T>(
    1452             :                 static_cast<const T *>(buffer), nElts, bHasNoData,
    1453        3436 :                 bHasNoData ? static_cast<T>(noDataValue) : 0);
    1454             :         }
    1455           1 :         case GDT_CInt16:
    1456             :         case GDT_CInt32:
    1457             : #if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3, 11, 0)
    1458             :         case GDT_CFloat16:
    1459             : #endif
    1460             :         case GDT_CFloat32:
    1461             :         case GDT_CFloat64:
    1462             :         case GDT_Unknown:
    1463             :         case GDT_TypeCount:
    1464           1 :             break;
    1465             :     }
    1466           1 :     CPLError(CE_Failure, CPLE_NotSupported,
    1467             :              "%s not supported for this data type.", __FUNCTION__);
    1468           1 :     return std::pair(0, 0);
    1469             : }
    1470             : 
    1471             : }  // namespace GDAL_MINMAXELT_NS
    1472             : 
    1473             : #endif  // GDAL_MINMAX_ELEMENT_INCLUDED

Generated by: LCOV version 1.14