LCOV - code coverage report
Current view: top level - alg - gdalwarpkernel.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 3503 4049 86.5 %
Date: 2026-05-28 11:29:44 Functions: 240 277 86.6 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  High Performance Image Reprojector
       4             :  * Purpose:  Implementation of the GDALWarpKernel class.  Implements the actual
       5             :  *           image warping for a "chunk" of input and output imagery already
       6             :  *           loaded into memory.
       7             :  * Author:   Frank Warmerdam, warmerdam@pobox.com
       8             :  *
       9             :  ******************************************************************************
      10             :  * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
      11             :  * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
      12             :  *
      13             :  * SPDX-License-Identifier: MIT
      14             :  ****************************************************************************/
      15             : 
      16             : #include "cpl_port.h"
      17             : #include "gdalwarper.h"
      18             : 
      19             : #include <cfloat>
      20             : #include <cmath>
      21             : #include <cstddef>
      22             : #include <cstdlib>
      23             : #include <cstring>
      24             : 
      25             : #include <algorithm>
      26             : #include <limits>
      27             : #include <mutex>
      28             : #include <new>
      29             : #include <utility>
      30             : #include <vector>
      31             : 
      32             : #include "cpl_atomic_ops.h"
      33             : #include "cpl_conv.h"
      34             : #include "cpl_error.h"
      35             : #include "cpl_float.h"
      36             : #include "cpl_mask.h"
      37             : #include "cpl_multiproc.h"
      38             : #include "cpl_progress.h"
      39             : #include "cpl_string.h"
      40             : #include "cpl_vsi.h"
      41             : #include "cpl_worker_thread_pool.h"
      42             : #include "cpl_quad_tree.h"
      43             : #include "gdal.h"
      44             : #include "gdal_alg.h"
      45             : #include "gdal_alg_priv.h"
      46             : #include "gdal_thread_pool.h"
      47             : #include "gdalresamplingkernels.h"
      48             : 
      49             : // #define CHECK_SUM_WITH_GEOS
      50             : #ifdef CHECK_SUM_WITH_GEOS
      51             : #include "ogr_geometry.h"
      52             : #include "ogr_geos.h"
      53             : #endif
      54             : 
      55             : #ifdef USE_NEON_OPTIMIZATIONS
      56             : #include "include_sse2neon.h"
      57             : #define USE_SSE2
      58             : 
      59             : #include "gdalsse_priv.h"
      60             : 
      61             : // We restrict to 64bit processors because they are guaranteed to have SSE2.
      62             : // Could possibly be used too on 32bit, but we would need to check at runtime.
      63             : #elif defined(__x86_64) || defined(_M_X64)
      64             : #define USE_SSE2
      65             : 
      66             : #include "gdalsse_priv.h"
      67             : 
      68             : #if __SSE4_1__
      69             : #include <smmintrin.h>
      70             : #endif
      71             : 
      72             : #if __SSE3__
      73             : #include <pmmintrin.h>
      74             : #endif
      75             : 
      76             : #endif
      77             : 
                       // Small positive density cut-offs: 1e-10 for band density and 1e-9 for
                       // source density (one float and one double flavour).
                       // NOTE(review): the uses of these constants are outside this chunk;
                       // presumably densities below them are treated as zero - confirm.
      78             : constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001;
      79             : constexpr float SRC_DENSITY_THRESHOLD_FLOAT = 0.000000001f;
      80             : constexpr double SRC_DENSITY_THRESHOLD_DOUBLE = 0.000000001;
      81             : 
      82             : // #define INSTANTIATE_FLOAT64_SSE2_IMPL
      83             : 
                       // Filter radius per resampling algorithm. The table is indexed directly
                       // by the GDALResampleAlg value (see GWKGetFilterRadius()), so the order
                       // of the entries must follow the enum, including the reserved slot 7.
                       // Algorithms with a radius of 0 are the same ones that have a nullptr
                       // entry in apfGWKFilter / apfGWKFilter4Values.
      84             : static const int anGWKFilterRadius[] = {
      85             :     0,  // Nearest neighbour
      86             :     1,  // Bilinear
      87             :     2,  // Cubic Convolution (Catmull-Rom)
      88             :     2,  // Cubic B-Spline
      89             :     3,  // Lanczos windowed sinc
      90             :     0,  // Average
      91             :     0,  // Mode
      92             :     0,  // Reserved GRA_Gauss=7
      93             :     0,  // Max
      94             :     0,  // Min
      95             :     0,  // Med
      96             :     0,  // Q1
      97             :     0,  // Q3
      98             :     0,  // Sum
      99             :     0,  // RMS
     100             : };
     101             : 
                       // Scalar filter functions: each takes a double and returns a double.
                       // Only declared here; the definitions are not part of this chunk.
     102             : static double GWKBilinear(double dfX);
     103             : static double GWKCubic(double dfX);
     104             : static double GWKBSpline(double dfX);
     105             : static double GWKLanczosSinc(double dfX);
     106             : 
                       // Filter function per resampling algorithm, indexed directly by the
                       // GDALResampleAlg value (see GWKGetFilterFunc()). nullptr marks the
                       // algorithms for which no filter function is registered; callers of
                       // GWKGetFilterFunc() therefore have to cope with a null result.
     107             : static const FilterFuncType apfGWKFilter[] = {
     108             :     nullptr,         // Nearest neighbour
     109             :     GWKBilinear,     // Bilinear
     110             :     GWKCubic,        // Cubic Convolution (Catmull-Rom)
     111             :     GWKBSpline,      // Cubic B-Spline
     112             :     GWKLanczosSinc,  // Lanczos windowed sinc
     113             :     nullptr,         // Average
     114             :     nullptr,         // Mode
     115             :     nullptr,         // Reserved GRA_Gauss=7
     116             :     nullptr,         // Max
     117             :     nullptr,         // Min
     118             :     nullptr,         // Med
     119             :     nullptr,         // Q1
     120             :     nullptr,         // Q3
     121             :     nullptr,         // Sum
     122             :     nullptr,         // RMS
     123             : };
     124             : 
                       // "4 values" variants of the filter functions. They take a non-const
                       // pointer to doubles and return a double.
                       // NOTE(review): presumably they process 4 values in place through
                       // padfVals - the definitions are outside this chunk, confirm there.
     125             : // TODO(schwehr): Can we make these functions have a const * const arg?
     126             : static double GWKBilinear4Values(double *padfVals);
     127             : static double GWKCubic4Values(double *padfVals);
     128             : static double GWKBSpline4Values(double *padfVals);
     129             : static double GWKLanczosSinc4Values(double *padfVals);
     130             : 
                       // Same layout as apfGWKFilter: one entry per GDALResampleAlg value, with
                       // nullptr where no 4-values filter is registered
                       // (see GWKGetFilterFunc4Values()).
     131             : static const FilterFunc4ValuesType apfGWKFilter4Values[] = {
     132             :     nullptr,                // Nearest neighbour
     133             :     GWKBilinear4Values,     // Bilinear
     134             :     GWKCubic4Values,        // Cubic Convolution (Catmull-Rom)
     135             :     GWKBSpline4Values,      // Cubic B-Spline
     136             :     GWKLanczosSinc4Values,  // Lanczos windowed sinc
     137             :     nullptr,                // Average
     138             :     nullptr,                // Mode
     139             :     nullptr,                // Reserved GRA_Gauss=7
     140             :     nullptr,                // Max
     141             :     nullptr,                // Min
     142             :     nullptr,                // Med
     143             :     nullptr,                // Q1
     144             :     nullptr,                // Q3
     145             :     nullptr,                // Sum
     146             :     nullptr,                // RMS
     147             : };
     148             : 
                       // Return the filter radius registered in anGWKFilterRadius for
                       // eResampleAlg.
                       // The enum value is used directly as the array index and is not range
                       // checked at run time; the static_assert only guarantees that the table
                       // has GRA_LAST_VALUE + 1 entries.
     149       21284 : int GWKGetFilterRadius(GDALResampleAlg eResampleAlg)
     150             : {
     151             :     static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1,
     152             :                   "Bad size of anGWKFilterRadius");
     153       21284 :     return anGWKFilterRadius[eResampleAlg];
     154             : }
     155             : 
                       // Return the filter function registered in apfGWKFilter for
                       // eResampleAlg. The result is nullptr for every algorithm whose table
                       // entry is nullptr.
                       // The enum value is used directly as the array index and is not range
                       // checked at run time; the static_assert only checks the table size.
     156        9579 : FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg)
     157             : {
     158             :     static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1,
     159             :                   "Bad size of apfGWKFilter");
     160        9579 :     return apfGWKFilter[eResampleAlg];
     161             : }
     162             : 
                       // Return the 4-values filter function registered in apfGWKFilter4Values
                       // for eResampleAlg. The result is nullptr for every algorithm whose
                       // table entry is nullptr.
                       // The enum value is used directly as the array index and is not range
                       // checked at run time; the static_assert only checks the table size.
     163        9579 : FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg)
     164             : {
     165             :     static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1,
     166             :                   "Bad size of apfGWKFilter4Values");
     167        9579 :     return apfGWKFilter4Values[eResampleAlg];
     168             : }
     169             : 
                       // Forward declarations of the file-local warp entry points. Each takes
                       // the GDALWarpKernel to process and returns a CPLErr. The names encode
                       // the resampling method, the mask/density handling and the pixel type.
                       // The two *Double variants are only declared when
                       // INSTANTIATE_FLOAT64_SSE2_IMPL is defined (it is commented out above).
                       // NOTE(review): the definitions and the dispatch between these
                       // functions are outside this chunk.
     170             : static CPLErr GWKGeneralCase(GDALWarpKernel *);
     171             : static CPLErr GWKRealCase(GDALWarpKernel *poWK);
     172             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     173             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     174             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     175             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     176             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
     177             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
     178             : #endif
     179             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
     180             : static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
     181             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     182             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     183             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     184             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
     185             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
     186             : #endif
     187             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     188             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
     189             : static CPLErr GWKNearestInt8(GDALWarpKernel *poWK);
     190             : static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
     191             : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK);
     192             : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
     193             : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
     194             : static CPLErr GWKAverageOrMode(GDALWarpKernel *);
     195             : static CPLErr GWKSumPreserving(GDALWarpKernel *);
     196             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     197             : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     198             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
     199             : 
     200             : /************************************************************************/
     201             : /*                             GWKJobStruct                             */
     202             : /************************************************************************/
     203             : 
                       // Description of one warping job: the kernel to process, the range of
                       // destination lines assigned to the job, and the hooks used to report
                       // progress and to reach the transformer.
                       // The reference members are bound by the constructor; in this file they
                       // are always bound to the matching members of a GWKThreadData, so every
                       // job created from the same GWKThreadData shares one mutex, condition
                       // variable, counter and stop flag.
     204             : struct GWKJobStruct
     205             : {
                           // Shared synchronisation state (see GWKProgressThread()).
     206             :     std::mutex &mutex;
     207             :     std::condition_variable &cv;
                           // Progress counter private to the job; only GWKProgressMonoThread()
                           // and GWKGenericMonoThread() touch it.
     208             :     int counterSingleThreaded = 0;
                           // Shared progress counter, incremented by GWKProgressThread().
     209             :     int &counter;
                           // Shared flag; set to true to request that processing be stopped.
     210             :     bool &stopFlag;
     211             :     GDALWarpKernel *poWK = nullptr;
                           // Bounds of the destination lines handled by this job, as assigned by
                           // GWKRun() / GWKGenericMonoThread().
                           // NOTE(review): presumably the half-open range [iYMin, iYMax) - the
                           // code consuming these bounds is outside this chunk.
     212             :     int iYMin = 0;
     213             :     int iYMax = 0;
                           // Progress hook (GWKProgressThread or GWKProgressMonoThread). It
                           // returns non-zero when the computation must be interrupted. GWKRun()
                           // only assigns it when a real progress callback is installed, so it
                           // may be nullptr.
     214             :     int (*pfnProgress)(GWKJobStruct *psJob) = nullptr;
                           // Transformer argument to use for this job; filled in by
                           // ThreadFuncAdapter() or GWKGenericMonoThread().
     215             :     void *pTransformerArg = nullptr;
     216             :     // used by GWKRun() to assign the proper pTransformerArg
     217             :     void (*pfnFunc)(void *) = nullptr;
     218             : 
                           // Bind the shared synchronisation objects; every other member keeps
                           // its default initializer.
     219        3231 :     GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_,
     220             :                  int &counter_, bool &stopFlag_)
     221        3231 :         : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_)
     222             :     {
     223        3231 :     }
     224             : };
     225             : 
                       // State shared by all jobs of a warp operation. It is allocated by
                       // GWKThreadsCreate(), released by GWKThreadsEnd() and handed to the
                       // kernel as an opaque pointer (poWK->psThreadData).
     226             : struct GWKThreadData
     227             : {
                           // Job queue on the global thread pool. It stays null when a single
                           // thread is used, in which case GWKRun() takes the mono-thread path.
     228             :     std::unique_ptr<CPLJobQueue> poJobQueue{};
                           // One pre-built job per potential worker thread (nMaxThreads entries).
     229             :     std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{};
     230             :     int nMaxThreads{0};
                           // Progress counter incremented by GWKProgressThread() under mutex.
                           // NOTE(review): nothing in this chunk resets it between two GWKRun()
                           // calls that reuse the same GWKThreadData - confirm this is intended.
     231             :     int counter{0};
     232             :     bool stopFlag{false};
                           // mutex protects counter, stopFlag, the thread counts below,
                           // bTransformerArgInputAssignedToThread and mapThreadToTransformerArg.
     233             :     std::mutex mutex{};
     234             :     std::condition_variable cv{};
                           // True while a worker thread is borrowing pTransformerArgInput.
     235             :     bool bTransformerArgInputAssignedToThread{false};
     236             :     void *pTransformerArgInput{
     237             :         nullptr};  // owned by calling layer. Not to be destroyed
                           // Transformer argument per thread id (as returned by CPLGetPID()).
                           // The cloned entries are destroyed in GWKThreadsEnd().
     238             :     std::map<GIntBig, void *> mapThreadToTransformerArg{};
                           // Number of jobs submitted by the current GWKRun(), and number of them
                           // that have entered ThreadFuncAdapter() so far.
     239             :     int nTotalThreadCountForThisRun = 0;
     240             :     int nCurThreadCountForThisRun = 0;
     241             : };
     242             : 
     243             : /************************************************************************/
     244             : /*                         GWKProgressThread()                          */
     245             : /************************************************************************/
     246             : 
                       // Progress hook used by worker threads (installed by GWKRun()).
                       // It increments the shared counter and reads the shared stop flag, both
                       // under the shared mutex, then wakes up one waiter on the condition
                       // variable (GWKRun() waits on it to forward progress to the user
                       // callback).
     247             : // Return TRUE if the computation must be interrupted.
     248          36 : static int GWKProgressThread(GWKJobStruct *psJob)
     249             : {
     250          36 :     bool stop = false;
     251             :     {
     252          36 :         std::lock_guard<std::mutex> lock(psJob->mutex);
     253          36 :         psJob->counter++;
     254          36 :         stop = psJob->stopFlag;
     255             :     }
                           // The notification is sent after the lock has been released.
     256          36 :     psJob->cv.notify_one();
     257             : 
     258          36 :     return stop;
     259             : }
     260             : 
     261             : /************************************************************************/
     262             : /*                       GWKProgressMonoThread()                        */
     263             : /************************************************************************/
     264             : 
                       // Progress hook used on the single-threaded path (installed by
                       // GWKGenericMonoThread()).
                       // It increments the job-local counter and calls the user progress
                       // callback directly with
                       //   dfProgressBase + dfProgressScale * (counter / iYMax).
                       // When the callback returns 0, a CE_Failure / CPLE_UserInterrupt error
                       // is emitted and the stop flag is raised. No locking is done here.
     265             : // Return TRUE if the computation must be interrupted.
     266      446697 : static int GWKProgressMonoThread(GWKJobStruct *psJob)
     267             : {
     268      446697 :     GDALWarpKernel *poWK = psJob->poWK;
     269      446697 :     if (!poWK->pfnProgress(poWK->dfProgressBase +
     270      446697 :                                poWK->dfProgressScale *
     271      446697 :                                    (++psJob->counterSingleThreaded /
     272      446697 :                                     static_cast<double>(psJob->iYMax)),
     273             :                            "", poWK->pProgress))
     274             :     {
     275           1 :         CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     276           1 :         psJob->stopFlag = true;
     277           1 :         return TRUE;
     278             :     }
     279      446696 :     return FALSE;
     280             : }
     281             : 
     282             : /************************************************************************/
     283             : /*                        GWKGenericMonoThread()                        */
     284             : /************************************************************************/
     285             : 
                       // Run pfnFunc synchronously in the calling thread, as a single job that
                       // covers lines 0 to poWK->nDstYSize and uses the kernel's own transformer
                       // argument.
                       // A temporary GWKThreadData only provides the objects that the
                       // GWKJobStruct references must be bound to.
                       // Returns CE_Failure when the stop flag was raised during the run
                       // (see GWKProgressMonoThread()), CE_None otherwise.
     286        3206 : static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK,
     287             :                                    void (*pfnFunc)(void *pUserData))
     288             : {
     289        3206 :     GWKThreadData td;
     290             : 
     291             :     // NOTE: the mutex is not used.
     292        3206 :     GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag);
     293        3206 :     job.poWK = poWK;
     294        3206 :     job.iYMin = 0;
     295        3206 :     job.iYMax = poWK->nDstYSize;
     296        3206 :     job.pfnProgress = GWKProgressMonoThread;
     297        3206 :     job.pTransformerArg = poWK->pTransformerArg;
                           // td is freshly constructed, so this starts the job counter at 0.
     298        3206 :     job.counterSingleThreaded = td.counter;
     299        3206 :     pfnFunc(&job);
     300        3206 :     td.counter = job.counterSingleThreaded;
     301             : 
     302        6412 :     return td.stopFlag ? CE_Failure : CE_None;
     303             : }
     304             : 
     305             : /************************************************************************/
     306             : /*                          GWKThreadsCreate()                          */
     307             : /************************************************************************/
     308             : 
                       // Allocate the thread data for a warp operation.
                       //
                       // papszWarpOptions: warp options; the thread count is obtained from them
                       //                   through GDALGetNumThreads() with the "NUM_THREADS"
                       //                   key.
                       // pTransformerArg:  transformer argument of the caller. It is only
                       //                   recorded (as pTransformerArgInput) when a thread
                       //                   pool is used, and it is not owned by the result.
                       //
                       // Returns a new GWKThreadData as an opaque pointer; it must be released
                       // with GWKThreadsEnd(). When fewer than 2 threads are requested, or no
                       // thread pool could be obtained, the job queue is left null and GWKRun()
                       // falls back to single-threaded processing.
     309        1811 : void *GWKThreadsCreate(char **papszWarpOptions,
     310             :                        GDALTransformerFunc /* pfnTransformer */,
     311             :                        void *pTransformerArg)
     312             : {
     313        1811 :     const int nThreads = GDALGetNumThreads(papszWarpOptions, "NUM_THREADS",
     314             :                                            GDAL_DEFAULT_MAX_THREAD_COUNT,
     315             :                                            /* bDefaultAllCPUs = */ false);
     316        1811 :     GWKThreadData *psThreadData = new GWKThreadData();
     317             :     auto poThreadPool =
     318        1811 :         nThreads > 1 ? GDALGetGlobalThreadPool(nThreads) : nullptr;
     319        1811 :     if (poThreadPool)
     320             :     {
     321          25 :         psThreadData->nMaxThreads = nThreads;
                               // Pre-build nThreads jobs, all bound to the synchronisation
                               // members of psThreadData.
     322          25 :         psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>(
     323             :             nThreads,
     324          25 :             GWKJobStruct(psThreadData->mutex, psThreadData->cv,
     325          50 :                          psThreadData->counter, psThreadData->stopFlag)));
     326             : 
     327          25 :         psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
     328          25 :         psThreadData->pTransformerArgInput = pTransformerArg;
     329             :     }
     330             : 
     331        1811 :     return psThreadData;
     332             : }
     333             : 
     334             : /************************************************************************/
     335             : /*                           GWKThreadsEnd()                            */
     336             : /************************************************************************/
     337             : 
                       // Release a thread data object returned by GWKThreadsCreate().
                       // A null pointer is accepted and ignored. When a job queue exists, every
                       // transformer still registered in mapThreadToTransformerArg is destroyed
                       // with GDALDestroyTransformer() before the queue itself is released.
     338        1811 : void GWKThreadsEnd(void *psThreadDataIn)
     339             : {
     340        1811 :     if (psThreadDataIn == nullptr)
     341           0 :         return;
     342             : 
     343        1811 :     GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
     344        1811 :     if (psThreadData->poJobQueue)
     345             :     {
     346             :         // cppcheck-suppress constVariableReference
     347          35 :         for (auto &pair : psThreadData->mapThreadToTransformerArg)
     348             :         {
                                   // The caller's transformer must never be destroyed here:
                                   // ThreadFuncAdapter() removes it from the map once the
                                   // borrowing job is done.
     349          10 :             CPLAssert(pair.second != psThreadData->pTransformerArgInput);
     350          10 :             GDALDestroyTransformer(pair.second);
     351             :         }
     352          25 :         psThreadData->poJobQueue.reset();
     353             :     }
     354        1811 :     delete psThreadData;
     355             : }
     356             : 
     357             : /************************************************************************/
     358             : /*                         ThreadFuncAdapter()                          */
     359             : /************************************************************************/
     360             : 
                       // Entry point of every job submitted by GWKRun(). pData is the
                       // GWKJobStruct of the job.
                       // It selects the transformer argument for the executing thread, stores
                       // it in psJob->pTransformerArg and then calls psJob->pfnFunc(pData).
                       // The transformer is chosen as follows:
                       //  1. the one already registered for this thread id, if any;
                       //  2. otherwise the caller's transformer (pTransformerArgInput), when
                       //     this job is the last one of the run to start and nobody is
                       //     borrowing it;
                       //  3. otherwise a new clone made with GDALCloneTransformer(). It is kept
                       //     in the map and destroyed later by GWKThreadsEnd().
     361          34 : static void ThreadFuncAdapter(void *pData)
     362             : {
     363          34 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
     364          34 :     GWKThreadData *psThreadData =
     365          34 :         static_cast<GWKThreadData *>(psJob->poWK->psThreadData);
     366             : 
     367             :     // Look if we have already a per-thread transformer
     368          34 :     void *pTransformerArg = nullptr;
     369          34 :     const GIntBig nThreadId = CPLGetPID();
     370             : 
     371             :     {
     372          68 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     373          34 :         ++psThreadData->nCurThreadCountForThisRun;
     374             : 
     375          34 :         auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
     376          34 :         if (oIter != psThreadData->mapThreadToTransformerArg.end())
     377             :         {
     378           0 :             pTransformerArg = oIter->second;
     379             :         }
     380          34 :         else if (!psThreadData->bTransformerArgInputAssignedToThread &&
     381          34 :                  psThreadData->nCurThreadCountForThisRun ==
     382          34 :                      psThreadData->nTotalThreadCountForThisRun)
     383             :         {
     384             :             // If we are the last thread to be started, temporarily borrow the
     385             :             // original transformer
     386          24 :             psThreadData->bTransformerArgInputAssignedToThread = true;
     387          24 :             pTransformerArg = psThreadData->pTransformerArgInput;
     388          24 :             psThreadData->mapThreadToTransformerArg[nThreadId] =
     389             :                 pTransformerArg;
     390             :         }
     391             : 
     392          34 :         if (pTransformerArg == nullptr)
     393             :         {
     394          10 :             CPLAssert(psThreadData->pTransformerArgInput != nullptr);
     395          10 :             CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
     396             :         }
     397             :     }
     398             : 
     399             :     // If no transformer assigned to current thread, instantiate one
     400          34 :     if (pTransformerArg == nullptr)
     401             :     {
     402             :         // This somehow assumes that GDALCloneTransformer() is thread-safe
     403             :         // which should normally be the case.
                               // The clone is created while the mutex is NOT held.
     404             :         pTransformerArg =
     405          10 :             GDALCloneTransformer(psThreadData->pTransformerArgInput);
     406             : 
     407             :         // Lock for the stop flag and the transformer map.
     408          10 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     409          10 :         if (!pTransformerArg)
     410             :         {
                                   // NOTE(review): this path returns without running the job,
                                   // without advancing the shared counter and without notifying
                                   // cv. GWKRun() waits on cv while counter < nDstYSize when a
                                   // progress callback is installed, so it looks like it could
                                   // keep waiting in that case - confirm.
     411           0 :             psJob->stopFlag = true;
     412           0 :             return;
     413             :         }
     414          10 :         psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg;
     415             :     }
     416             : 
     417          34 :     psJob->pTransformerArg = pTransformerArg;
     418          34 :     psJob->pfnFunc(pData);
     419             : 
     420             :     // Give back original transformer, if borrowed.
     421             :     {
     422          68 :         std::lock_guard<std::mutex> lock(psThreadData->mutex);
     423          34 :         if (psThreadData->bTransformerArgInputAssignedToThread &&
     424          27 :             pTransformerArg == psThreadData->pTransformerArgInput)
     425             :         {
                                   // The map entry was inserted above by the job that borrowed
                                   // the caller's transformer. Removing it here keeps
                                   // GWKThreadsEnd() from destroying that transformer.
     426             :             psThreadData->mapThreadToTransformerArg.erase(
     427          24 :                 psThreadData->mapThreadToTransformerArg.find(nThreadId));
     428          24 :             psThreadData->bTransformerArgInputAssignedToThread = false;
     429             :         }
     430             :     }
     431             : }
     432             : 
     433             : /************************************************************************/
     434             : /*                               GWKRun()                               */
     435             : /************************************************************************/
     436             : 
                       // Run the warping function pfnFunc over the destination lines of poWK,
                       // in the calling thread or split across jobs on the job queue.
                       //
                       // poWK:        kernel to process; poWK->psThreadData is expected to come
                       //              from GWKThreadsCreate() (or be null).
                       // pszFuncName: name used in the debug message only.
                       // pfnFunc:     worker; it receives a GWKJobStruct* as its void* argument.
                       //
                       // Returns CE_Failure when the user progress callback asked to stop (or
                       // the stop flag was already raised when it is sampled), else CE_None.
     437        3230 : static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName,
     438             :                      void (*pfnFunc)(void *pUserData))
     439             : 
     440             : {
     441        3230 :     const int nDstYSize = poWK->nDstYSize;
     442             : 
     443        3230 :     CPLDebug("GDAL",
     444             :              "GDALWarpKernel()::%s() "
     445             :              "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
     446             :              pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
     447             :              poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
     448             :              poWK->nDstYSize);
     449             : 
                           // Initial progress report; it also lets the user cancel before any
                           // work is started.
     450        3230 :     if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
     451             :     {
     452           0 :         CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     453           0 :         return CE_Failure;
     454             :     }
     455             : 
     456        3230 :     GWKThreadData *psThreadData =
     457             :         static_cast<GWKThreadData *>(poWK->psThreadData);
                           // No thread data, or no job queue: process everything in this thread.
     458        3230 :     if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr)
     459             :     {
     460        3206 :         return GWKGenericMonoThread(poWK, pfnFunc);
     461             :     }
     462             : 
                           // Number of jobs: at most nMaxThreads, at most one per 2 destination
                           // lines, at most one per WARP_THREAD_CHUNK_SIZE destination pixels
                           // (when that option is > 0), and never less than 1.
     463          24 :     int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2);
     464             :     // Config option mostly useful for tests to be able to test multithreading
     465             :     // with small rasters
     466             :     const int nWarpChunkSize =
     467          24 :         atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
     468          24 :     if (nWarpChunkSize > 0)
     469             :     {
     470          22 :         GIntBig nChunks =
     471          22 :             static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize;
     472          22 :         if (nThreads > nChunks)
     473          17 :             nThreads = static_cast<int>(nChunks);
     474             :     }
     475          24 :     if (nThreads <= 0)
     476          20 :         nThreads = 1;
     477             : 
     478          24 :     CPLDebug("WARP", "Using %d threads", nThreads);
     479             : 
     480          24 :     auto &jobs = *psThreadData->threadJobs;
     481          24 :     CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
     482             :     // Fill-in job structures.
                           // Job i is given the lines from i * nDstYSize / nThreads to
                           // (i + 1) * nDstYSize / nThreads; the products are computed in 64 bits.
     483          58 :     for (int i = 0; i < nThreads; ++i)
     484             :     {
     485          34 :         auto &job = jobs[i];
     486          34 :         job.poWK = poWK;
     487          34 :         job.iYMin =
     488          34 :             static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads);
     489          34 :         job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
     490          34 :                                      nThreads);
                               // NOTE(review): job.pfnProgress is only assigned here, never
                               // cleared, so a job reused from an earlier run keeps its previous
                               // hook when the current callback is GDALDummyProgress - confirm
                               // this is intended.
     491          34 :         if (poWK->pfnProgress != GDALDummyProgress)
     492           2 :             job.pfnProgress = GWKProgressThread;
     493          34 :         job.pfnFunc = pfnFunc;
     494             :     }
     495             : 
     496             :     bool bStopFlag;
     497             :     {
     498             :         {
     499             :             // Important: do not run the SubmitJob() loop under the mutex
     500             :             // because in some cases (typically if the current thread has been
     501             :             // created by the GDAL global thread pool), the task will actually
     502             :             // be run synchronously by SubmitJob(), and as it tries to acquire
     503             :             // the mutex, that would result in a dead-lock
     504          24 :             std::unique_lock<std::mutex> lock(psThreadData->mutex);
     505             : 
     506          24 :             psThreadData->nTotalThreadCountForThisRun = nThreads;
     507          24 :             psThreadData->nCurThreadCountForThisRun = 0;
     508             :         }
     509             : 
     510             :         // Start jobs.
     511          58 :         for (int i = 0; i < nThreads; ++i)
     512             :         {
     513          34 :             auto &job = jobs[i];
     514          34 :             psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
     515             :                                                 static_cast<void *>(&job));
     516             :         }
     517             : 
     518             :         /* --------------------------------------------------------------------
     519             :          */
     520             :         /*      Report progress. */
     521             :         /* --------------------------------------------------------------------
     522             :          */
                               // The lock is held from here to the end of the enclosing scope,
                               // except while blocked in cv.wait().
     523          24 :         std::unique_lock<std::mutex> lock(psThreadData->mutex);
     524          24 :         if (poWK->pfnProgress != GDALDummyProgress)
     525             :         {
                                   // Each wake-up from GWKProgressThread() is forwarded to the
                                   // user callback, which is called while the mutex is held.
                                   // NOTE(review): psThreadData->counter is not reset in this
                                   // function; it looks like it carries over from an earlier
                                   // GWKRun() on the same thread data - confirm.
     526           3 :             while (psThreadData->counter < nDstYSize)
     527             :             {
     528           1 :                 psThreadData->cv.wait(lock);
     529           1 :                 if (!poWK->pfnProgress(poWK->dfProgressBase +
     530           1 :                                            poWK->dfProgressScale *
     531           1 :                                                (psThreadData->counter /
     532           1 :                                                 static_cast<double>(nDstYSize)),
     533             :                                        "", poWK->pProgress))
     534             :                 {
     535           0 :                     CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     536           0 :                     psThreadData->stopFlag = true;
     537           0 :                     break;
     538             :                 }
     539             :             }
     540             : 
                                   // Final report at dfProgressBase + dfProgressScale.
     541           2 :             if (!psThreadData->stopFlag)
     542             :             {
     543           2 :                 if (!poWK->pfnProgress(poWK->dfProgressBase +
     544           2 :                                            poWK->dfProgressScale,
     545             :                                        "", poWK->pProgress))
     546             :                 {
     547           1 :                     CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
     548           1 :                     psThreadData->stopFlag = true;
     549             :                 }
     550             :             }
     551             :         }
     552             : 
                               // NOTE(review): the flag is sampled before WaitCompletion(). A
                               // stop flag raised by a job after this point (for instance on the
                               // clone failure path of ThreadFuncAdapter()) would not be
                               // reflected in the return value - confirm.
     553          24 :         bStopFlag = psThreadData->stopFlag;
     554             :     }
     555             : 
     556             :     /* -------------------------------------------------------------------- */
     557             :     /*      Wait for all jobs to complete.                                  */
     558             :     /* -------------------------------------------------------------------- */
     559          24 :     psThreadData->poJobQueue->WaitCompletion();
     560             : 
     561          24 :     return bStopFlag ? CE_Failure : CE_None;
     562             : }
     563             : 
     564             : /************************************************************************/
     565             : /* ==================================================================== */
     566             : /*                            GDALWarpKernel                            */
     567             : /* ==================================================================== */
     568             : /************************************************************************/
     569             : 
     570             : /**
     571             :  * \class GDALWarpKernel "gdalwarper.h"
     572             :  *
     573             :  * Low level image warping class.
     574             :  *
     575             :  * This class is responsible for low level image warping for one
     576             :  * "chunk" of imagery.  The class is essentially a structure with all
     577             :  * data members public - primarily so that new special-case functions
     578             :  * can be added without changing the class declaration.
     579             :  *
     580             :  * Applications are normally intended to interact with warping facilities
     581             :  * through the GDALWarpOperation class, though the GDALWarpKernel can in
     582             :  * theory be used directly if great care is taken in setting up the
     583             :  * control data.
     584             :  *
     585             :  * <h3>Design Issues</h3>
     586             :  *
     587             :  * The intention is that PerformWarp() would analyze the setup in terms
     588             :  * of the datatype, resampling type, and validity/density mask usage and
     589             :  * pick one of many specific implementations of the warping algorithm over
     590             :  * a continuum of optimization vs. generality.  At one end there will be a
     591             :  * reference general purpose implementation of the algorithm that supports
     592             :  * any data type (working internally in double precision complex), all three
     593             :  * resampling types, and any or all of the validity/density masks.  At the
     594             :  * other end would be highly optimized algorithms for common cases like
     595             :  * nearest neighbour resampling on GDT_UInt8 data with no masks.
     596             :  *
     597             :  * The full set of optimized versions have not been decided but we should
     598             :  * expect to have at least:
     599             :  *  - One for each resampling algorithm for 8bit data with no masks.
     600             :  *  - One for each resampling algorithm for float data with no masks.
     601             :  *  - One for each resampling algorithm for float data with any/all masks
     602             :  *    (essentially the generic case for just float data).
     603             :  *  - One for each resampling algorithm for 8bit data with support for
     604             :  *    input validity masks (per band or per pixel).  This handles the common
     605             :  *    case of nodata masking.
     606             :  *  - One for each resampling algorithm for float data with support for
     607             :  *    input validity masks (per band or per pixel).  This handles the common
     608             :  *    case of nodata masking.
     609             :  *
     610             :  * Some of the specializations would operate on all bands in one pass
     611             :  * (especially the ones without masking would do this), while others might
     612             :  * process each band individually to reduce code complexity.
     613             :  *
     614             :  * <h3>Masking Semantics</h3>
     615             :  *
     616             :  * A detailed explanation of the semantics of the validity and density masks,
     617             :  * and their effects on resampling kernels is needed here.
     618             :  */
     619             : 
     620             : /************************************************************************/
     621             : /*                     GDALWarpKernel Data Members                      */
     622             : /************************************************************************/
     623             : 
     624             : /**
     625             :  * \var GDALResampleAlg GDALWarpKernel::eResample;
     626             :  *
     627             :  * Resampling algorithm.
     628             :  *
     629             :  * The resampling algorithm to use.  One of GRA_NearestNeighbour, GRA_Bilinear,
     630             :  * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS,
     631             :  * GRA_Mode or GRA_Sum.
     632             :  *
     633             :  * This field is required. GRA_NearestNeighbour may be used as a default
     634             :  * value.
     635             :  */
     636             : 
     637             : /**
     638             :  * \var GDALDataType GDALWarpKernel::eWorkingDataType;
     639             :  *
     640             :  * Working pixel data type.
     641             :  *
     642             :  * The datatype of pixels in the source image (papabySrcImage) and
     643             :  * destination image (papabyDstImage) buffers.  Note that operations on
     644             :  * some data types (such as GDT_UInt8) may be much better optimized than other
     645             :  * less common cases.
     646             :  *
     647             :  * This field is required.  It may not be GDT_Unknown.
     648             :  */
     649             : 
     650             : /**
     651             :  * \var int GDALWarpKernel::nBands;
     652             :  *
     653             :  * Number of bands.
     654             :  *
     655             :  * The number of bands (layers) of imagery being warped.  Determines the
     656             :  * number of entries in the papabySrcImage, papanBandSrcValid,
     657             :  * and papabyDstImage arrays.
     658             :  *
     659             :  * This field is required.
     660             :  */
     661             : 
     662             : /**
     663             :  * \var int GDALWarpKernel::nSrcXSize;
     664             :  *
     665             :  * Source image width in pixels.
     666             :  *
     667             :  * This field is required.
     668             :  */
     669             : 
     670             : /**
     671             :  * \var int GDALWarpKernel::nSrcYSize;
     672             :  *
     673             :  * Source image height in pixels.
     674             :  *
     675             :  * This field is required.
     676             :  */
     677             : 
     678             : /**
     679             :  * \var double GDALWarpKernel::dfSrcXExtraSize;
     680             :  *
     681             :  * Number of pixels included in nSrcXSize that are present on the edges of
     682             :  * the area of interest to take into account the width of the kernel.
     683             :  *
     684             :  * This field is required.
     685             :  */
     686             : 
     687             : /**
     688             :  * \var double GDALWarpKernel::dfSrcYExtraSize;
     689             :  *
     690             :  * Number of pixels included in nSrcYSize that are present on the edges of
     691             :  * the area of interest to take into account the height of the kernel.
     692             :  *
     693             :  * This field is required.
     694             :  */
     695             : 
     696             : /**
     697             :  * \var GByte **GDALWarpKernel::papabySrcImage;
     698             :  *
     699             :  * Array of source image band data.
     700             :  *
     701             :  * This is an array (of size GDALWarpKernel::nBands) of pointers
     702             :  * to image data.  Each individual band of image data is organized as a single
     703             :  * block of image data in left to right, then bottom to top order.  The actual
     704             :  * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
     705             :  *
     706             :  * To access the pixel value for the (x=3, y=4) pixel (zero based) of
     707             :  * the second band with eWorkingDataType set to GDT_Float32 use code like
     708             :  * this:
     709             :  *
     710             :  * \code
     711             :  *   float dfPixelValue;
     712             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     713             :  *   int   nPixel = 3; // Zero based.
     714             :  *   int   nLine = 4;  // Zero based.
     715             :  *
     716             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     717             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     718             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     719             :  *   dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
     720             :  *                                  [nPixel + nLine * poKern->nSrcXSize];
     721             :  * \endcode
     722             :  *
     723             :  * This field is required.
     724             :  */
     725             : 
     726             : /**
     727             :  * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
     728             :  *
     729             :  * Per band validity mask for source pixels.
     730             :  *
     731             :  * Array of pixel validity mask layers for each source band.   Each of
     732             :  * the mask layers is the same size (in pixels) as the source image with
     733             :  * one bit per pixel.  Note that it is legal (and common) for this to be
     734             :  * NULL indicating that none of the pixels are invalidated, or for some
     735             :  * band validity masks to be NULL in which case all pixels of the band are
     736             :  * valid.  The following code can be used to test the validity of a particular
     737             :  * pixel.
     738             :  *
     739             :  * \code
     740             :  *   int   bIsValid = TRUE;
     741             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     742             :  *   int   nPixel = 3; // Zero based.
     743             :  *   int   nLine = 4;  // Zero based.
     744             :  *
     745             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     746             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     747             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     748             :  *
     749             :  *   if( poKern->papanBandSrcValid != NULL
     750             :  *       && poKern->papanBandSrcValid[nBand] != NULL )
     751             :  *   {
     752             :  *       GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
     753             :  *       int    iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
     754             :  *
     755             :  *       bIsValid = CPLMaskGet(panBandMask, iPixelOffset);
     756             :  *   }
     757             :  * \endcode
     758             :  */
     759             : 
     760             : /**
     761             :  * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
     762             :  *
     763             :  * Per pixel validity mask for source pixels.
     764             :  *
     765             :  * A single validity mask layer that applies to the pixels of all source
     766             :  * bands.  It is accessed similarly to papanBandSrcValid, but without the
     767             :  * extra level of band indirection.
     768             :  *
     769             :  * This pointer may be NULL indicating that all pixels are valid.
     770             :  *
     771             :  * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
     772             :  * the pixel isn't considered to be valid unless both arrays indicate it is
     773             :  * valid.
     774             :  */
     775             : 
     776             : /**
     777             :  * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
     778             :  *
     779             :  * Per pixel density mask for source pixels.
     780             :  *
     781             :  * A single density mask layer that applies to the pixels of all source
     782             :  * bands.  It contains values between 0.0 and 1.0 indicating the degree to
     783             :  * which this pixel should be allowed to contribute to the output result.
     784             :  *
     785             :  * This pointer may be NULL indicating that all pixels have a density of 1.0.
     786             :  *
     787             :  * The density for a pixel may be accessed like this:
     788             :  *
     789             :  * \code
     790             :  *   float fDensity = 1.0;
     791             :  *   int nPixel = 3;  // Zero based.
     792             :  *   int nLine = 4;   // Zero based.
     793             :  *
     794             :  *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
     795             :  *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
     796             :  *   if( poKern->pafUnifiedSrcDensity != NULL )
     797             :  *     fDensity = poKern->pafUnifiedSrcDensity
     798             :  *                                  [nPixel + nLine * poKern->nSrcXSize];
     799             :  * \endcode
     800             :  */
     801             : 
     802             : /**
     803             :  * \var int GDALWarpKernel::nDstXSize;
     804             :  *
     805             :  * Width of destination image in pixels.
     806             :  *
     807             :  * This field is required.
     808             :  */
     809             : 
     810             : /**
     811             :  * \var int GDALWarpKernel::nDstYSize;
     812             :  *
     813             :  * Height of destination image in pixels.
     814             :  *
     815             :  * This field is required.
     816             :  */
     817             : 
     818             : /**
     819             :  * \var GByte **GDALWarpKernel::papabyDstImage;
     820             :  *
     821             :  * Array of destination image band data.
     822             :  *
     823             :  * This is an array (of size GDALWarpKernel::nBands) of pointers
     824             :  * to image data.  Each individual band of image data is organized as a single
     825             :  * block of image data in left to right, then bottom to top order.  The actual
     826             :  * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
     827             :  *
     828             :  * To access the pixel value for the (x=3, y=4) pixel (zero based) of
     829             :  * the second band with eWorkingDataType set to GDT_Float32 use code like
     830             :  * this:
     831             :  *
     832             :  * \code
     833             :  *   float dfPixelValue;
     834             :  *   int   nBand = 2-1;  // Band indexes are zero based.
     835             :  *   int   nPixel = 3; // Zero based.
     836             :  *   int   nLine = 4;  // Zero based.
     837             :  *
     838             :  *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
     839             :  *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
     840             :  *   assert( nBand >= 0 && nBand < poKern->nBands );
     841             :  *   dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
     842             :  *                                  [nPixel + nLine * poKern->nDstXSize];
     843             :  * \endcode
     844             :  *
     845             :  * This field is required.
     846             :  */
     847             : 
     848             : /**
     849             :  * \var GUInt32 *GDALWarpKernel::panDstValid;
     850             :  *
     851             :  * Per pixel validity mask for destination pixels.
     852             :  *
     853             :  * A single validity mask layer that applies to the pixels of all destination
     854             :  * bands.  It is accessed similarly to panUnifiedSrcValid, but based
     855             :  * on the size of the destination image.
     856             :  *
     857             :  * This pointer may be NULL indicating that all pixels are valid.
     858             :  */
     859             : 
     860             : /**
     861             :  * \var float *GDALWarpKernel::pafDstDensity;
     862             :  *
     863             :  * Per pixel density mask for destination pixels.
     864             :  *
     865             :  * A single density mask layer that applies to the pixels of all destination
     866             :  * bands.  It contains values between 0.0 and 1.0.
     867             :  *
     868             :  * This pointer may be NULL indicating that all pixels have a density of 1.0.
     869             :  *
     870             :  * The density for a pixel may be accessed like this:
     871             :  *
     872             :  * \code
     873             :  *   float fDensity = 1.0;
     874             :  *   int   nPixel = 3; // Zero based.
     875             :  *   int   nLine = 4;  // Zero based.
     876             :  *
     877             :  *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
     878             :  *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
     879             :  *   if( poKern->pafDstDensity != NULL )
     880             :  *     fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
     881             :  * \endcode
     882             :  */
     883             : 
     884             : /**
     885             :  * \var int GDALWarpKernel::nSrcXOff;
     886             :  *
     887             :  * X offset to source pixel coordinates for transformation.
     888             :  *
     889             :  * See pfnTransformer.
     890             :  *
     891             :  * This field is required.
     892             :  */
     893             : 
     894             : /**
     895             :  * \var int GDALWarpKernel::nSrcYOff;
     896             :  *
     897             :  * Y offset to source pixel coordinates for transformation.
     898             :  *
     899             :  * See pfnTransformer.
     900             :  *
     901             :  * This field is required.
     902             :  */
     903             : 
     904             : /**
     905             :  * \var int GDALWarpKernel::nDstXOff;
     906             :  *
     907             :  * X offset to destination pixel coordinates for transformation.
     908             :  *
     909             :  * See pfnTransformer.
     910             :  *
     911             :  * This field is required.
     912             :  */
     913             : 
     914             : /**
     915             :  * \var int GDALWarpKernel::nDstYOff;
     916             :  *
     917             :  * Y offset to destination pixel coordinates for transformation.
     918             :  *
     919             :  * See pfnTransformer.
     920             :  *
     921             :  * This field is required.
     922             :  */
     923             : 
     924             : /**
     925             :  * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
     926             :  *
     927             :  * Source/destination location transformer.
     928             :  *
     929             :  * The function to call to transform coordinates between source image
     930             :  * pixel/line coordinates and destination image pixel/line coordinates.
     931             :  * See GDALTransformerFunc() for details of the semantics of this function.
     932             :  *
     933             :  * The GDALWarpKernel algorithm will only ever use this transformer in
     934             :  * "destination to source" mode (bDstToSrc=TRUE), and will always pass
     935             :  * partial or complete scanlines of points in the destination image as
     936             :  * input.  This means, among other things, that it is safe to use the
     937             :  * approximating transform GDALApproxTransform() as the transformation
     938             :  * function.
     939             :  *
     940             :  * Source and destination images may be subsets of a larger overall image.
     941             :  * The transformation algorithms will expect and return pixel/line coordinates
     942             :  * in terms of this larger image, so coordinates need to be offset by
     943             :  * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
     944             :  * passing to pfnTransformer, and after return from it.
     945             :  *
     946             :  * The GDALWarpKernel::pTransformerArg value will be passed as the callback
     947             :  * data to this function when it is called.
     948             :  *
     949             :  * This field is required.
     950             :  */
     951             : 
     952             : /**
     953             :  * \var void *GDALWarpKernel::pTransformerArg;
     954             :  *
     955             :  * Callback data for pfnTransformer.
     956             :  *
     957             :  * This field may be NULL if not required for the pfnTransformer being used.
     958             :  */
     959             : 
     960             : /**
     961             :  * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
     962             :  *
     963             :  * The function to call to report progress of the algorithm, and to check
     964             :  * for a requested termination of the operation.  It operates according to
     965             :  * GDALProgressFunc() semantics.
     966             :  *
     967             :  * Generally speaking the progress function will be invoked for each
     968             :  * scanline of the destination buffer that has been processed.
     969             :  *
     970             :  * This field may be NULL (internally set to GDALDummyProgress()).
     971             :  */
     972             : 
     973             : /**
     974             :  * \var void *GDALWarpKernel::pProgress;
     975             :  *
     976             :  * Callback data for pfnProgress.
     977             :  *
     978             :  * This field may be NULL if not required for the pfnProgress being used.
     979             :  */
     980             : 
     981             : /************************************************************************/
     982             : /*                           GDALWarpKernel()                           */
     983             : /************************************************************************/
     984             : 
     985        3848 : GDALWarpKernel::GDALWarpKernel()  // Default constructor: every pointer member is null, every size/offset is zero.
     986             :     : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour),  // nearest neighbour is the default resampling
     987             :       eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0),  // GDT_Unknown is a placeholder; the member docs require a real type
     988             :       dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr),
     989             :       papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr),  // null masks are documented as "all pixels valid"
     990             :       pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0),
     991             :       papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr),
     992             :       dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0),  // scales start at 1.0, filter sizes and radii at 0
     993             :       nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0),
     994             :       nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr),  // no transformer yet; the member docs mark it as required
     995             :       pTransformerArg(nullptr), pfnProgress(GDALDummyProgress),  // progress callback starts as GDALDummyProgress
     996             :       pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0),  // progress range defaults to base 0.0, scale 1.0
     997             :       padfDstNoDataReal(nullptr), psThreadData(nullptr),
     998        3848 :       eTieStrategy(GWKTS_First)
     999             : {  // empty body: all state is established by the initializer list above
    1000        3848 : }
    1001             : 
    1002             : /************************************************************************/
    1003             : /*                          ~GDALWarpKernel()                           */
    1004             : /************************************************************************/
    1005             : 
    1006        3848 : GDALWarpKernel::~GDALWarpKernel()  // Destructor with an empty body: no member is freed here.
    1007             : {
    1008        3848 : }
    1009             : 
    1010             : /************************************************************************/
    1011             : /*                              getArea()                               */
    1012             : /************************************************************************/
    1013             : 
    1014             : typedef std::pair<double, double> XYPair;  // one vertex; getArea() below treats .first as x and .second as y
    1015             : 
    1016             : typedef std::vector<XYPair> XYPoly;  // ordered list of vertices
    1017             : 
    1018             : // Returns the unsigned area of poly as 0.5 * |sum of x[i] * (y[i+1] - y[i-1])| with wrap-around indices. poly may or may not be closed.
    1019      565915 : static double getArea(const XYPoly &poly)
    1020             : {
    1021             :     // Unchecked precondition: poly[1] and poly[nPointCount - 2] are read below, so at least 2 points are needed. CPLAssert(poly.size() >= 2);
    1022      565915 :     const size_t nPointCount = poly.size();
    1023             :     double dfAreaSum =
    1024      565915 :         poly[0].first * (poly[1].second - poly[nPointCount - 1].second);  // term for vertex 0: its predecessor wraps to the last vertex
    1025             : 
    1026     1787320 :     for (size_t i = 1; i < nPointCount - 1; i++)  // interior vertices, whose neighbours need no wrap-around
    1027             :     {
    1028     1221400 :         dfAreaSum += poly[i].first * (poly[i + 1].second - poly[i - 1].second);
    1029             :     }
    1030             : 
    1031      565915 :     dfAreaSum += poly[nPointCount - 1].first *  // term for the last vertex: its successor wraps to vertex 0
    1032      565915 :                  (poly[0].second - poly[nPointCount - 2].second);
    1033             : 
    1034      565915 :     return 0.5 * std::fabs(dfAreaSum);  // fabs makes the result non-negative whatever the vertex order
    1035             : }
    1036             : 
    1037             : /************************************************************************/
    1038             : /*                       CanUse4SamplesFormula()                        */
    1039             : /************************************************************************/
    1040             : 
    1041        4723 : static bool CanUse4SamplesFormula(const GDALWarpKernel *poWK)  // true only for bilinear/cubic resampling with both scales > 0.5
    1042             : {
    1043        4723 :     if (poWK->eResample == GRA_Bilinear || poWK->eResample == GRA_Cubic)
    1044             :     {
    1045             :         // Use the 4-sample formula only when neither axis is downsampled by more
    1046             :         // than a factor of 1:2, i.e. dfXScale and dfYScale are both strictly above 0.5.
    1047        2651 :         if (poWK->dfXScale > 0.5 && poWK->dfYScale > 0.5)
    1048        2215 :             return true;
    1049         436 :         CPLDebugOnce("WARP",  // logs why the formula was rejected (presumably emitted only once, per the helper's name)
    1050             :                      "Not using 4-sample bilinear/bicubic formula because "
    1051             :                      "XSCALE(=%f) and/or YSCALE(=%f) <= 0.5",
    1052             :                      poWK->dfXScale, poWK->dfYScale);
    1053             :     }
    1054        2508 :     return false;  // any other resampling mode, or a scale <= 0.5 on at least one axis
    1055             : }
    1056             : 
    1057             : /************************************************************************/
    1058             : /*                            PerformWarp()                             */
    1059             : /************************************************************************/
    1060             : 
    1061             : /**
    1062             :  * \fn CPLErr GDALWarpKernel::PerformWarp();
    1063             :  *
    1064             :  * This method performs the warp described in the GDALWarpKernel.
    1065             :  *
    1066             :  * @return CE_None on success or CE_Failure if an error occurs.
    1067             :  */
    1068             : 
    1069        3844 : CPLErr GDALWarpKernel::PerformWarp()
    1070             : 
    1071             : {
    1072        3844 :     const CPLErr eErr = Validate();
    1073             : 
    1074        3844 :     if (eErr != CE_None)
    1075           1 :         return eErr;
    1076             : 
    1077             :     // See #2445 and #3079.
    1078        3843 :     if (nSrcXSize <= 0 || nSrcYSize <= 0)
    1079             :     {
    1080         613 :         if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
    1081             :         {
    1082           0 :             CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
    1083           0 :             return CE_Failure;
    1084             :         }
    1085         613 :         return CE_None;
    1086             :     }
    1087             : 
    1088             :     /* -------------------------------------------------------------------- */
    1089             :     /*      Pre-calculate resampling scales and window sizes for filtering. */
    1090             :     /* -------------------------------------------------------------------- */
    1091             : 
    1092        3230 :     dfXScale = 0.0;
    1093        3230 :     dfYScale = 0.0;
    1094             : 
    1095             :     // XSCALE and YSCALE per warping chunk is not necessarily ideal, in case of
    1096             :     // heterogeneous change in shapes.
    1097             :     // Best would probably be a per-pixel scale computation.
    1098        3230 :     const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
    1099        3230 :     const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
    1100        3230 :     if (!pszXScale || !pszYScale)
    1101             :     {
    1102             :         // Sample points along a grid in the destination space
    1103        3229 :         constexpr int MAX_POINTS_PER_DIM = 10;
    1104        3229 :         const int nPointsX = std::min(MAX_POINTS_PER_DIM, nDstXSize);
    1105        3229 :         const int nPointsY = std::min(MAX_POINTS_PER_DIM, nDstYSize);
    1106        3229 :         constexpr int CORNER_COUNT_PER_SQUARE = 4;
    1107        3229 :         const int nPoints = CORNER_COUNT_PER_SQUARE * nPointsX * nPointsY;
    1108        6458 :         std::vector<double> adfX;
    1109        6458 :         std::vector<double> adfY;
    1110        3229 :         adfX.reserve(CORNER_COUNT_PER_SQUARE * nPoints);
    1111        3229 :         adfY.reserve(CORNER_COUNT_PER_SQUARE * nPoints);
    1112        6458 :         std::vector<double> adfZ(CORNER_COUNT_PER_SQUARE * nPoints);
    1113        6458 :         std::vector<int> abSuccess(CORNER_COUNT_PER_SQUARE * nPoints);
    1114       31147 :         for (int iY = 0; iY < nPointsY; iY++)
    1115             :         {
    1116       27918 :             const double dfYShift = (iY > 0 && iY == nPointsY - 1) ? -1.0 : 0.0;
    1117       27918 :             const double dfY =
    1118       27918 :                 dfYShift + (nPointsY == 1 ? 0.0
    1119       27701 :                                           : static_cast<double>(iY) *
    1120       27701 :                                                 nDstYSize / (nPointsY - 1));
    1121             : 
    1122      296636 :             for (int iX = 0; iX < nPointsX; iX++)
    1123             :             {
    1124      268718 :                 const double dfXShift =
    1125      268718 :                     (iX > 0 && iX == nPointsX - 1) ? -1.0 : 0.0;
    1126             : 
    1127      268718 :                 const double dfX =
    1128      268718 :                     dfXShift + (nPointsX == 1 ? 0.0
    1129      268503 :                                               : static_cast<double>(iX) *
    1130      268503 :                                                     nDstXSize / (nPointsX - 1));
    1131             : 
    1132             :                 // Reproject a unit square at each sample point
    1133      268718 :                 adfX.push_back(dfX);
    1134      268718 :                 adfY.push_back(dfY);
    1135             : 
    1136      268718 :                 adfX.push_back(dfX + 1);
    1137      268718 :                 adfY.push_back(dfY);
    1138             : 
    1139      268718 :                 adfX.push_back(dfX);
    1140      268718 :                 adfY.push_back(dfY + 1);
    1141             : 
    1142      268718 :                 adfX.push_back(dfX + 1);
    1143      268718 :                 adfY.push_back(dfY + 1);
    1144             :             }
    1145             :         }
    1146        3229 :         pfnTransformer(pTransformerArg, TRUE, static_cast<int>(adfX.size()),
    1147             :                        adfX.data(), adfY.data(), adfZ.data(), abSuccess.data());
    1148             : 
    1149        6458 :         std::vector<XYPair> adfXYScales;
    1150        3229 :         adfXYScales.reserve(nPoints);
    1151      271947 :         for (int i = 0; i < nPoints; i += CORNER_COUNT_PER_SQUARE)
    1152             :         {
    1153      536242 :             if (abSuccess[i + 0] && abSuccess[i + 1] && abSuccess[i + 2] &&
    1154      267524 :                 abSuccess[i + 3])
    1155             :             {
    1156     2140180 :                 const auto square = [](double x) { return x * x; };
    1157             : 
    1158      267522 :                 const double vx01 = adfX[i + 1] - adfX[i + 0];
    1159      267522 :                 const double vy01 = adfY[i + 1] - adfY[i + 0];
    1160      267522 :                 const double len01_sq = square(vx01) + square(vy01);
    1161             : 
    1162      267522 :                 const double vx23 = adfX[i + 3] - adfX[i + 2];
    1163      267522 :                 const double vy23 = adfY[i + 3] - adfY[i + 2];
    1164      267522 :                 const double len23_sq = square(vx23) + square(vy23);
    1165             : 
    1166      267522 :                 const double vx02 = adfX[i + 2] - adfX[i + 0];
    1167      267522 :                 const double vy02 = adfY[i + 2] - adfY[i + 0];
    1168      267522 :                 const double len02_sq = square(vx02) + square(vy02);
    1169             : 
    1170      267522 :                 const double vx13 = adfX[i + 3] - adfX[i + 1];
    1171      267522 :                 const double vy13 = adfY[i + 3] - adfY[i + 1];
    1172      267522 :                 const double len13_sq = square(vx13) + square(vy13);
    1173             : 
    1174             :                 // ~ 20 degree, heuristic
    1175      267522 :                 constexpr double TAN_MODEST_ANGLE = 0.35;
    1176             : 
    1177             :                 // 10%, heuristic
    1178      267522 :                 constexpr double LENGTH_RELATIVE_TOLERANCE = 0.1;
    1179             : 
    1180             :                 // Security margin to avoid division by zero (would only
    1181             :                 // happen in case of degenerated coordinate transformation,
    1182             :                 // or insane upsampling)
    1183      267522 :                 constexpr double EPSILON = 1e-10;
    1184             : 
    1185             :                 // Does the transformed square looks like an almost non-rotated
    1186             :                 // quasi-rectangle ?
    1187      267522 :                 if (std::fabs(vy01) < TAN_MODEST_ANGLE * vx01 &&
    1188      260283 :                     std::fabs(vy23) < TAN_MODEST_ANGLE * vx23 &&
    1189      260256 :                     std::fabs(len01_sq - len23_sq) <
    1190      260256 :                         LENGTH_RELATIVE_TOLERANCE * std::fabs(len01_sq) &&
    1191      260143 :                     std::fabs(len02_sq - len13_sq) <
    1192      260143 :                         LENGTH_RELATIVE_TOLERANCE * std::fabs(len02_sq))
    1193             :                 {
    1194             :                     // Using a geometric average here of lenAB_sq and lenCD_sq,
    1195             :                     // hence a sqrt(), and as this is still a squared value,
    1196             :                     // we need another sqrt() to get a distance.
    1197             :                     const double dfXLength =
    1198      260128 :                         std::sqrt(std::sqrt(len01_sq * len23_sq));
    1199             :                     const double dfYLength =
    1200      260128 :                         std::sqrt(std::sqrt(len02_sq * len13_sq));
    1201      260128 :                     if (dfXLength > EPSILON && dfYLength > EPSILON)
    1202             :                     {
    1203      260128 :                         const double dfThisXScale = 1.0 / dfXLength;
    1204      260128 :                         const double dfThisYScale = 1.0 / dfYLength;
    1205      260128 :                         adfXYScales.push_back({dfThisXScale, dfThisYScale});
    1206      260128 :                     }
    1207             :                 }
    1208             :                 else
    1209             :                 {
    1210             :                     // If not, then consider the area of the transformed unit
    1211             :                     // square to determine the X/Y scales.
    1212        7394 :                     const XYPoly poly{{adfX[i + 0], adfY[i + 0]},
    1213        7394 :                                       {adfX[i + 1], adfY[i + 1]},
    1214        7394 :                                       {adfX[i + 3], adfY[i + 3]},
    1215       29576 :                                       {adfX[i + 2], adfY[i + 2]}};
    1216        7394 :                     const double dfSrcArea = getArea(poly);
    1217        7394 :                     const double dfFactor = std::sqrt(dfSrcArea);
    1218        7394 :                     if (dfFactor > EPSILON)
    1219             :                     {
    1220        7394 :                         const double dfThisXScale = 1.0 / dfFactor;
    1221        7394 :                         const double dfThisYScale = dfThisXScale;
    1222        7394 :                         adfXYScales.push_back({dfThisXScale, dfThisYScale});
    1223             :                     }
    1224             :                 }
    1225             :             }
    1226             :         }
    1227             : 
    1228        3229 :         if (!adfXYScales.empty())
    1229             :         {
    1230             :             // Sort by increasing xscale * yscale
    1231        3229 :             std::sort(adfXYScales.begin(), adfXYScales.end(),
    1232     1456370 :                       [](const XYPair &a, const XYPair &b)
    1233     1456370 :                       { return a.first * a.second < b.first * b.second; });
    1234             : 
    1235             :             // Compute the per-axis maximum of scale
    1236        3229 :             double dfXMax = 0;
    1237        3229 :             double dfYMax = 0;
    1238      270751 :             for (const auto &[dfX, dfY] : adfXYScales)
    1239             :             {
    1240      267522 :                 dfXMax = std::max(dfXMax, dfX);
    1241      267522 :                 dfYMax = std::max(dfYMax, dfY);
    1242             :             }
    1243             : 
    1244             :             // Now eliminate outliers, defined as ones whose value is < 10% of
    1245             :             // the maximum value, typically found at a polar discontinuity, and
    1246             :             // compute the average of non-outlier values.
    1247        3229 :             dfXScale = 0;
    1248        3229 :             dfYScale = 0;
    1249        3229 :             int i = 0;
    1250        3229 :             constexpr double THRESHOLD = 0.1;  // 10%, rather arbitrary
    1251      270751 :             for (const auto &[dfX, dfY] : adfXYScales)
    1252             :             {
    1253      267522 :                 if (dfX > THRESHOLD * dfXMax && dfY > THRESHOLD * dfYMax)
    1254             :                 {
    1255      264634 :                     ++i;
    1256      264634 :                     const double dfXDelta = dfX - dfXScale;
    1257      264634 :                     const double dfYDelta = dfY - dfYScale;
    1258      264634 :                     const double dfInvI = 1.0 / i;
    1259      264634 :                     dfXScale += dfXDelta * dfInvI;
    1260      264634 :                     dfYScale += dfYDelta * dfInvI;
    1261             :                 }
    1262             :             }
    1263             :         }
    1264             :     }
    1265             : 
    1266             :     // Round to closest integer reciprocal scale if we are very close to it
    1267             :     const auto RoundToClosestIntegerReciprocalScaleIfCloseEnough =
    1268        6460 :         [](double dfScale)
    1269             :     {
    1270        6460 :         if (dfScale < 1.0)
    1271             :         {
    1272        2604 :             double dfReciprocalScale = 1.0 / dfScale;
    1273        2604 :             const int nReciprocalScale =
    1274        2604 :                 static_cast<int>(dfReciprocalScale + 0.5);
    1275        2604 :             if (fabs(dfReciprocalScale - nReciprocalScale) < 0.05)
    1276        2151 :                 dfScale = 1.0 / nReciprocalScale;
    1277             :         }
    1278        6460 :         return dfScale;
    1279             :     };
    1280             : 
    1281        3230 :     if (dfXScale <= 0)
    1282           1 :         dfXScale = 1.0;
    1283        3230 :     if (dfYScale <= 0)
    1284           1 :         dfYScale = 1.0;
    1285             : 
    1286        3230 :     dfXScale = RoundToClosestIntegerReciprocalScaleIfCloseEnough(dfXScale);
    1287        3230 :     dfYScale = RoundToClosestIntegerReciprocalScaleIfCloseEnough(dfYScale);
    1288             : 
    1289        3230 :     if (pszXScale != nullptr)
    1290           1 :         dfXScale = CPLAtof(pszXScale);
    1291        3230 :     if (pszYScale != nullptr)
    1292           1 :         dfYScale = CPLAtof(pszYScale);
    1293             : 
    1294        3230 :     if (!pszXScale || !pszYScale)
    1295        3229 :         CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
    1296             : 
    1297        3230 :     const int bUse4SamplesFormula = CanUse4SamplesFormula(this);
    1298             : 
    1299             :     // Safety check for callers that would use GDALWarpKernel without using
    1300             :     // GDALWarpOperation.
    1301        3167 :     if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
    1302        3102 :          ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
    1303        6460 :           !bUse4SamplesFormula)) &&
    1304         346 :         atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
    1305             :             WARP_EXTRA_ELTS)
    1306             :     {
    1307           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    1308             :                  "Source arrays must have WARP_EXTRA_ELTS extra elements at "
    1309             :                  "their end. "
    1310             :                  "See GDALWarpKernel class definition. If this condition is "
    1311             :                  "fulfilled, define a EXTRA_ELTS=%d warp options",
    1312             :                  WARP_EXTRA_ELTS);
    1313           0 :         return CE_Failure;
    1314             :     }
    1315             : 
    1316        3230 :     dfXFilter = anGWKFilterRadius[eResample];
    1317        3230 :     dfYFilter = anGWKFilterRadius[eResample];
    1318             : 
    1319        3230 :     nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
    1320        2636 :                               : static_cast<int>(dfXFilter);
    1321        3230 :     nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
    1322        2658 :                               : static_cast<int>(dfYFilter);
    1323             : 
    1324             :     // Filter window offset depends on the parity of the kernel radius.
    1325        3230 :     nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
    1326        3230 :     nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
    1327             : 
    1328        3230 :     bApplyVerticalShift =
    1329        3230 :         CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
    1330        3230 :     dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
    1331        3230 :         papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
    1332             : 
    1333             :     /* -------------------------------------------------------------------- */
    1334             :     /*      Set up resampling functions.                                    */
    1335             :     /* -------------------------------------------------------------------- */
    1336        3230 :     if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
    1337          12 :         return GWKGeneralCase(this);
    1338             : 
    1339        3218 :     const bool bNoMasksOrDstDensityOnly =
    1340        3208 :         papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
    1341        6426 :         pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
    1342             : 
    1343        3218 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_NearestNeighbour &&
    1344             :         bNoMasksOrDstDensityOnly)
    1345         954 :         return GWKNearestNoMasksOrDstDensityOnlyByte(this);
    1346             : 
    1347        2264 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_Bilinear &&
    1348             :         bNoMasksOrDstDensityOnly)
    1349         132 :         return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
    1350             : 
    1351        2132 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_Cubic &&
    1352             :         bNoMasksOrDstDensityOnly)
    1353         852 :         return GWKCubicNoMasksOrDstDensityOnlyByte(this);
    1354             : 
    1355        1280 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_CubicSpline &&
    1356             :         bNoMasksOrDstDensityOnly)
    1357          12 :         return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
    1358             : 
    1359        1268 :     if (eWorkingDataType == GDT_UInt8 && eResample == GRA_NearestNeighbour)
    1360         363 :         return GWKNearestByte(this);
    1361             : 
    1362         905 :     if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
    1363         155 :         eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
    1364          14 :         return GWKNearestNoMasksOrDstDensityOnlyShort(this);
    1365             : 
    1366         891 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
    1367             :         bNoMasksOrDstDensityOnly)
    1368           5 :         return GWKCubicNoMasksOrDstDensityOnlyShort(this);
    1369             : 
    1370         886 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
    1371             :         bNoMasksOrDstDensityOnly)
    1372           6 :         return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
    1373             : 
    1374         880 :     if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
    1375             :         bNoMasksOrDstDensityOnly)
    1376           5 :         return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
    1377             : 
    1378         875 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
    1379             :         bNoMasksOrDstDensityOnly)
    1380          14 :         return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
    1381             : 
    1382         861 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
    1383             :         bNoMasksOrDstDensityOnly)
    1384           5 :         return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
    1385             : 
    1386         856 :     if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
    1387             :         bNoMasksOrDstDensityOnly)
    1388           7 :         return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
    1389             : 
    1390         849 :     if (eWorkingDataType == GDT_Int8 && eResample == GRA_NearestNeighbour)
    1391           9 :         return GWKNearestInt8(this);
    1392             : 
    1393         840 :     if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
    1394          40 :         return GWKNearestShort(this);
    1395             : 
    1396         800 :     if (eWorkingDataType == GDT_UInt16 && eResample == GRA_NearestNeighbour)
    1397          10 :         return GWKNearestUnsignedShort(this);
    1398             : 
    1399         790 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
    1400             :         bNoMasksOrDstDensityOnly)
    1401          11 :         return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
    1402             : 
    1403         779 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
    1404          51 :         return GWKNearestFloat(this);
    1405             : 
    1406         728 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
    1407             :         bNoMasksOrDstDensityOnly)
    1408           4 :         return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
    1409             : 
    1410         724 :     if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
    1411             :         bNoMasksOrDstDensityOnly)
    1412           9 :         return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
    1413             : 
    1414             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    1415             :     if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
    1416             :         bNoMasksOrDstDensityOnly)
    1417             :         return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
    1418             : 
    1419             :     if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
    1420             :         bNoMasksOrDstDensityOnly)
    1421             :         return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
    1422             : #endif
    1423             : 
    1424         715 :     if (eResample == GRA_Average)
    1425         160 :         return GWKAverageOrMode(this);
    1426             : 
    1427         555 :     if (eResample == GRA_RMS)
    1428           9 :         return GWKAverageOrMode(this);
    1429             : 
    1430         546 :     if (eResample == GRA_Mode)
    1431          45 :         return GWKAverageOrMode(this);
    1432             : 
    1433         501 :     if (eResample == GRA_Max)
    1434           6 :         return GWKAverageOrMode(this);
    1435             : 
    1436         495 :     if (eResample == GRA_Min)
    1437           5 :         return GWKAverageOrMode(this);
    1438             : 
    1439         490 :     if (eResample == GRA_Med)
    1440           6 :         return GWKAverageOrMode(this);
    1441             : 
    1442         484 :     if (eResample == GRA_Q1)
    1443          10 :         return GWKAverageOrMode(this);
    1444             : 
    1445         474 :     if (eResample == GRA_Q3)
    1446           5 :         return GWKAverageOrMode(this);
    1447             : 
    1448         469 :     if (eResample == GRA_Sum)
    1449          19 :         return GWKSumPreserving(this);
    1450             : 
    1451         450 :     if (!GDALDataTypeIsComplex(eWorkingDataType))
    1452             :     {
    1453         223 :         return GWKRealCase(this);
    1454             :     }
    1455             : 
    1456         227 :     return GWKGeneralCase(this);
    1457             : }
    1458             : 
    1459             : /************************************************************************/
    1460             : /*                              Validate()                              */
    1461             : /************************************************************************/
    1462             : 
    1463             : /**
    1464             :  * \fn CPLErr GDALWarpKernel::Validate()
    1465             :  *
    1466             :  * Check the settings in the GDALWarpKernel, and issue a CPLError()
    1467             :  * (and return CE_Failure) if the configuration is considered to be
    1468             :  * invalid for some reason.
    1469             :  *
    1470             :  * Two settings are checked: eResample must index an entry of
    1471             :  * anGWKFilterRadius, and the EXCLUDED_VALUES warp option, when present,
                        * must hold a whole number of tuples of nBands values with nBands
                        * strictly positive. On success the parsed tuples replace the content
                        * of m_aadfExcludedValues.
                        *
                        * NOTE(review): earlier documentation stated that this method defaults
                        * pfnProgress to GDALDummyProgress() when it is NULL; the body below
                        * does not do so.
    1472             :  *
    1473             :  * @return CE_None on success or CE_Failure if an error is detected.
    1474             :  */
    1475             : 
    1476        3844 : CPLErr GDALWarpKernel::Validate()
    1477             : 
    1478             : {
                           // eResample is later used as an index into anGWKFilterRadius, so it
                           // must be smaller than the number of entries of that table.
    1479        3844 :     if (static_cast<size_t>(eResample) >=
    1480             :         (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))
    1481             :     {
    1482           0 :         CPLError(CE_Failure, CPLE_AppDefined,
    1483             :                  "Unsupported resampling method %d.",
    1484           0 :                  static_cast<int>(eResample));
    1485           0 :         return CE_Failure;
    1486             :     }
    1487             : 
    1488             :     // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
    1489             :     // be ignored as contributing source pixels during resampling. Only taken into account by
    1490             :     // Average currently
    1491             :     const char *pszExcludedValues =
    1492        3844 :         CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
    1493        3844 :     if (pszExcludedValues)
    1494             :     {
                               // A band count of zero would make the modulo operations below
                               // divide by zero, and a negative one cannot describe a tuple:
                               // reject both before doing any arithmetic with nBands.
                               if (nBands <= 0)
                               {
                                   CPLError(CE_Failure, CPLE_AppDefined,
                                            "EXCLUDED_VALUES cannot be used with a band count "
                                            "of %d",
                                            nBands);
                                   return CE_Failure;
                               }

                               // "(", "," and ")" are all treated as separators, so the tuple
                               // structure is recovered purely from the token count.
    1495             :         const CPLStringList aosTokens(
    1496          18 :             CSLTokenizeString2(pszExcludedValues, "(,)", 0));
    1497          18 :         if ((aosTokens.size() % nBands) != 0)
    1498             :         {
    1499           1 :             CPLError(CE_Failure, CPLE_AppDefined,
    1500             :                      "EXCLUDED_VALUES should contain one or several tuples of "
    1501             :                      "%d values formatted like <R>,<G>,<B> or "
    1502             :                      "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
    1503             :                      "tuples",
    1504             :                      nBands);
    1505           1 :             return CE_Failure;
    1506             :         }

                               // Start from an empty list so that calling Validate() again on the
                               // same kernel does not append the same tuples a second time.
                               m_aadfExcludedValues.clear();

    1507          34 :         std::vector<double> adfTuple;
    1508          68 :         for (int i = 0; i < aosTokens.size(); ++i)
    1509             :         {
    1510          51 :             adfTuple.push_back(CPLAtof(aosTokens[i]));
                                   // Every nBands-th token completes one tuple.
    1511          51 :             if (((i + 1) % nBands) == 0)
    1512             :             {
    1513          17 :                 m_aadfExcludedValues.push_back(adfTuple);
    1514          17 :                 adfTuple.clear();
    1515             :             }
    1516             :         }
    1517             :     }
    1518             : 
    1519        3843 :     return CE_None;
    1520             : }
    1521             : 
    1522             : /************************************************************************/
    1523             : /*                         GWKOverlayDensity()                          */
    1524             : /*                                                                      */
    1525             : /*      Compute the final density for the destination pixel.  This      */
    1526             : /*      is a function of the overlay density (passed in) and the        */
    1527             : /*      original density.                                               */
    1528             : /************************************************************************/
    1529             : 
                       /**
                        * Update poWK->pafDstDensity[iDstOffset] in place with
                        *   new = 1 - (1 - dfDensity) * (1 - old).
                        *
                        * @param poWK       kernel whose pafDstDensity array is updated; the
                        *                   array may be null, in which case nothing is done.
                        * @param iDstOffset index of the destination pixel in pafDstDensity.
                        * @param dfDensity  overlay density; values below 0.0001 are ignored.
                        */
    1530    17762100 : static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,
    1531             :                               double dfDensity)
    1532             : {
                           // Nothing to do for a negligible overlay or when no destination
                           // density array is attached to the kernel.
    1533    17762100 :     if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)
    1534    13309900 :         return;
    1535             : 
                           // dfDensity is narrowed to float first, so the whole expression is
                           // evaluated in single precision.
    1536     4452160 :     poWK->pafDstDensity[iDstOffset] =
    1537     4452160 :         1.0f -
    1538     4452160 :         (1.0f - float(dfDensity)) * (1.0f - poWK->pafDstDensity[iDstOffset]);
    1539             : }
    1540             : 
    1541             : /************************************************************************/
    1542             : /*                           GWKRoundValueT()                           */
    1543             : /************************************************************************/
    1544             : 
                       // Helper selecting the rounding formula from the signedness of the
                       // target type T. The primary template only declares eval(); the two
                       // partial specializations below provide the definitions.
    1545             : template <class T, class U, bool is_signed> struct sGWKRoundValueT
    1546             : {
    1547             :     static T eval(U);
    1548             : };
    1549             : 
                       // Signed target: floor(value + 0.5), then conversion to T. floor() is
                       // used so that negative values are not truncated towards zero.
    1550             : template <class T, class U> struct sGWKRoundValueT<T, U, true> /* signed */
    1551             : {
    1552      791525 :     static T eval(U value)
    1553             :     {
    1554      791525 :         return static_cast<T>(floor(value + U(0.5)));
    1555             :     }
    1556             : };
    1557             : 
                       // Unsigned target: value + 0.5, then conversion to T (the conversion
                       // itself discards the fractional part).
    1558             : template <class T, class U> struct sGWKRoundValueT<T, U, false> /* unsigned */
    1559             : {
    1560   152219851 :     static T eval(U value)
    1561             :     {
    1562   152219851 :         return static_cast<T>(value + U(0.5));
    1563             :     }
    1564             : };
    1565             : 
                       // Convert a value of type U to type T with rounding, dispatching on
                       // cpl::NumericLimits<T>::is_signed to one of the specializations above.
                       // No range check is done here; see GWKClampValueT() for that.
    1566   153011376 : template <class T, class U> static T GWKRoundValueT(U value)
    1567             : {
    1568   153011376 :     return sGWKRoundValueT<T, U, cpl::NumericLimits<T>::is_signed>::eval(value);
    1569             : }
    1570             : 
                       // double -> float: plain narrowing conversion, no 0.5 offset applied.
    1571      268974 : template <> float GWKRoundValueT<float, double>(double value)
    1572             : {
    1573      268974 :     return static_cast<float>(value);
    1574             : }
    1575             : 
                       // double -> double identity, compiled only when "notused" is defined.
    1576             : #ifdef notused
    1577             : template <> double GWKRoundValueT<double, double>(double value)
    1578             : {
    1579             :     return value;
    1580             : }
    1581             : #endif
    1582             : 
    1583             : /************************************************************************/
    1584             : /*                           GWKClampValueT()                           */
    1585             : /************************************************************************/
    1586             : 
                       // Convert a value of type U to type T, saturating at
                       // cpl::NumericLimits<T>::min() and ::max(); values inside that range
                       // are rounded with GWKRoundValueT<T, U>().
                       // NOTE(review): a NaN input fails both comparisons and therefore reaches
                       // GWKRoundValueT() unchanged - confirm callers never pass NaN for
                       // integer T.
    1587   145841452 : template <class T, class U> static CPL_INLINE T GWKClampValueT(U value)
    1588             : {
    1589   145841452 :     if (value < static_cast<U>(cpl::NumericLimits<T>::min()))
    1590      571307 :         return cpl::NumericLimits<T>::min();
    1591   145270054 :     else if (value > static_cast<U>(cpl::NumericLimits<T>::max()))
    1592      776829 :         return cpl::NumericLimits<T>::max();
    1593             :     else
    1594   144493934 :         return GWKRoundValueT<T, U>(value);
    1595             : }
    1596             : 
                       // double -> float: plain narrowing conversion, without the min()/max()
                       // comparisons of the generic version.
    1597      718915 : template <> float GWKClampValueT<float, double>(double dfValue)
    1598             : {
    1599      718915 :     return static_cast<float>(dfValue);
    1600             : }
    1601             : 
                       // double -> double identity, compiled only when "notused" is defined.
    1602             : #ifdef notused
    1603             : template <> double GWKClampValueT<double, double>(double dfValue)
    1604             : {
    1605             :     return dfValue;
    1606             : }
    1607             : #endif
    1608             : 
    1609             : /************************************************************************/
    1610             : /*                            AvoidNoData()                             */
    1611             : /************************************************************************/
    1612             : 
                       // Replace pDst[iDstOffset] by a neighbouring representable value of T.
                       // The kernel-level AvoidNoData() overload that follows calls this when
                       // the stored value equals the destination nodata value.
                       //  - integer T:  lowest() becomes lowest() + 1, any other value is
                       //                decremented by one;
                       //  - other T:    max() moves one step towards 0, any other value moves
                       //                one step towards max() (both via nextafter()).
    1613        1283 : template <class T> inline void AvoidNoData(T *pDst, GPtrDiff_t iDstOffset)
    1614             : {
    1615             :     if constexpr (cpl::NumericLimits<T>::is_integer)
    1616             :     {
                               // Decrementing lowest() is not possible, so step upwards instead.
    1617        1027 :         if (pDst[iDstOffset] == static_cast<T>(cpl::NumericLimits<T>::lowest()))
    1618             :         {
    1619         515 :             pDst[iDstOffset] =
    1620         515 :                 static_cast<T>(cpl::NumericLimits<T>::lowest() + 1);
    1621             :         }
    1622             :         else
    1623         512 :             pDst[iDstOffset]--;
    1624             :     }
    1625             :     else
    1626             :     {
                               // At max() there is no larger finite value to move to, so step
                               // towards zero instead.
    1627         256 :         if (pDst[iDstOffset] == cpl::NumericLimits<T>::max())
    1628             :         {
    1629             :             using std::nextafter;
    1630           0 :             pDst[iDstOffset] = nextafter(pDst[iDstOffset], static_cast<T>(0));
    1631             :         }
    1632             :         else
    1633             :         {
    1634             :             using std::nextafter;
    1635         256 :             pDst[iDstOffset] =
    1636         256 :                 nextafter(pDst[iDstOffset], cpl::NumericLimits<T>::max());
    1637             :         }
    1638             :     }
    1639        1283 : }
    1640             : 
    1641             : /************************************************************************/
    1642             : /*                            AvoidNoData()                             */
    1643             : /************************************************************************/
    1644             : 
    1645             : template <class T>
    1646    25539331 : inline void AvoidNoData(const GDALWarpKernel *poWK, int iBand,
    1647             :                         GPtrDiff_t iDstOffset)
    1648             : {
    1649    25539331 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1650    25539331 :     T *pDst = reinterpret_cast<T *>(pabyDst);
    1651             : 
    1652    25539331 :     if (poWK->padfDstNoDataReal != nullptr &&
    1653    11380639 :         poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))
    1654             :     {
    1655         640 :         AvoidNoData(pDst, iDstOffset);
    1656             : 
    1657         640 :         if (!poWK->bWarnedAboutDstNoDataReplacement)
    1658             :         {
    1659          40 :             const_cast<GDALWarpKernel *>(poWK)
    1660             :                 ->bWarnedAboutDstNoDataReplacement = true;
    1661          40 :             CPLError(CE_Warning, CPLE_AppDefined,
    1662             :                      "Value %g in the source dataset has been changed to %g "
    1663             :                      "in the destination dataset to avoid being treated as "
    1664             :                      "NoData. To avoid this, select a different NoData value "
    1665             :                      "for the destination dataset.",
    1666          40 :                      poWK->padfDstNoDataReal[iBand],
    1667          40 :                      static_cast<double>(pDst[iDstOffset]));
    1668             :         }
    1669             :     }
    1670    25539331 : }
    1671             : 
    1672             : /************************************************************************/
    1673             : /*                      GWKAvoidNoDataMultiBand()                       */
    1674             : /************************************************************************/
    1675             : 
    1676             : template <class T>
    1677      524573 : static void GWKAvoidNoDataMultiBand(const GDALWarpKernel *poWK,
    1678             :                                     GPtrDiff_t iDstOffset)
    1679             : {
    1680      524573 :     T **ppDst = reinterpret_cast<T **>(poWK->papabyDstImage);
    1681      524573 :     if (poWK->padfDstNoDataReal != nullptr)
    1682             :     {
    1683      208615 :         for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    1684             :         {
    1685      208294 :             if (poWK->padfDstNoDataReal[iBand] !=
    1686      208294 :                 static_cast<double>(ppDst[iBand][iDstOffset]))
    1687      205830 :                 return;
    1688             :         }
    1689         964 :         for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    1690             :         {
    1691         643 :             AvoidNoData(ppDst[iBand], iDstOffset);
    1692             :         }
    1693             : 
    1694         321 :         if (!poWK->bWarnedAboutDstNoDataReplacement)
    1695             :         {
    1696          21 :             const_cast<GDALWarpKernel *>(poWK)
    1697             :                 ->bWarnedAboutDstNoDataReplacement = true;
    1698          42 :             std::string valueSrc, valueDst;
    1699          64 :             for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    1700             :             {
    1701          43 :                 if (!valueSrc.empty())
    1702             :                 {
    1703          22 :                     valueSrc += ',';
    1704          22 :                     valueDst += ',';
    1705             :                 }
    1706          43 :                 valueSrc += CPLSPrintf("%g", poWK->padfDstNoDataReal[iBand]);
    1707          43 :                 valueDst += CPLSPrintf(
    1708          43 :                     "%g", static_cast<double>(ppDst[iBand][iDstOffset]));
    1709             :             }
    1710          21 :             CPLError(CE_Warning, CPLE_AppDefined,
    1711             :                      "Value %s in the source dataset has been changed to %s "
    1712             :                      "in the destination dataset to avoid being treated as "
    1713             :                      "NoData. To avoid this, select a different NoData value "
    1714             :                      "for the destination dataset.",
    1715             :                      valueSrc.c_str(), valueDst.c_str());
    1716             :         }
    1717             :     }
    1718             : }
    1719             : 
    1720             : /************************************************************************/
    1721             : /*                      GWKAvoidNoDataMultiBand()                       */
    1722             : /************************************************************************/
    1723             : 
    1724      524573 : static void GWKAvoidNoDataMultiBand(const GDALWarpKernel *poWK,
    1725             :                                     GPtrDiff_t iDstOffset)
    1726             : {
    1727      524573 :     switch (poWK->eWorkingDataType)
    1728             :     {
    1729      523997 :         case GDT_UInt8:
    1730      523997 :             GWKAvoidNoDataMultiBand<std::uint8_t>(poWK, iDstOffset);
    1731      523997 :             break;
    1732             : 
    1733          64 :         case GDT_Int8:
    1734          64 :             GWKAvoidNoDataMultiBand<std::int8_t>(poWK, iDstOffset);
    1735          64 :             break;
    1736             : 
    1737          64 :         case GDT_Int16:
    1738          64 :             GWKAvoidNoDataMultiBand<std::int16_t>(poWK, iDstOffset);
    1739          64 :             break;
    1740             : 
    1741          64 :         case GDT_UInt16:
    1742          64 :             GWKAvoidNoDataMultiBand<std::uint16_t>(poWK, iDstOffset);
    1743          64 :             break;
    1744             : 
    1745          64 :         case GDT_Int32:
    1746          64 :             GWKAvoidNoDataMultiBand<std::int32_t>(poWK, iDstOffset);
    1747          64 :             break;
    1748             : 
    1749          64 :         case GDT_UInt32:
    1750          64 :             GWKAvoidNoDataMultiBand<std::uint32_t>(poWK, iDstOffset);
    1751          64 :             break;
    1752             : 
    1753          64 :         case GDT_Int64:
    1754          64 :             GWKAvoidNoDataMultiBand<std::int64_t>(poWK, iDstOffset);
    1755          64 :             break;
    1756             : 
    1757          64 :         case GDT_UInt64:
    1758          64 :             GWKAvoidNoDataMultiBand<std::uint64_t>(poWK, iDstOffset);
    1759          64 :             break;
    1760             : 
    1761           0 :         case GDT_Float16:
    1762           0 :             GWKAvoidNoDataMultiBand<GFloat16>(poWK, iDstOffset);
    1763           0 :             break;
    1764             : 
    1765          64 :         case GDT_Float32:
    1766          64 :             GWKAvoidNoDataMultiBand<float>(poWK, iDstOffset);
    1767          64 :             break;
    1768             : 
    1769          64 :         case GDT_Float64:
    1770          64 :             GWKAvoidNoDataMultiBand<double>(poWK, iDstOffset);
    1771          64 :             break;
    1772             : 
    1773           0 :         case GDT_CInt16:
    1774             :         case GDT_CInt32:
    1775             :         case GDT_CFloat16:
    1776             :         case GDT_CFloat32:
    1777             :         case GDT_CFloat64:
    1778             :         case GDT_Unknown:
    1779             :         case GDT_TypeCount:
    1780           0 :             break;
    1781             :     }
    1782      524573 : }
    1783             : 
    1784             : /************************************************************************/
    1785             : /*                       GWKSetPixelValueRealT()                        */
    1786             : /************************************************************************/
    1787             : 
    1788             : template <class T>
    1789    14954278 : static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,
    1790             :                                   GPtrDiff_t iDstOffset, double dfDensity,
    1791             :                                   T value, bool bAvoidNoDataSingleBand)
    1792             : {
    1793    14954278 :     T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    1794             : 
    1795             :     /* -------------------------------------------------------------------- */
    1796             :     /*      If the source density is less than 100% we need to fetch the    */
    1797             :     /*      existing destination value, and mix it with the source to       */
    1798             :     /*      get the new "to apply" value.  Also compute composite           */
    1799             :     /*      density.                                                        */
    1800             :     /*                                                                      */
    1801             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1802             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1803             :     /* -------------------------------------------------------------------- */
    1804    14954278 :     if (dfDensity < 0.9999)
    1805             :     {
    1806      945508 :         if (dfDensity < 0.0001)
    1807           0 :             return true;
    1808             : 
    1809      945508 :         double dfDstDensity = 1.0;
    1810             : 
    1811      945508 :         if (poWK->pafDstDensity != nullptr)
    1812      944036 :             dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
    1813        1472 :         else if (poWK->panDstValid != nullptr &&
    1814           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1815           0 :             dfDstDensity = 0.0;
    1816             : 
    1817             :         // It seems like we also ought to be testing panDstValid[] here!
    1818             : 
    1819      945508 :         const double dfDstReal = static_cast<double>(pDst[iDstOffset]);
    1820             : 
    1821             :         // The destination density is really only relative to the portion
    1822             :         // not occluded by the overlay.
    1823      945508 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    1824             : 
    1825      945508 :         const double dfReal =
    1826      945508 :             (double(value) * dfDensity + dfDstReal * dfDstInfluence) /
    1827      945508 :             (dfDensity + dfDstInfluence);
    1828             : 
    1829             :         /* --------------------------------------------------------------------
    1830             :          */
    1831             :         /*      Actually apply the destination value. */
    1832             :         /*                                                                      */
    1833             :         /*      Avoid using the destination nodata value for integer datatypes
    1834             :          */
    1835             :         /*      if by chance it is equal to the computed pixel value. */
    1836             :         /* --------------------------------------------------------------------
    1837             :          */
    1838      945508 :         pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
    1839             :     }
    1840             :     else
    1841             :     {
    1842    14008837 :         pDst[iDstOffset] = value;
    1843             :     }
    1844             : 
    1845    14954278 :     if (bAvoidNoDataSingleBand)
    1846    13681622 :         AvoidNoData<T>(poWK, iBand, iDstOffset);
    1847             : 
    1848    14954278 :     return true;
    1849             : }
    1850             : 
    1851             : /************************************************************************/
    1852             : /*                      ClampRoundAndAvoidNoData()                      */
    1853             : /************************************************************************/
    1854             : 
    1855             : template <class T>
    1856    12158105 : inline void ClampRoundAndAvoidNoData(const GDALWarpKernel *poWK, int iBand,
    1857             :                                      GPtrDiff_t iDstOffset, double dfReal,
    1858             :                                      bool bAvoidNoDataSingleBand)
    1859             : {
    1860    12158105 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1861    12158105 :     T *pDst = reinterpret_cast<T *>(pabyDst);
    1862             : 
    1863             :     if constexpr (cpl::NumericLimits<T>::is_integer)
    1864             :     {
    1865             :         using std::floor;
    1866    11660975 :         if (dfReal < static_cast<double>(cpl::NumericLimits<T>::lowest()))
    1867        6430 :             pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::lowest());
    1868    11654575 :         else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    1869       23967 :             pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::max());
    1870             :         else if constexpr (cpl::NumericLimits<T>::is_signed)
    1871       10410 :             pDst[iDstOffset] = static_cast<T>(floor(dfReal + 0.5));
    1872             :         else
    1873    11620165 :             pDst[iDstOffset] = static_cast<T>(dfReal + 0.5);
    1874             :     }
    1875             :     else
    1876             :     {
    1877      497130 :         pDst[iDstOffset] = static_cast<T>(dfReal);
    1878             :     }
    1879             : 
    1880    12158105 :     if (bAvoidNoDataSingleBand)
    1881    11857709 :         AvoidNoData<T>(poWK, iBand, iDstOffset);
    1882    12158105 : }
    1883             : 
    1884             : /************************************************************************/
    1885             : /*                          GWKSetPixelValue()                          */
    1886             : /************************************************************************/
    1887             : 
    1888    11045400 : static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
    1889             :                              GPtrDiff_t iDstOffset, double dfDensity,
    1890             :                              double dfReal, double dfImag,
    1891             :                              bool bAvoidNoDataSingleBand)
    1892             : 
    1893             : {
    1894    11045400 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    1895             : 
    1896             :     /* -------------------------------------------------------------------- */
    1897             :     /*      If the source density is less than 100% we need to fetch the    */
    1898             :     /*      existing destination value, and mix it with the source to       */
    1899             :     /*      get the new "to apply" value.  Also compute composite           */
    1900             :     /*      density.                                                        */
    1901             :     /*                                                                      */
    1902             :     /*      We avoid mixing if density is very near one or risk mixing      */
    1903             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    1904             :     /* -------------------------------------------------------------------- */
    1905    11045400 :     if (dfDensity < 0.9999)
    1906             :     {
    1907         800 :         if (dfDensity < 0.0001)
    1908           0 :             return true;
    1909             : 
    1910         800 :         double dfDstDensity = 1.0;
    1911         800 :         if (poWK->pafDstDensity != nullptr)
    1912         800 :             dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
    1913           0 :         else if (poWK->panDstValid != nullptr &&
    1914           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    1915           0 :             dfDstDensity = 0.0;
    1916             : 
    1917         800 :         double dfDstReal = 0.0;
    1918         800 :         double dfDstImag = 0.0;
    1919             :         // It seems like we also ought to be testing panDstValid[] here!
    1920             : 
    1921             :         // TODO(schwehr): Factor out this repreated type of set.
    1922         800 :         switch (poWK->eWorkingDataType)
    1923             :         {
    1924           0 :             case GDT_UInt8:
    1925           0 :                 dfDstReal = pabyDst[iDstOffset];
    1926           0 :                 dfDstImag = 0.0;
    1927           0 :                 break;
    1928             : 
    1929           0 :             case GDT_Int8:
    1930           0 :                 dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
    1931           0 :                 dfDstImag = 0.0;
    1932           0 :                 break;
    1933             : 
    1934         400 :             case GDT_Int16:
    1935         400 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
    1936         400 :                 dfDstImag = 0.0;
    1937         400 :                 break;
    1938             : 
    1939         400 :             case GDT_UInt16:
    1940         400 :                 dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
    1941         400 :                 dfDstImag = 0.0;
    1942         400 :                 break;
    1943             : 
    1944           0 :             case GDT_Int32:
    1945           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
    1946           0 :                 dfDstImag = 0.0;
    1947           0 :                 break;
    1948             : 
    1949           0 :             case GDT_UInt32:
    1950           0 :                 dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
    1951           0 :                 dfDstImag = 0.0;
    1952           0 :                 break;
    1953             : 
    1954           0 :             case GDT_Int64:
    1955           0 :                 dfDstReal = static_cast<double>(
    1956           0 :                     reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
    1957           0 :                 dfDstImag = 0.0;
    1958           0 :                 break;
    1959             : 
    1960           0 :             case GDT_UInt64:
    1961           0 :                 dfDstReal = static_cast<double>(
    1962           0 :                     reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
    1963           0 :                 dfDstImag = 0.0;
    1964           0 :                 break;
    1965             : 
    1966           0 :             case GDT_Float16:
    1967           0 :                 dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
    1968           0 :                 dfDstImag = 0.0;
    1969           0 :                 break;
    1970             : 
    1971           0 :             case GDT_Float32:
    1972           0 :                 dfDstReal =
    1973           0 :                     double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
    1974           0 :                 dfDstImag = 0.0;
    1975           0 :                 break;
    1976             : 
    1977           0 :             case GDT_Float64:
    1978           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
    1979           0 :                 dfDstImag = 0.0;
    1980           0 :                 break;
    1981             : 
    1982           0 :             case GDT_CInt16:
    1983           0 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
    1984           0 :                 dfDstImag =
    1985           0 :                     reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
    1986           0 :                 break;
    1987             : 
    1988           0 :             case GDT_CInt32:
    1989           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
    1990           0 :                 dfDstImag =
    1991           0 :                     reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
    1992           0 :                 break;
    1993             : 
    1994           0 :             case GDT_CFloat16:
    1995             :                 dfDstReal =
    1996           0 :                     reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2];
    1997             :                 dfDstImag =
    1998           0 :                     reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1];
    1999           0 :                 break;
    2000             : 
    2001           0 :             case GDT_CFloat32:
    2002           0 :                 dfDstReal =
    2003           0 :                     double(reinterpret_cast<float *>(pabyDst)[iDstOffset * 2]);
    2004           0 :                 dfDstImag = double(
    2005           0 :                     reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1]);
    2006           0 :                 break;
    2007             : 
    2008           0 :             case GDT_CFloat64:
    2009           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
    2010           0 :                 dfDstImag =
    2011           0 :                     reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
    2012           0 :                 break;
    2013             : 
    2014           0 :             case GDT_Unknown:
    2015             :             case GDT_TypeCount:
    2016           0 :                 CPLAssert(false);
    2017             :                 return false;
    2018             :         }
    2019             : 
    2020             :         // The destination density is really only relative to the portion
    2021             :         // not occluded by the overlay.
    2022         800 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    2023             : 
    2024         800 :         dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
    2025         800 :                  (dfDensity + dfDstInfluence);
    2026             : 
    2027         800 :         dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
    2028         800 :                  (dfDensity + dfDstInfluence);
    2029             :     }
    2030             : 
    2031             :     /* -------------------------------------------------------------------- */
    2032             :     /*      Actually apply the destination value.                           */
    2033             :     /*                                                                      */
    2034             :     /*      Avoid using the destination nodata value for integer datatypes  */
    2035             :     /*      if by chance it is equal to the computed pixel value.           */
    2036             :     /* -------------------------------------------------------------------- */
    2037             : 
    2038    11045400 :     switch (poWK->eWorkingDataType)
    2039             :     {
    2040    10323000 :         case GDT_UInt8:
    2041    10323000 :             ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal,
    2042             :                                             bAvoidNoDataSingleBand);
    2043    10323000 :             break;
    2044             : 
    2045           1 :         case GDT_Int8:
    2046           1 :             ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal,
    2047             :                                             bAvoidNoDataSingleBand);
    2048           1 :             break;
    2049             : 
    2050        7471 :         case GDT_Int16:
    2051        7471 :             ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal,
    2052             :                                              bAvoidNoDataSingleBand);
    2053        7471 :             break;
    2054             : 
    2055         464 :         case GDT_UInt16:
    2056         464 :             ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal,
    2057             :                                               bAvoidNoDataSingleBand);
    2058         464 :             break;
    2059             : 
    2060          63 :         case GDT_UInt32:
    2061          63 :             ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal,
    2062             :                                               bAvoidNoDataSingleBand);
    2063          63 :             break;
    2064             : 
    2065          63 :         case GDT_Int32:
    2066          63 :             ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal,
    2067             :                                              bAvoidNoDataSingleBand);
    2068          63 :             break;
    2069             : 
    2070           0 :         case GDT_UInt64:
    2071           0 :             ClampRoundAndAvoidNoData<std::uint64_t>(
    2072             :                 poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
    2073           0 :             break;
    2074             : 
    2075           0 :         case GDT_Int64:
    2076           0 :             ClampRoundAndAvoidNoData<std::int64_t>(
    2077             :                 poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
    2078           0 :             break;
    2079             : 
    2080           0 :         case GDT_Float16:
    2081           0 :             ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal,
    2082             :                                                bAvoidNoDataSingleBand);
    2083           0 :             break;
    2084             : 
    2085      478957 :         case GDT_Float32:
    2086      478957 :             ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal,
    2087             :                                             bAvoidNoDataSingleBand);
    2088      478957 :             break;
    2089             : 
    2090         149 :         case GDT_Float64:
    2091         149 :             ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal,
    2092             :                                              bAvoidNoDataSingleBand);
    2093         149 :             break;
    2094             : 
    2095      234079 :         case GDT_CInt16:
    2096             :         {
    2097             :             typedef GInt16 T;
    2098      234079 :             if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
    2099           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2100           0 :                     cpl::NumericLimits<T>::min();
    2101      234079 :             else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    2102           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2103           0 :                     cpl::NumericLimits<T>::max();
    2104             :             else
    2105      234079 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2106      234079 :                     static_cast<T>(floor(dfReal + 0.5));
    2107      234079 :             if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
    2108           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2109           0 :                     cpl::NumericLimits<T>::min();
    2110      234079 :             else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
    2111           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2112           0 :                     cpl::NumericLimits<T>::max();
    2113             :             else
    2114      234079 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2115      234079 :                     static_cast<T>(floor(dfImag + 0.5));
    2116      234079 :             break;
    2117             :         }
    2118             : 
    2119         379 :         case GDT_CInt32:
    2120             :         {
    2121             :             typedef GInt32 T;
    2122         379 :             if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
    2123           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2124           0 :                     cpl::NumericLimits<T>::min();
    2125         379 :             else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
    2126           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2127           0 :                     cpl::NumericLimits<T>::max();
    2128             :             else
    2129         379 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
    2130         379 :                     static_cast<T>(floor(dfReal + 0.5));
    2131         379 :             if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
    2132           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2133           0 :                     cpl::NumericLimits<T>::min();
    2134         379 :             else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
    2135           0 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2136           0 :                     cpl::NumericLimits<T>::max();
    2137             :             else
    2138         379 :                 reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
    2139         379 :                     static_cast<T>(floor(dfImag + 0.5));
    2140         379 :             break;
    2141             :         }
    2142             : 
    2143           0 :         case GDT_CFloat16:
    2144           0 :             reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2] =
    2145           0 :                 static_cast<GFloat16>(dfReal);
    2146           0 :             reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1] =
    2147           0 :                 static_cast<GFloat16>(dfImag);
    2148           0 :             break;
    2149             : 
    2150         394 :         case GDT_CFloat32:
    2151         394 :             reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
    2152         394 :                 static_cast<float>(dfReal);
    2153         394 :             reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
    2154         394 :                 static_cast<float>(dfImag);
    2155         394 :             break;
    2156             : 
    2157         380 :         case GDT_CFloat64:
    2158         380 :             reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
    2159         380 :             reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
    2160         380 :             break;
    2161             : 
    2162           0 :         case GDT_Unknown:
    2163             :         case GDT_TypeCount:
    2164           0 :             return false;
    2165             :     }
    2166             : 
    2167    11045400 :     return true;
    2168             : }
    2169             : 
    2170             : /************************************************************************/
    2171             : /*                        GWKSetPixelValueReal()                        */
    2172             : /************************************************************************/
    2173             : 
    2174     1347980 : static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
    2175             :                                  GPtrDiff_t iDstOffset, double dfDensity,
    2176             :                                  double dfReal, bool bAvoidNoDataSingleBand)
    2177             : // Writes the real value dfReal to pixel iDstOffset of destination band iBand, blending it with the existing destination value when dfDensity < 0.9999.
    2178             : {
    2179     1347980 :     GByte *pabyDst = poWK->papabyDstImage[iBand];
    2180             :     // Returns true on success (including the no-op case dfDensity < 0.0001) and false for complex or unknown working data types.
    2181             :     /* -------------------------------------------------------------------- */
    2182             :     /*      If the source density is less than 100% we need to fetch the    */
    2183             :     /*      existing destination value, and mix it with the source to       */
    2184             :     /*      get the new "to apply" value.  Also compute composite           */
    2185             :     /*      density.                                                        */
    2186             :     /*                                                                      */
    2187             :     /*      We avoid mixing if density is very near one or risk mixing      */
    2188             :     /*      in very extreme nodata values and causing odd results (#1610)   */
    2189             :     /* -------------------------------------------------------------------- */
    2190     1347980 :     if (dfDensity < 0.9999)
    2191             :     {
    2192         600 :         if (dfDensity < 0.0001)
    2193           0 :             return true;  // Source contributes (almost) nothing: leave the destination untouched.
    2194             : 
    2195         600 :         double dfDstReal = 0.0;
    2196         600 :         double dfDstDensity = 1.0;  // Destination counts as fully dense unless an array below says otherwise.
    2197             : 
    2198         600 :         if (poWK->pafDstDensity != nullptr)
    2199         600 :             dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
    2200           0 :         else if (poWK->panDstValid != nullptr &&
    2201           0 :                  !CPLMaskGet(poWK->panDstValid, iDstOffset))
    2202           0 :             dfDstDensity = 0.0;
    2203             : 
    2204             :         // Note: panDstValid[] is only consulted above when pafDstDensity is null; with a density array present the validity mask is not tested.
    2205             :         // Fetch the current destination sample as a double, according to the working data type.
    2206         600 :         switch (poWK->eWorkingDataType)
    2207             :         {
    2208           0 :             case GDT_UInt8:
    2209           0 :                 dfDstReal = pabyDst[iDstOffset];
    2210           0 :                 break;
    2211             : 
    2212           0 :             case GDT_Int8:
    2213           0 :                 dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
    2214           0 :                 break;
    2215             : 
    2216         300 :             case GDT_Int16:
    2217         300 :                 dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
    2218         300 :                 break;
    2219             : 
    2220         300 :             case GDT_UInt16:
    2221         300 :                 dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
    2222         300 :                 break;
    2223             : 
    2224           0 :             case GDT_Int32:
    2225           0 :                 dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
    2226           0 :                 break;
    2227             : 
    2228           0 :             case GDT_UInt32:
    2229           0 :                 dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
    2230           0 :                 break;
    2231             : 
    2232           0 :             case GDT_Int64:
    2233           0 :                 dfDstReal = static_cast<double>(
    2234           0 :                     reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
    2235           0 :                 break;
    2236             : 
    2237           0 :             case GDT_UInt64:
    2238           0 :                 dfDstReal = static_cast<double>(
    2239           0 :                     reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
    2240           0 :                 break;
    2241             : 
    2242           0 :             case GDT_Float16:
    2243           0 :                 dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
    2244           0 :                 break;
    2245             : 
    2246           0 :             case GDT_Float32:
    2247           0 :                 dfDstReal =
    2248           0 :                     double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
    2249           0 :                 break;
    2250             : 
    2251           0 :             case GDT_Float64:
    2252           0 :                 dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
    2253           0 :                 break;
    2254             : 
    2255           0 :             case GDT_CInt16:
    2256             :             case GDT_CInt32:
    2257             :             case GDT_CFloat16:
    2258             :             case GDT_CFloat32:
    2259             :             case GDT_CFloat64:
    2260             :             case GDT_Unknown:
    2261             :             case GDT_TypeCount:
    2262           0 :                 CPLAssert(false);  // Complex and unknown types are not handled by this real-only writer.
    2263             :                 return false;
    2264             :         }
    2265             : 
    2266             :         // The destination density is really only relative to the portion
    2267             :         // not occluded by the overlay.
    2268         600 :         const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
    2269             :         // Density-weighted average of the source and destination values; the divisor is at least dfDensity >= 0.0001 here.
    2270         600 :         dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
    2271         600 :                  (dfDensity + dfDstInfluence);
    2272             :     }
    2273             : 
    2274             :     /* -------------------------------------------------------------------- */
    2275             :     /*      Actually apply the destination value.                           */
    2276             :     /*                                                                      */
    2277             :     /*      Avoid using the destination nodata value for integer datatypes  */
    2278             :     /*      if by chance it is equal to the computed pixel value.           */
    2279             :     /* -------------------------------------------------------------------- */
    2280             :     // Every supported type delegates the store to ClampRoundAndAvoidNoData<T> (defined elsewhere in this file).
    2281     1347980 :     switch (poWK->eWorkingDataType)
    2282             :     {
    2283     1325840 :         case GDT_UInt8:
    2284     1325840 :             ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal,
    2285             :                                             bAvoidNoDataSingleBand);
    2286     1325840 :             break;
    2287             : 
    2288         112 :         case GDT_Int8:
    2289         112 :             ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal,
    2290             :                                             bAvoidNoDataSingleBand);
    2291         112 :             break;
    2292             : 
    2293        1197 :         case GDT_Int16:
    2294        1197 :             ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal,
    2295             :                                              bAvoidNoDataSingleBand);
    2296        1197 :             break;
    2297             : 
    2298         475 :         case GDT_UInt16:
    2299         475 :             ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal,
    2300             :                                               bAvoidNoDataSingleBand);
    2301         475 :             break;
    2302             : 
    2303         539 :         case GDT_UInt32:
    2304         539 :             ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal,
    2305             :                                               bAvoidNoDataSingleBand);
    2306         539 :             break;
    2307             : 
    2308        1342 :         case GDT_Int32:
    2309        1342 :             ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal,
    2310             :                                              bAvoidNoDataSingleBand);
    2311        1342 :             break;
    2312             : 
    2313         224 :         case GDT_UInt64:
    2314         224 :             ClampRoundAndAvoidNoData<std::uint64_t>(
    2315             :                 poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
    2316         224 :             break;
    2317             : 
    2318         224 :         case GDT_Int64:
    2319         224 :             ClampRoundAndAvoidNoData<std::int64_t>(
    2320             :                 poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
    2321         224 :             break;
    2322             : 
    2323           0 :         case GDT_Float16:
    2324           0 :             ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal,
    2325             :                                                bAvoidNoDataSingleBand);
    2326           0 :             break;
    2327             : 
    2328        3538 :         case GDT_Float32:
    2329        3538 :             ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal,
    2330             :                                             bAvoidNoDataSingleBand);
    2331        3538 :             break;
    2332             : 
    2333       14486 :         case GDT_Float64:
    2334       14486 :             ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal,
    2335             :                                              bAvoidNoDataSingleBand);
    2336       14486 :             break;
    2337             : 
    2338           0 :         case GDT_CInt16:
    2339             :         case GDT_CInt32:
    2340             :         case GDT_CFloat16:
    2341             :         case GDT_CFloat32:
    2342             :         case GDT_CFloat64:
    2343           0 :             return false;  // Complex working types are rejected without writing anything.
    2344             : 
    2345           0 :         case GDT_Unknown:
    2346             :         case GDT_TypeCount:
    2347           0 :             CPLAssert(false);
    2348             :             return false;
    2349             :     }
    2350             : 
    2351     1347980 :     return true;
    2352             : }
    2353             : 
    2354             : /************************************************************************/
    2355             : /*                          GWKGetPixelValue()                          */
    2356             : /************************************************************************/
    2357             : 
    2358             : /* It is assumed that panUnifiedSrcValid has been checked before */
    2359             : 
    2360    40173600 : static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
    2361             :                              GPtrDiff_t iSrcOffset, double *pdfDensity,
    2362             :                              double *pdfReal, double *pdfImag)
    2363             : // Reads source pixel iSrcOffset of band iBand into *pdfReal / *pdfImag (as doubles) and its density into *pdfDensity; returns true when that density is non-zero.
    2364             : {
    2365    40173600 :     GByte *pabySrc = poWK->papabySrcImage[iBand];
    2366             :     // A pixel cleared in this band's validity mask is reported with zero density; *pdfReal and *pdfImag are not written in that case.
    2367    80347200 :     if (poWK->papanBandSrcValid != nullptr &&
    2368    40173600 :         poWK->papanBandSrcValid[iBand] != nullptr &&
    2369           0 :         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
    2370             :     {
    2371           0 :         *pdfDensity = 0.0;
    2372           0 :         return false;
    2373             :     }
    2374             : 
    2375    40173600 :     *pdfReal = 0.0;
    2376    40173600 :     *pdfImag = 0.0;
    2377             :     // Real types read one sample at iSrcOffset (imaginary part 0); complex types read the pair at iSrcOffset * 2 and iSrcOffset * 2 + 1.
    2378             :     // TODO(schwehr): Fix casting.
    2379    40173600 :     switch (poWK->eWorkingDataType)
    2380             :     {
    2381    39096600 :         case GDT_UInt8:
    2382    39096600 :             *pdfReal = pabySrc[iSrcOffset];
    2383    39096600 :             *pdfImag = 0.0;
    2384    39096600 :             break;
    2385             : 
    2386           3 :         case GDT_Int8:
    2387           3 :             *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
    2388           3 :             *pdfImag = 0.0;
    2389           3 :             break;
    2390             : 
    2391       28229 :         case GDT_Int16:
    2392       28229 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
    2393       28229 :             *pdfImag = 0.0;
    2394       28229 :             break;
    2395             : 
    2396         166 :         case GDT_UInt16:
    2397         166 :             *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
    2398         166 :             *pdfImag = 0.0;
    2399         166 :             break;
    2400             : 
    2401          63 :         case GDT_Int32:
    2402          63 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
    2403          63 :             *pdfImag = 0.0;
    2404          63 :             break;
    2405             : 
    2406          63 :         case GDT_UInt32:
    2407          63 :             *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
    2408          63 :             *pdfImag = 0.0;
    2409          63 :             break;
    2410             : 
    2411           0 :         case GDT_Int64:
    2412           0 :             *pdfReal = static_cast<double>(
    2413           0 :                 reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
    2414           0 :             *pdfImag = 0.0;
    2415           0 :             break;
    2416             : 
    2417           0 :         case GDT_UInt64:
    2418           0 :             *pdfReal = static_cast<double>(
    2419           0 :                 reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
    2420           0 :             *pdfImag = 0.0;
    2421           0 :             break;
    2422             : 
    2423           0 :         case GDT_Float16:
    2424           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
    2425           0 :             *pdfImag = 0.0;
    2426           0 :             break;
    2427             : 
    2428     1047220 :         case GDT_Float32:
    2429     1047220 :             *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
    2430     1047220 :             *pdfImag = 0.0;
    2431     1047220 :             break;
    2432             : 
    2433         587 :         case GDT_Float64:
    2434         587 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
    2435         587 :             *pdfImag = 0.0;
    2436         587 :             break;
    2437             : 
    2438         133 :         case GDT_CInt16:
    2439         133 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
    2440         133 :             *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
    2441         133 :             break;
    2442             : 
    2443         133 :         case GDT_CInt32:
    2444         133 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
    2445         133 :             *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
    2446         133 :             break;
    2447             : 
    2448           0 :         case GDT_CFloat16:
    2449           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2];
    2450           0 :             *pdfImag =
    2451           0 :                 reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2 + 1];
    2452           0 :             break;
    2453             : 
    2454         194 :         case GDT_CFloat32:
    2455         194 :             *pdfReal =
    2456         194 :                 double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2]);
    2457         194 :             *pdfImag =
    2458         194 :                 double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1]);
    2459         194 :             break;
    2460             : 
    2461         138 :         case GDT_CFloat64:
    2462         138 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
    2463         138 :             *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
    2464         138 :             break;
    2465             : 
    2466           0 :         case GDT_Unknown:
    2467             :         case GDT_TypeCount:
    2468           0 :             CPLAssert(false);  // Not a readable working data type: report zero density and fail.
    2469             :             *pdfDensity = 0.0;
    2470             :             return false;
    2471             :     }
    2472             :     // Without a unified source density array every pixel counts as fully dense.
    2473    40173600 :     if (poWK->pafUnifiedSrcDensity != nullptr)
    2474    12745700 :         *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    2475             :     else
    2476    27427800 :         *pdfDensity = 1.0;
    2477             : 
    2478    40173600 :     return *pdfDensity != 0.0;
    2479             : }
    2480             : 
    2481             : /************************************************************************/
    2482             : /*                        GWKGetPixelValueReal()                        */
    2483             : /************************************************************************/
    2484             : 
    2485       15516 : static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
    2486             :                                  GPtrDiff_t iSrcOffset, double *pdfDensity,
    2487             :                                  double *pdfReal)
    2488             : // Reads source pixel iSrcOffset of band iBand into *pdfReal (as a double) and its density into *pdfDensity; returns true when that density is non-zero.
    2489             : {
    2490       15516 :     GByte *pabySrc = poWK->papabySrcImage[iBand];
    2491             :     // A pixel cleared in this band's validity mask is reported with zero density; *pdfReal is not written in that case.
    2492       31034 :     if (poWK->papanBandSrcValid != nullptr &&
    2493       15518 :         poWK->papanBandSrcValid[iBand] != nullptr &&
    2494           2 :         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
    2495             :     {
    2496           0 :         *pdfDensity = 0.0;
    2497           0 :         return false;
    2498             :     }
    2499             :     // Convert the stored sample to double according to the working data type.
    2500       15516 :     switch (poWK->eWorkingDataType)
    2501             :     {
    2502           1 :         case GDT_UInt8:
    2503           1 :             *pdfReal = pabySrc[iSrcOffset];
    2504           1 :             break;
    2505             : 
    2506           0 :         case GDT_Int8:
    2507           0 :             *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
    2508           0 :             break;
    2509             : 
    2510           1 :         case GDT_Int16:
    2511           1 :             *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
    2512           1 :             break;
    2513             : 
    2514           1 :         case GDT_UInt16:
    2515           1 :             *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
    2516           1 :             break;
    2517             : 
    2518         982 :         case GDT_Int32:
    2519         982 :             *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
    2520         982 :             break;
    2521             : 
    2522         179 :         case GDT_UInt32:
    2523         179 :             *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
    2524         179 :             break;
    2525             : 
    2526         112 :         case GDT_Int64:
    2527         112 :             *pdfReal = static_cast<double>(
    2528         112 :                 reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
    2529         112 :             break;
    2530             : 
    2531         112 :         case GDT_UInt64:
    2532         112 :             *pdfReal = static_cast<double>(
    2533         112 :                 reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
    2534         112 :             break;
    2535             : 
    2536           0 :         case GDT_Float16:
    2537           0 :             *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
    2538           0 :             break;
    2539             : 
    2540           2 :         case GDT_Float32:
    2541           2 :             *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
    2542           2 :             break;
    2543             : 
    2544       14126 :         case GDT_Float64:
    2545       14126 :             *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
    2546       14126 :             break;
    2547             : 
    2548           0 :         case GDT_CInt16:
    2549             :         case GDT_CInt32:
    2550             :         case GDT_CFloat16:
    2551             :         case GDT_CFloat32:
    2552             :         case GDT_CFloat64:
    2553             :         case GDT_Unknown:
    2554             :         case GDT_TypeCount:
    2555           0 :             CPLAssert(false);  // Complex and unknown types are rejected. NOTE(review): *pdfDensity and *pdfReal are left unassigned on this path.
    2556             :             return false;
    2557             :     }
    2558             :     // Without a unified source density array every pixel counts as fully dense.
    2559       15516 :     if (poWK->pafUnifiedSrcDensity != nullptr)
    2560           0 :         *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    2561             :     else
    2562       15516 :         *pdfDensity = 1.0;
    2563             : 
    2564       15516 :     return *pdfDensity != 0.0;
    2565             : }
    2566             : 
    2567             : /************************************************************************/
    2568             : /*                           GWKGetPixelRow()                           */
    2569             : /************************************************************************/
    2570             : 
    2571             : /* It is assumed that adfImag[] is set to 0 by caller code for non-complex */
    2572             : /* data-types. */
    2573             : 
    2574     2369710 : static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,
    2575             :                            GPtrDiff_t iSrcOffset, int nHalfSrcLen,
    2576             :                            double *padfDensity, double adfReal[],
    2577             :                            double *padfImag)
    2578             : {
    2579             :     // We know that nSrcLen is even, so we can *always* unroll loops 2x.
    2580     2369710 :     const int nSrcLen = nHalfSrcLen * 2;
    2581     2369710 :     bool bHasValid = false;
    2582             : 
    2583     2369710 :     if (padfDensity != nullptr)
    2584             :     {
    2585             :         // Init the density.
    2586     3384030 :         for (int i = 0; i < nSrcLen; i += 2)
    2587             :         {
    2588     2211910 :             padfDensity[i] = 1.0;
    2589     2211910 :             padfDensity[i + 1] = 1.0;
    2590             :         }
    2591             : 
    2592     1172120 :         if (poWK->panUnifiedSrcValid != nullptr)
    2593             :         {
    2594     3281460 :             for (int i = 0; i < nSrcLen; i += 2)
    2595             :             {
    2596     2142070 :                 if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
    2597     2067740 :                     bHasValid = true;
    2598             :                 else
    2599       74323 :                     padfDensity[i] = 0.0;
    2600             : 
    2601     2142070 :                 if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
    2602     2068400 :                     bHasValid = true;
    2603             :                 else
    2604       73668 :                     padfDensity[i + 1] = 0.0;
    2605             :             }
    2606             : 
    2607             :             // Reset or fail as needed.
    2608     1139400 :             if (bHasValid)
    2609     1116590 :                 bHasValid = false;
    2610             :             else
    2611       22806 :                 return false;
    2612             :         }
    2613             : 
    2614     1149320 :         if (poWK->papanBandSrcValid != nullptr &&
    2615           0 :             poWK->papanBandSrcValid[iBand] != nullptr)
    2616             :         {
    2617           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2618             :             {
    2619           0 :                 if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
    2620           0 :                     bHasValid = true;
    2621             :                 else
    2622           0 :                     padfDensity[i] = 0.0;
    2623             : 
    2624           0 :                 if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
    2625           0 :                                iSrcOffset + i + 1))
    2626           0 :                     bHasValid = true;
    2627             :                 else
    2628           0 :                     padfDensity[i + 1] = 0.0;
    2629             :             }
    2630             : 
    2631             :             // Reset or fail as needed.
    2632           0 :             if (bHasValid)
    2633           0 :                 bHasValid = false;
    2634             :             else
    2635           0 :                 return false;
    2636             :         }
    2637             :     }
    2638             : 
    2639             :     // TODO(schwehr): Fix casting.
    2640             :     // Fetch data.
    2641     2346910 :     switch (poWK->eWorkingDataType)
    2642             :     {
    2643     1136680 :         case GDT_UInt8:
    2644             :         {
    2645     1136680 :             GByte *pSrc =
    2646     1136680 :                 reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
    2647     1136680 :             pSrc += iSrcOffset;
    2648     3281570 :             for (int i = 0; i < nSrcLen; i += 2)
    2649             :             {
    2650     2144890 :                 adfReal[i] = pSrc[i];
    2651     2144890 :                 adfReal[i + 1] = pSrc[i + 1];
    2652             :             }
    2653     1136680 :             break;
    2654             :         }
    2655             : 
    2656         196 :         case GDT_Int8:
    2657             :         {
    2658         196 :             GInt8 *pSrc =
    2659         196 :                 reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
    2660         196 :             pSrc += iSrcOffset;
    2661         392 :             for (int i = 0; i < nSrcLen; i += 2)
    2662             :             {
    2663         196 :                 adfReal[i] = pSrc[i];
    2664         196 :                 adfReal[i + 1] = pSrc[i + 1];
    2665             :             }
    2666         196 :             break;
    2667             :         }
    2668             : 
    2669        5754 :         case GDT_Int16:
    2670             :         {
    2671        5754 :             GInt16 *pSrc =
    2672        5754 :                 reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
    2673        5754 :             pSrc += iSrcOffset;
    2674       21772 :             for (int i = 0; i < nSrcLen; i += 2)
    2675             :             {
    2676       16018 :                 adfReal[i] = pSrc[i];
    2677       16018 :                 adfReal[i + 1] = pSrc[i + 1];
    2678             :             }
    2679        5754 :             break;
    2680             :         }
    2681             : 
    2682        4310 :         case GDT_UInt16:
    2683             :         {
    2684        4310 :             GUInt16 *pSrc =
    2685        4310 :                 reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
    2686        4310 :             pSrc += iSrcOffset;
    2687       18884 :             for (int i = 0; i < nSrcLen; i += 2)
    2688             :             {
    2689       14574 :                 adfReal[i] = pSrc[i];
    2690       14574 :                 adfReal[i + 1] = pSrc[i + 1];
    2691             :             }
    2692        4310 :             break;
    2693             :         }
    2694             : 
    2695         946 :         case GDT_Int32:
    2696             :         {
    2697         946 :             GInt32 *pSrc =
    2698         946 :                 reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
    2699         946 :             pSrc += iSrcOffset;
    2700        2624 :             for (int i = 0; i < nSrcLen; i += 2)
    2701             :             {
    2702        1678 :                 adfReal[i] = pSrc[i];
    2703        1678 :                 adfReal[i + 1] = pSrc[i + 1];
    2704             :             }
    2705         946 :             break;
    2706             :         }
    2707             : 
    2708         946 :         case GDT_UInt32:
    2709             :         {
    2710         946 :             GUInt32 *pSrc =
    2711         946 :                 reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
    2712         946 :             pSrc += iSrcOffset;
    2713        2624 :             for (int i = 0; i < nSrcLen; i += 2)
    2714             :             {
    2715        1678 :                 adfReal[i] = pSrc[i];
    2716        1678 :                 adfReal[i + 1] = pSrc[i + 1];
    2717             :             }
    2718         946 :             break;
    2719             :         }
    2720             : 
    2721         196 :         case GDT_Int64:
    2722             :         {
    2723         196 :             auto pSrc =
    2724         196 :                 reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
    2725         196 :             pSrc += iSrcOffset;
    2726         392 :             for (int i = 0; i < nSrcLen; i += 2)
    2727             :             {
    2728         196 :                 adfReal[i] = static_cast<double>(pSrc[i]);
    2729         196 :                 adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
    2730             :             }
    2731         196 :             break;
    2732             :         }
    2733             : 
    2734         196 :         case GDT_UInt64:
    2735             :         {
    2736         196 :             auto pSrc =
    2737         196 :                 reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
    2738         196 :             pSrc += iSrcOffset;
    2739         392 :             for (int i = 0; i < nSrcLen; i += 2)
    2740             :             {
    2741         196 :                 adfReal[i] = static_cast<double>(pSrc[i]);
    2742         196 :                 adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
    2743             :             }
    2744         196 :             break;
    2745             :         }
    2746             : 
    2747           0 :         case GDT_Float16:
    2748             :         {
    2749           0 :             GFloat16 *pSrc =
    2750           0 :                 reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
    2751           0 :             pSrc += iSrcOffset;
    2752           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2753             :             {
    2754           0 :                 adfReal[i] = pSrc[i];
    2755           0 :                 adfReal[i + 1] = pSrc[i + 1];
    2756             :             }
    2757           0 :             break;
    2758             :         }
    2759             : 
    2760       25270 :         case GDT_Float32:
    2761             :         {
    2762       25270 :             float *pSrc =
    2763       25270 :                 reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
    2764       25270 :             pSrc += iSrcOffset;
    2765      121739 :             for (int i = 0; i < nSrcLen; i += 2)
    2766             :             {
    2767       96469 :                 adfReal[i] = double(pSrc[i]);
    2768       96469 :                 adfReal[i + 1] = double(pSrc[i + 1]);
    2769             :             }
    2770       25270 :             break;
    2771             :         }
    2772             : 
    2773         946 :         case GDT_Float64:
    2774             :         {
    2775         946 :             double *pSrc =
    2776         946 :                 reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
    2777         946 :             pSrc += iSrcOffset;
    2778        2624 :             for (int i = 0; i < nSrcLen; i += 2)
    2779             :             {
    2780        1678 :                 adfReal[i] = pSrc[i];
    2781        1678 :                 adfReal[i + 1] = pSrc[i + 1];
    2782             :             }
    2783         946 :             break;
    2784             :         }
    2785             : 
    2786     1169220 :         case GDT_CInt16:
    2787             :         {
    2788     1169220 :             GInt16 *pSrc =
    2789     1169220 :                 reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
    2790     1169220 :             pSrc += 2 * iSrcOffset;
    2791     4676020 :             for (int i = 0; i < nSrcLen; i += 2)
    2792             :             {
    2793     3506800 :                 adfReal[i] = pSrc[2 * i];
    2794     3506800 :                 padfImag[i] = pSrc[2 * i + 1];
    2795             : 
    2796     3506800 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2797     3506800 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2798             :             }
    2799     1169220 :             break;
    2800             :         }
    2801             : 
    2802         750 :         case GDT_CInt32:
    2803             :         {
    2804         750 :             GInt32 *pSrc =
    2805         750 :                 reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
    2806         750 :             pSrc += 2 * iSrcOffset;
    2807        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2808             :             {
    2809        1482 :                 adfReal[i] = pSrc[2 * i];
    2810        1482 :                 padfImag[i] = pSrc[2 * i + 1];
    2811             : 
    2812        1482 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2813        1482 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2814             :             }
    2815         750 :             break;
    2816             :         }
    2817             : 
    2818           0 :         case GDT_CFloat16:
    2819             :         {
    2820           0 :             GFloat16 *pSrc =
    2821           0 :                 reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
    2822           0 :             pSrc += 2 * iSrcOffset;
    2823           0 :             for (int i = 0; i < nSrcLen; i += 2)
    2824             :             {
    2825           0 :                 adfReal[i] = pSrc[2 * i];
    2826           0 :                 padfImag[i] = pSrc[2 * i + 1];
    2827             : 
    2828           0 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2829           0 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2830             :             }
    2831           0 :             break;
    2832             :         }
    2833             : 
    2834         750 :         case GDT_CFloat32:
    2835             :         {
    2836         750 :             float *pSrc =
    2837         750 :                 reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
    2838         750 :             pSrc += 2 * iSrcOffset;
    2839        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2840             :             {
    2841        1482 :                 adfReal[i] = double(pSrc[2 * i]);
    2842        1482 :                 padfImag[i] = double(pSrc[2 * i + 1]);
    2843             : 
    2844        1482 :                 adfReal[i + 1] = double(pSrc[2 * i + 2]);
    2845        1482 :                 padfImag[i + 1] = double(pSrc[2 * i + 3]);
    2846             :             }
    2847         750 :             break;
    2848             :         }
    2849             : 
    2850         750 :         case GDT_CFloat64:
    2851             :         {
    2852         750 :             double *pSrc =
    2853         750 :                 reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
    2854         750 :             pSrc += 2 * iSrcOffset;
    2855        2232 :             for (int i = 0; i < nSrcLen; i += 2)
    2856             :             {
    2857        1482 :                 adfReal[i] = pSrc[2 * i];
    2858        1482 :                 padfImag[i] = pSrc[2 * i + 1];
    2859             : 
    2860        1482 :                 adfReal[i + 1] = pSrc[2 * i + 2];
    2861        1482 :                 padfImag[i + 1] = pSrc[2 * i + 3];
    2862             :             }
    2863         750 :             break;
    2864             :         }
    2865             : 
    2866           0 :         case GDT_Unknown:
    2867             :         case GDT_TypeCount:
    2868           0 :             CPLAssert(false);
    2869             :             if (padfDensity)
    2870             :                 memset(padfDensity, 0, nSrcLen * sizeof(double));
    2871             :             return false;
    2872             :     }
    2873             : 
    2874     2346910 :     if (padfDensity == nullptr)
    2875     1197590 :         return true;
    2876             : 
    2877     1149320 :     if (poWK->pafUnifiedSrcDensity == nullptr)
    2878             :     {
    2879     3256740 :         for (int i = 0; i < nSrcLen; i += 2)
    2880             :         {
    2881             :             // Take into account earlier calcs.
    2882     2127390 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2883             :             {
    2884     2087480 :                 padfDensity[i] = 1.0;
    2885     2087480 :                 bHasValid = true;
    2886             :             }
    2887             : 
    2888     2127390 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2889             :             {
    2890     2088140 :                 padfDensity[i + 1] = 1.0;
    2891     2088140 :                 bHasValid = true;
    2892             :             }
    2893             :         }
    2894             :     }
    2895             :     else
    2896             :     {
    2897       70068 :         for (int i = 0; i < nSrcLen; i += 2)
    2898             :         {
    2899       50103 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2900       50103 :                 padfDensity[i] =
    2901       50103 :                     double(poWK->pafUnifiedSrcDensity[iSrcOffset + i]);
    2902       50103 :             if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2903       49252 :                 bHasValid = true;
    2904             : 
    2905       50103 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2906       50103 :                 padfDensity[i + 1] =
    2907       50103 :                     double(poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1]);
    2908       50103 :             if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    2909       49170 :                 bHasValid = true;
    2910             :         }
    2911             :     }
    2912             : 
    2913     1149320 :     return bHasValid;
    2914             : }
    2915             : 
    2916             : /************************************************************************/
    2917             : /*                            GWKGetPixelT()                            */
    2918             : /************************************************************************/
    2919             : 
    2920             : template <class T>
    2921    14964660 : static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
    2922             :                          GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
    2923             : 
    2924             : {
    2925    14964660 :     T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    2926             : 
    2927    33172045 :     if ((poWK->panUnifiedSrcValid != nullptr &&
    2928    29929220 :          !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
    2929    14964660 :         (poWK->papanBandSrcValid != nullptr &&
    2930      589863 :          poWK->papanBandSrcValid[iBand] != nullptr &&
    2931      589863 :          !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
    2932             :     {
    2933           9 :         *pdfDensity = 0.0;
    2934           9 :         return false;
    2935             :     }
    2936             : 
    2937    14964560 :     *pValue = pSrc[iSrcOffset];
    2938             : 
    2939    14964560 :     if (poWK->pafUnifiedSrcDensity == nullptr)
    2940    13842267 :         *pdfDensity = 1.0;
    2941             :     else
    2942     1122362 :         *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    2943             : 
    2944    14964560 :     return *pdfDensity != 0.0;
    2945             : }
    2946             : 
    2947             : /************************************************************************/
    2948             : /*                        GWKBilinearResample()                         */
    2949             : /*     Set of bilinear interpolators                                    */
    2950             : /************************************************************************/
    2951             : 
    2952       77448 : static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
    2953             :                                        double dfSrcX, double dfSrcY,
    2954             :                                        double *pdfDensity, double *pdfReal,
    2955             :                                        double *pdfImag)
    2956             : 
    2957             : {
    2958             :     // Save as local variables to avoid following pointers.
    2959       77448 :     const int nSrcXSize = poWK->nSrcXSize;
    2960       77448 :     const int nSrcYSize = poWK->nSrcYSize;
    2961             : 
    2962       77448 :     int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    2963       77448 :     int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    2964       77448 :     double dfRatioX = 1.5 - (dfSrcX - iSrcX);
    2965       77448 :     double dfRatioY = 1.5 - (dfSrcY - iSrcY);
    2966       77448 :     bool bShifted = false;
    2967             : 
    2968       77448 :     if (iSrcX == -1)
    2969             :     {
    2970        1534 :         iSrcX = 0;
    2971        1534 :         dfRatioX = 1;
    2972             :     }
    2973       77448 :     if (iSrcY == -1)
    2974             :     {
    2975        7734 :         iSrcY = 0;
    2976        7734 :         dfRatioY = 1;
    2977             :     }
    2978       77448 :     GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    2979             : 
    2980             :     // Shift so we don't overrun the array.
    2981       77448 :     if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
    2982       77330 :         static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
    2983       77330 :             iSrcOffset + nSrcXSize + 1)
    2984             :     {
    2985         230 :         bShifted = true;
    2986         230 :         --iSrcOffset;
    2987             :     }
    2988             : 
    2989       77448 :     double adfDensity[2] = {0.0, 0.0};
    2990       77448 :     double adfReal[2] = {0.0, 0.0};
    2991       77448 :     double adfImag[2] = {0.0, 0.0};
    2992       77448 :     double dfAccumulatorReal = 0.0;
    2993       77448 :     double dfAccumulatorImag = 0.0;
    2994       77448 :     double dfAccumulatorDensity = 0.0;
    2995       77448 :     double dfAccumulatorDivisor = 0.0;
    2996             : 
    2997       77448 :     const GPtrDiff_t nSrcPixels =
    2998       77448 :         static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
    2999             :     // Get pixel row.
    3000       77448 :     if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
    3001      154896 :         iSrcOffset < nSrcPixels &&
    3002       77448 :         GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
    3003             :                        adfImag))
    3004             :     {
    3005       71504 :         double dfMult1 = dfRatioX * dfRatioY;
    3006       71504 :         double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
    3007             : 
    3008             :         // Shifting corrected.
    3009       71504 :         if (bShifted)
    3010             :         {
    3011         230 :             adfReal[0] = adfReal[1];
    3012         230 :             adfImag[0] = adfImag[1];
    3013         230 :             adfDensity[0] = adfDensity[1];
    3014             :         }
    3015             : 
    3016             :         // Upper Left Pixel.
    3017       71504 :         if (iSrcX >= 0 && iSrcX < nSrcXSize &&
    3018       71504 :             adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
    3019             :         {
    3020       66050 :             dfAccumulatorDivisor += dfMult1;
    3021             : 
    3022       66050 :             dfAccumulatorReal += adfReal[0] * dfMult1;
    3023       66050 :             dfAccumulatorImag += adfImag[0] * dfMult1;
    3024       66050 :             dfAccumulatorDensity += adfDensity[0] * dfMult1;
    3025             :         }
    3026             : 
    3027             :         // Upper Right Pixel.
    3028       71504 :         if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
    3029       70609 :             adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    3030             :         {
    3031       65335 :             dfAccumulatorDivisor += dfMult2;
    3032             : 
    3033       65335 :             dfAccumulatorReal += adfReal[1] * dfMult2;
    3034       65335 :             dfAccumulatorImag += adfImag[1] * dfMult2;
    3035       65335 :             dfAccumulatorDensity += adfDensity[1] * dfMult2;
    3036             :         }
    3037             :     }
    3038             : 
    3039             :     // Get pixel row.
    3040       77448 :     if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
    3041      228032 :         iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
    3042       73136 :         GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
    3043             :                        adfReal, adfImag))
    3044             :     {
    3045       67577 :         double dfMult1 = dfRatioX * (1.0 - dfRatioY);
    3046       67577 :         double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
    3047             : 
    3048             :         // Shifting corrected
    3049       67577 :         if (bShifted)
    3050             :         {
    3051         112 :             adfReal[0] = adfReal[1];
    3052         112 :             adfImag[0] = adfImag[1];
    3053         112 :             adfDensity[0] = adfDensity[1];
    3054             :         }
    3055             : 
    3056             :         // Lower Left Pixel
    3057       67577 :         if (iSrcX >= 0 && iSrcX < nSrcXSize &&
    3058       67577 :             adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
    3059             :         {
    3060       62298 :             dfAccumulatorDivisor += dfMult1;
    3061             : 
    3062       62298 :             dfAccumulatorReal += adfReal[0] * dfMult1;
    3063       62298 :             dfAccumulatorImag += adfImag[0] * dfMult1;
    3064       62298 :             dfAccumulatorDensity += adfDensity[0] * dfMult1;
    3065             :         }
    3066             : 
    3067             :         // Lower Right Pixel.
    3068       67577 :         if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
    3069       66800 :             adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
    3070             :         {
    3071       61823 :             dfAccumulatorDivisor += dfMult2;
    3072             : 
    3073       61823 :             dfAccumulatorReal += adfReal[1] * dfMult2;
    3074       61823 :             dfAccumulatorImag += adfImag[1] * dfMult2;
    3075       61823 :             dfAccumulatorDensity += adfDensity[1] * dfMult2;
    3076             :         }
    3077             :     }
    3078             : 
    3079             :     /* -------------------------------------------------------------------- */
    3080             :     /*      Return result.                                                  */
    3081             :     /* -------------------------------------------------------------------- */
    3082       77448 :     if (dfAccumulatorDivisor == 1.0)
    3083             :     {
    3084       45929 :         *pdfReal = dfAccumulatorReal;
    3085       45929 :         *pdfImag = dfAccumulatorImag;
    3086       45929 :         *pdfDensity = dfAccumulatorDensity;
    3087       45929 :         return false;
    3088             :     }
    3089       31519 :     else if (dfAccumulatorDivisor < 0.00001)
    3090             :     {
    3091           0 :         *pdfReal = 0.0;
    3092           0 :         *pdfImag = 0.0;
    3093           0 :         *pdfDensity = 0.0;
    3094           0 :         return false;
    3095             :     }
    3096             :     else
    3097             :     {
    3098       31519 :         *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
    3099       31519 :         *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
    3100       31519 :         *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
    3101       31519 :         return true;
    3102             :     }
    3103             : }
    3104             : 
    3105             : template <class T>
    3106     8786376 : static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
    3107             :                                                int iBand, double dfSrcX,
    3108             :                                                double dfSrcY, T *pValue)
    3109             : 
    3110             : {
    3111             : 
    3112     8786376 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    3113     8786376 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    3114     8786376 :     GPtrDiff_t iSrcOffset =
    3115     8786376 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3116     8786376 :     const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
    3117     8786376 :     const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
    3118             : 
    3119     8786376 :     const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    3120             : 
    3121     8786376 :     if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
    3122     6227729 :         iSrcY + 1 < poWK->nSrcYSize)
    3123             :     {
    3124     6036147 :         const double dfAccumulator =
    3125     6036147 :             (double(pSrc[iSrcOffset]) * dfRatioX +
    3126     6036147 :              double(pSrc[iSrcOffset + 1]) * (1.0 - dfRatioX)) *
    3127             :                 dfRatioY +
    3128     6036147 :             (double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfRatioX +
    3129     6036147 :              double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) *
    3130     6036147 :                  (1.0 - dfRatioX)) *
    3131     6036147 :                 (1.0 - dfRatioY);
    3132             : 
    3133     6036147 :         *pValue = GWKRoundValueT<T>(dfAccumulator);
    3134             : 
    3135     6036147 :         return true;
    3136             :     }
    3137             : 
    3138     2750229 :     double dfAccumulatorDivisor = 0.0;
    3139     2750229 :     double dfAccumulator = 0.0;
    3140             : 
    3141             :     // Upper Left Pixel.
    3142     2750229 :     if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
    3143      565015 :         iSrcY < poWK->nSrcYSize)
    3144             :     {
    3145      565015 :         const double dfMult = dfRatioX * dfRatioY;
    3146             : 
    3147      565015 :         dfAccumulatorDivisor += dfMult;
    3148             : 
    3149      565015 :         dfAccumulator += double(pSrc[iSrcOffset]) * dfMult;
    3150             :     }
    3151             : 
    3152             :     // Upper Right Pixel.
    3153     2750229 :     if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
    3154     2065499 :         iSrcY < poWK->nSrcYSize)
    3155             :     {
    3156     2065499 :         const double dfMult = (1.0 - dfRatioX) * dfRatioY;
    3157             : 
    3158     2065499 :         dfAccumulatorDivisor += dfMult;
    3159             : 
    3160     2065499 :         dfAccumulator += double(pSrc[iSrcOffset + 1]) * dfMult;
    3161             :     }
    3162             : 
    3163             :     // Lower Right Pixel.
    3164     2750229 :     if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
    3165     2316563 :         iSrcY + 1 < poWK->nSrcYSize)
    3166             :     {
    3167     2064786 :         const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
    3168             : 
    3169     2064786 :         dfAccumulatorDivisor += dfMult;
    3170             : 
    3171     2064786 :         dfAccumulator +=
    3172     2064786 :             double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) * dfMult;
    3173             :     }
    3174             : 
    3175             :     // Lower Left Pixel.
    3176     2750229 :     if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
    3177      815797 :         iSrcY + 1 < poWK->nSrcYSize)
    3178             :     {
    3179      564024 :         const double dfMult = dfRatioX * (1.0 - dfRatioY);
    3180             : 
    3181      564024 :         dfAccumulatorDivisor += dfMult;
    3182             : 
    3183      564024 :         dfAccumulator += double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfMult;
    3184             :     }
    3185             : 
    3186             :     /* -------------------------------------------------------------------- */
    3187             :     /*      Return result.                                                  */
    3188             :     /* -------------------------------------------------------------------- */
    3189     2750229 :     double dfValue = 0.0;
    3190             : 
    3191     2750229 :     if (dfAccumulatorDivisor < 0.00001)
    3192             :     {
    3193           0 :         *pValue = 0;
    3194           0 :         return false;
    3195             :     }
    3196     2750229 :     else if (dfAccumulatorDivisor == 1.0)
    3197             :     {
    3198       22176 :         dfValue = dfAccumulator;
    3199             :     }
    3200             :     else
    3201             :     {
    3202     2728047 :         dfValue = dfAccumulator / dfAccumulatorDivisor;
    3203             :     }
    3204             : 
    3205     2750229 :     *pValue = GWKRoundValueT<T>(dfValue);
    3206             : 
    3207     2750229 :     return true;
    3208             : }
    3209             : 
    3210             : /************************************************************************/
    3211             : /*                        GWKCubicResample()                            */
    3212             : /*     Set of bicubic interpolators using cubic convolution.            */
    3213             : /************************************************************************/
    3214             : 
    3215             : // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
    3216             : // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
    3217             : // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
    3218             : 
    // One-dimensional cubic convolution of four consecutive samples f0..f3.
    //
    // distance1, distance2 and distance3 multiply the first-, second- and
    // third-order terms respectively (callers pass a distance and its powers).
    // With all three at 0 the result is f1; with all three at 1 it is f2.
    template <typename T>
    static inline T CubicConvolution(T distance1, T distance2, T distance3, T f0,
                                     T f1, T f2, T f3)
    {
        const T firstOrder = f2 - f0;
        const T secondOrder = 2 * f0 - 5 * f1 + 4 * f2 - f3;
        const T thirdOrder = 3 * (f1 - f2) + f3 - f0;

        return (f1 + T(0.5) * (distance1 * firstOrder + distance2 * secondOrder +
                               distance3 * thirdOrder));
    }
    3227             : 
    3228             : /************************************************************************/
    3229             : /*                       GWKCubicComputeWeights()                       */
    3230             : /************************************************************************/
    3231             : 
    3232             : // adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] + adfCoeffs[3]);
    3233             : 
    // Compute the four cubic interpolation weights for a fractional offset x:
    //
    //   coeffs[0] = -0.5 x +     x^2 - 0.5 x^3
    //   coeffs[1] =  1     - 2.5 x^2 + 1.5 x^3
    //   coeffs[2] =  0.5 x + 2   x^2 - 1.5 x^3
    //   coeffs[3] =        - 0.5 x^2 + 0.5 x^3
    //
    // The four weights always sum to 1. x = 0 yields {0, 1, 0, 0} and x = 1
    // yields {0, 0, 1, 0}.
    template <typename T>
    static inline void GWKCubicComputeWeights(T x, T coeffs[4])
    {
        const T xHalf = T(0.5) * x;
        const T xTimes3 = T(3.0) * x;
        const T xSquareHalf = xHalf * x;

        // Polynomial factors, each later scaled by x/2 or x^2/2.
        const T factor0 = -1 + x * (2 - x);
        const T factor1 = -5 + xTimes3;
        const T factor2 = 1 + x * (4 - xTimes3);
        const T factor3 = -1 + x;

        coeffs[0] = xHalf * factor0;
        coeffs[1] = 1 + xSquareHalf * factor1;
        coeffs[2] = xHalf * factor2;
        coeffs[3] = xSquareHalf * factor3;
    }
    3246             : 
    // Dot product of four double weights (v1) with four samples of type T
    // (v2), each sample being converted to double before multiplication.
    // The sum is kept as one left-to-right expression so that rounding is
    // unchanged.
    template <typename T> inline double CONVOL4(const double v1[4], const T v2[4])
    {
        const double dfWeightedSum =
            v1[0] * double(v2[0]) + v1[1] * double(v2[1]) +
            v1[2] * double(v2[2]) + v1[3] * double(v2[3]);
        return dfWeightedSum;
    }
    3252             : 
    // Disabled experiment: everything between the `#if 0` and `#endif` below
    // is compiled out. The two macros are a factored form of
    // GWKCubicComputeWeights() + CONVOL4(): the weights are stored without
    // their common 0.5 * x factor (written to dfHalfX instead), and the
    // constant 1 of the second weight becomes the leading (v)[1] term of
    // CONVOL4_Optim2MAX. adfCoeffs[2] and adfCoeffs[3] are derived from
    // adfCoeffs[1] and adfCoeffs[0]; the direct formulas they replace are kept
    // as inline comments.
    3253             : #if 0
    3254             : // Optimal (in theory...) for max 2 convolutions: 14 multiplications
    3255             : // instead of 17.
    3256             : // TODO(schwehr): Use an inline function.
    3257             : #define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX)             \
    3258             :     {                                                                          \
    3259             :         const double dfX = dfX_;                                               \
    3260             :         dfHalfX = 0.5 * dfX;                                                   \
    3261             :         const double dfThreeX = 3.0 * dfX;                                     \
    3262             :         const double dfXMinus1 = dfX - 1;                                      \
    3263             :                                                                                \
    3264             :         adfCoeffs[0] = -1 + dfX * (2 - dfX);                                   \
    3265             :         adfCoeffs[1] = dfX * (-5 + dfThreeX);                                  \
    3266             :         /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/                           \
    3267             :         adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1];                              \
    3268             :         /*adfCoeffs[3] = dfX * (-1 + dfX); */                                  \
    3269             :         adfCoeffs[3] = dfXMinus1 - adfCoeffs[0];                               \
    3270             :     }
    3271             : 
    3272             : // TODO(schwehr): Use an inline function.
    3273             : #define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX)                               \
    3274             :     ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
    3275             :                            (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
    3276             : #endif
    3277             : 
    3278      302045 : static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
    3279             :                                     double dfSrcX, double dfSrcY,
    3280             :                                     double *pdfDensity, double *pdfReal,
    3281             :                                     double *pdfImag)
    3282             : 
    3283             : {
    3284      302045 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3285      302045 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3286      302045 :     GPtrDiff_t iSrcOffset =
    3287      302045 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3288      302045 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3289      302045 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3290      302045 :     double adfDensity[4] = {};
    3291      302045 :     double adfReal[4] = {};
    3292      302045 :     double adfImag[4] = {};
    3293             : 
    3294             :     // Get the bilinear interpolation at the image borders.
    3295      302045 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3296      286140 :         iSrcY + 2 >= poWK->nSrcYSize)
    3297       24670 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3298       24670 :                                           pdfDensity, pdfReal, pdfImag);
    3299             : 
    3300      277375 :     double adfValueDens[4] = {};
    3301      277375 :     double adfValueReal[4] = {};
    3302      277375 :     double adfValueImag[4] = {};
    3303             : 
    3304      277375 :     double adfCoeffsX[4] = {};
    3305      277375 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3306             : 
    3307     1240570 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3308             :     {
    3309     1009640 :         if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
    3310      998035 :                             2, adfDensity, adfReal, adfImag) ||
    3311      998035 :             adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3312      980395 :             adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3313     2979770 :             adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3314      972094 :             adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
    3315             :         {
    3316       46449 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3317       46449 :                                               pdfDensity, pdfReal, pdfImag);
    3318             :         }
    3319             : 
    3320      963196 :         adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
    3321      963196 :         adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
    3322      963196 :         adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
    3323             :     }
    3324             : 
    3325             :     /* -------------------------------------------------------------------- */
    3326             :     /*      For now, if we have any pixels missing in the kernel area,      */
    3327             :     /*      we fallback on using bilinear interpolation.  Ideally we        */
    3328             :     /*      should do "weight adjustment" of our results similarly to       */
    3329             :     /*      what is done for the cubic spline and lanc. interpolators.      */
    3330             :     /* -------------------------------------------------------------------- */
    3331             : 
    3332      230926 :     double adfCoeffsY[4] = {};
    3333      230926 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3334             : 
    3335      230926 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3336      230926 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3337      230926 :     *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
    3338             : 
    3339      230926 :     return true;
    3340             : }
    3341             : 
    3342             : #ifdef USE_SSE2
    3343             : 
    3344             : /************************************************************************/
    3345             : /*                           XMMLoad4Values()                           */
    3346             : /*                                                                      */
    3347             : /*  Load 4 packed byte or uint16, cast them to float and put them in a  */
    3348             : /*  m128 register.                                                      */
    3349             : /************************************************************************/
    3350             : 
    3351   568577000 : static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
    3352             : {
    3353             :     unsigned int i;
    3354   568577000 :     memcpy(&i, ptr, 4);
    3355  1137150000 :     __m128i xmm_i = _mm_cvtsi32_si128(i);
    3356             :     // Zero extend 4 packed unsigned 8-bit integers in a to packed
    3357             :     // 32-bit integers.
    3358             : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    3359             :     xmm_i = _mm_cvtepu8_epi32(xmm_i);
    3360             : #else
    3361  1137150000 :     xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
    3362  1137150000 :     xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
    3363             : #endif
    3364  1137150000 :     return _mm_cvtepi32_ps(xmm_i);
    3365             : }
    3366             : 
    3367     1108340 : static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
    3368             : {
    3369             :     GUInt64 i;
    3370     1108340 :     memcpy(&i, ptr, 8);
    3371     2216690 :     __m128i xmm_i = _mm_cvtsi64_si128(i);
    3372             :     // Zero extend 4 packed unsigned 16-bit integers in a to packed
    3373             :     // 32-bit integers.
    3374             : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    3375             :     xmm_i = _mm_cvtepu16_epi32(xmm_i);
    3376             : #else
    3377     2216690 :     xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
    3378             : #endif
    3379     2216690 :     return _mm_cvtepi32_ps(xmm_i);
    3380             : }
    3381             : 
    3382             : /************************************************************************/
    3383             : /*                           XMMHorizontalAdd()                         */
    3384             : /*                                                                      */
    3385             : /*  Return the sum of the 4 floating points of the register.            */
    3386             : /************************************************************************/
    3387             : 
    3388             : #if defined(__SSE3__) || defined(USE_NEON_OPTIMIZATIONS)
    3389             : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
    3390             : {
    3391             :     __m128 shuf = _mm_movehdup_ps(v);   // (v3   , v3   , v1   , v1)
    3392             :     __m128 sums = _mm_add_ps(v, shuf);  // (v3+v3, v3+v2, v1+v1, v1+v0)
    3393             :     shuf = _mm_movehl_ps(shuf, sums);   // (v3   , v3   , v3+v3, v3+v2)
    3394             :     sums = _mm_add_ss(sums, shuf);      // (v1+v0)+(v3+v2)
    3395             :     return _mm_cvtss_f32(sums);
    3396             : }
    3397             : #else
    3398   142421000 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
    3399             : {
    3400   142421000 :     __m128 shuf = _mm_movehl_ps(v, v);     // (v3   , v2   , v3   , v2)
    3401   142421000 :     __m128 sums = _mm_add_ps(v, shuf);     // (v3+v3, v2+v2, v3+v1, v2+v0)
    3402   142421000 :     shuf = _mm_shuffle_ps(sums, sums, 1);  // (v2+v0, v2+v0, v2+v0, v3+v1)
    3403   142421000 :     sums = _mm_add_ss(sums, shuf);         // (v2+v0)+(v3+v1)
    3404   142421000 :     return _mm_cvtss_f32(sums);
    3405             : }
    3406             : #endif
    3407             : 
    3408             : #endif  // define USE_SSE2
    3409             : 
    3410             : /************************************************************************/
    3411             : /*            GWKCubicResampleSrcMaskIsDensity4SampleRealT()            */
    3412             : /************************************************************************/
    3413             : 
    3414             : // Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16,
    3415             : // because there are a few assumptions above those types.
    3416             : // We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero
    3417             : // perf benefit.
    3418             : 
    3419             : template <class T>
    3420      389755 : static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
    3421             :     const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
    3422             :     double *pdfDensity, double *pdfReal)
    3423             : {
    3424      389755 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3425      389755 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3426      389755 :     const GPtrDiff_t iSrcOffset =
    3427      389755 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3428             : 
    3429             :     // Get the bilinear interpolation at the image borders.
    3430      389755 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3431      387271 :         iSrcY + 2 >= poWK->nSrcYSize)
    3432             :     {
    3433        2484 :         double adfImagIgnored[4] = {};
    3434        2484 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3435        2484 :                                           pdfDensity, pdfReal, adfImagIgnored);
    3436             :     }
    3437             : 
    3438             : #if defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
    3439             :     const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
    3440             :     const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
    3441             : 
    3442             :     // TODO(schwehr): Explain the magic numbers.
    3443             :     float afTemp[4 + 4 + 4 + 1];
    3444             :     float *pafAligned =
    3445             :         reinterpret_cast<float *>(afTemp + ((size_t)afTemp & 0xf));
    3446             :     float *pafCoeffs = pafAligned;
    3447             :     float *pafDensity = pafAligned + 4;
    3448             :     float *pafValue = pafAligned + 8;
    3449             : 
    3450             :     const float fHalfDeltaX = 0.5f * fDeltaX;
    3451             :     const float fThreeDeltaX = 3.0f * fDeltaX;
    3452             :     const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
    3453             : 
    3454             :     pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
    3455             :     pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
    3456             :     pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
    3457             :     pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
    3458             :     __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
    3459             :     const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD_FLOAT);
    3460             : 
    3461             :     __m128 xmmMaskLowDensity = _mm_setzero_ps();
    3462             :     for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
    3463             :          i++, iOffset += poWK->nSrcXSize)
    3464             :     {
    3465             :         const __m128 xmmDensity =
    3466             :             _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
    3467             :         xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
    3468             :                                       _mm_cmplt_ps(xmmDensity, xmmThreshold));
    3469             :         pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
    3470             : 
    3471             :         const __m128 xmmValues =
    3472             :             XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
    3473             :         pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
    3474             :     }
    3475             :     if (_mm_movemask_ps(xmmMaskLowDensity))
    3476             :     {
    3477             :         double adfImagIgnored[4] = {};
    3478             :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3479             :                                           pdfDensity, pdfReal, adfImagIgnored);
    3480             :     }
    3481             : 
    3482             :     const float fHalfDeltaY = 0.5f * fDeltaY;
    3483             :     const float fThreeDeltaY = 3.0f * fDeltaY;
    3484             :     const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
    3485             : 
    3486             :     pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
    3487             :     pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
    3488             :     pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
    3489             :     pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
    3490             : 
    3491             :     xmmCoeffs = _mm_load_ps(pafCoeffs);
    3492             : 
    3493             :     const __m128 xmmDensity = _mm_load_ps(pafDensity);
    3494             :     const __m128 xmmValue = _mm_load_ps(pafValue);
    3495             :     *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
    3496             :     *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
    3497             : 
    3498             :     // We did all above computations on float32 whereas the general case is
    3499             :     // float64. Not sure if one is fundamentally more correct than the other
    3500             :     // one, but we want our optimization to give the same result as the
    3501             :     // general case as much as possible, so if the resulting value is
    3502             :     // close to some_int_value + 0.5, redo the computation with the general
    3503             :     // case.
    3504             :     // Note: If other types than Byte or UInt16, will need changes.
    3505             :     if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)
    3506             :         return true;
    3507             : 
    3508             : #endif  // defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
    3509             : 
    3510      387271 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3511      387271 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3512             : 
    3513      387271 :     double adfValueDens[4] = {};
    3514      387271 :     double adfValueReal[4] = {};
    3515             : 
    3516      387271 :     double adfCoeffsX[4] = {};
    3517      387271 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3518             : 
    3519      387271 :     double adfCoeffsY[4] = {};
    3520      387271 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3521             : 
    3522     1930200 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3523             :     {
    3524     1544480 :         const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
    3525             : #if !(defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2))
    3526     1544480 :         if (poWK->pafUnifiedSrcDensity[iOffset + 0] <
    3527     1542940 :                 SRC_DENSITY_THRESHOLD_FLOAT ||
    3528     1542940 :             poWK->pafUnifiedSrcDensity[iOffset + 1] <
    3529     1542940 :                 SRC_DENSITY_THRESHOLD_FLOAT ||
    3530     1542940 :             poWK->pafUnifiedSrcDensity[iOffset + 2] <
    3531     1542940 :                 SRC_DENSITY_THRESHOLD_FLOAT ||
    3532     1542940 :             poWK->pafUnifiedSrcDensity[iOffset + 3] <
    3533             :                 SRC_DENSITY_THRESHOLD_FLOAT)
    3534             :         {
    3535        1551 :             double adfImagIgnored[4] = {};
    3536        1551 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3537             :                                               pdfDensity, pdfReal,
    3538        1551 :                                               adfImagIgnored);
    3539             :         }
    3540             : #endif
    3541             : 
    3542     3085860 :         adfValueDens[i + 1] =
    3543     1542930 :             CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
    3544             : 
    3545     1542930 :         adfValueReal[i + 1] = CONVOL4(
    3546             :             adfCoeffsX,
    3547     1542930 :             reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
    3548             :     }
    3549             : 
    3550      385720 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3551      385720 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3552             : 
    3553      385720 :     return true;
    3554             : }
    3555             : 
    3556             : /************************************************************************/
    3557             : /*              GWKCubicResampleSrcMaskIsDensity4SampleReal()             */
    3558             : /*     Bi-cubic when source has and only has pafUnifiedSrcDensity.      */
    3559             : /************************************************************************/
    3560             : 
    3561           0 : static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
    3562             :     const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
    3563             :     double *pdfDensity, double *pdfReal)
    3564             : 
    3565             : {
    3566           0 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3567           0 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3568           0 :     const GPtrDiff_t iSrcOffset =
    3569           0 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3570           0 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3571           0 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3572             : 
    3573             :     // Get the bilinear interpolation at the image borders.
    3574           0 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3575           0 :         iSrcY + 2 >= poWK->nSrcYSize)
    3576             :     {
    3577           0 :         double adfImagIgnored[4] = {};
    3578           0 :         return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3579           0 :                                           pdfDensity, pdfReal, adfImagIgnored);
    3580             :     }
    3581             : 
    3582           0 :     double adfCoeffsX[4] = {};
    3583           0 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
    3584             : 
    3585           0 :     double adfCoeffsY[4] = {};
    3586           0 :     GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
    3587             : 
    3588           0 :     double adfValueDens[4] = {};
    3589           0 :     double adfValueReal[4] = {};
    3590           0 :     double adfDensity[4] = {};
    3591           0 :     double adfReal[4] = {};
    3592           0 :     double adfImagIgnored[4] = {};
    3593             : 
    3594           0 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3595             :     {
    3596           0 :         if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
    3597           0 :                             2, adfDensity, adfReal, adfImagIgnored) ||
    3598           0 :             adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3599           0 :             adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3600           0 :             adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
    3601           0 :             adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
    3602             :         {
    3603           0 :             return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
    3604             :                                               pdfDensity, pdfReal,
    3605           0 :                                               adfImagIgnored);
    3606             :         }
    3607             : 
    3608           0 :         adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
    3609           0 :         adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
    3610             :     }
    3611             : 
    3612           0 :     *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
    3613           0 :     *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
    3614             : 
    3615           0 :     return true;
    3616             : }
    3617             : 
    3618             : template <class T>
    3619     2301250 : static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
    3620             :                                             int iBand, double dfSrcX,
    3621             :                                             double dfSrcY, T *pValue)
    3622             : 
    3623             : {
    3624     2301250 :     const int iSrcX = static_cast<int>(dfSrcX - 0.5);
    3625     2301250 :     const int iSrcY = static_cast<int>(dfSrcY - 0.5);
    3626     2301250 :     const GPtrDiff_t iSrcOffset =
    3627     2301250 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    3628     2301250 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    3629     2301250 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    3630     2301250 :     const double dfDeltaY2 = dfDeltaY * dfDeltaY;
    3631     2301250 :     const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
    3632             : 
    3633             :     // Get the bilinear interpolation at the image borders.
    3634     2301250 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    3635     1883214 :         iSrcY + 2 >= poWK->nSrcYSize)
    3636      490439 :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    3637      490439 :                                                   pValue);
    3638             : 
    3639     1810811 :     double adfCoeffs[4] = {};
    3640     1810811 :     GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
    3641             : 
    3642     1810811 :     double adfValue[4] = {};
    3643             : 
    3644     9054050 :     for (GPtrDiff_t i = -1; i < 3; i++)
    3645             :     {
    3646     7243246 :         const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
    3647             : 
    3648     7243246 :         adfValue[i + 1] = CONVOL4(
    3649             :             adfCoeffs,
    3650     7243246 :             reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
    3651             :     }
    3652             : 
    3653             :     const double dfValue =
    3654     1810811 :         CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
    3655             :                          adfValue[1], adfValue[2], adfValue[3]);
    3656             : 
    3657     1810811 :     *pValue = GWKClampValueT<T>(dfValue);
    3658             : 
    3659     1810811 :     return true;
    3660             : }
    3661             : 
    3662             : /************************************************************************/
    3663             : /*                           GWKLanczosSinc()                           */
    3664             : /************************************************************************/
    3665             : 
    3666             : /*
    3667             :  * Lanczos windowed sinc interpolation kernel with radius r.
    3668             :  *        /
    3669             :  *        | sinc(x) * sinc(x/r), if |x| < r
    3670             :  * L(x) = | 1, if x = 0                     ,
    3671             :  *        | 0, otherwise
    3672             :  *        \
    3673             :  *
    3674             :  * where sinc(x) = sin(PI * x) / (PI * x).
    3675             :  */
    3676             : 
    3677        1704 : static double GWKLanczosSinc(double dfX)
    3678             : {
    3679        1704 :     if (dfX == 0.0)
    3680           0 :         return 1.0;
    3681             : 
    3682        1704 :     const double dfPIX = M_PI * dfX;
    3683        1704 :     const double dfPIXoverR = dfPIX / 3;
    3684        1704 :     const double dfPIX2overR = dfPIX * dfPIXoverR;
    3685             :     // Given that sin(3x) = 3 sin(x) - 4 sin^3 (x)
    3686             :     // we can compute sin(dfSinPIX) from sin(dfPIXoverR)
    3687        1704 :     const double dfSinPIXoverR = sin(dfPIXoverR);
    3688        1704 :     const double dfSinPIXoverRSquared = dfSinPIXoverR * dfSinPIXoverR;
    3689        1704 :     const double dfSinPIXMulSinPIXoverR =
    3690        1704 :         (3 - 4 * dfSinPIXoverRSquared) * dfSinPIXoverRSquared;
    3691        1704 :     return dfSinPIXMulSinPIXoverR / dfPIX2overR;
    3692             : }
    3693             : 
    3694      106844 : static double GWKLanczosSinc4Values(double *padfValues)
    3695             : {
    3696      534220 :     for (int i = 0; i < 4; i++)
    3697             :     {
    3698      427376 :         if (padfValues[i] == 0.0)
    3699             :         {
    3700           0 :             padfValues[i] = 1.0;
    3701             :         }
    3702             :         else
    3703             :         {
    3704      427376 :             const double dfPIX = M_PI * padfValues[i];
    3705      427376 :             const double dfPIXoverR = dfPIX / 3;
    3706      427376 :             const double dfPIX2overR = dfPIX * dfPIXoverR;
    3707             :             // Given that sin(3x) = 3 sin(x) - 4 sin^3 (x)
    3708             :             // we can compute sin(dfSinPIX) from sin(dfPIXoverR)
    3709      427376 :             const double dfSinPIXoverR = sin(dfPIXoverR);
    3710      427376 :             const double dfSinPIXoverRSquared = dfSinPIXoverR * dfSinPIXoverR;
    3711      427376 :             const double dfSinPIXMulSinPIXoverR =
    3712      427376 :                 (3 - 4 * dfSinPIXoverRSquared) * dfSinPIXoverRSquared;
    3713      427376 :             padfValues[i] = dfSinPIXMulSinPIXoverR / dfPIX2overR;
    3714             :         }
    3715             :     }
    3716      106844 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3717             : }
    3718             : 
    3719             : /************************************************************************/
    3720             : /*                            GWKBilinear()                             */
    3721             : /************************************************************************/
    3722             : 
    3723     6349880 : static double GWKBilinear(double dfX)
    3724             : {
    3725     6349880 :     double dfAbsX = fabs(dfX);
    3726     6349880 :     if (dfAbsX <= 1.0)
    3727     5880560 :         return 1 - dfAbsX;
    3728             :     else
    3729      469322 :         return 0.0;
    3730             : }
    3731             : 
    3732      780803 : static double GWKBilinear4Values(double *padfValues)
    3733             : {
    3734      780803 :     double dfAbsX0 = fabs(padfValues[0]);
    3735      780803 :     double dfAbsX1 = fabs(padfValues[1]);
    3736      780803 :     double dfAbsX2 = fabs(padfValues[2]);
    3737      780803 :     double dfAbsX3 = fabs(padfValues[3]);
    3738      780803 :     if (dfAbsX0 <= 1.0)
    3739      780803 :         padfValues[0] = 1 - dfAbsX0;
    3740             :     else
    3741           0 :         padfValues[0] = 0.0;
    3742      780803 :     if (dfAbsX1 <= 1.0)
    3743      780803 :         padfValues[1] = 1 - dfAbsX1;
    3744             :     else
    3745           0 :         padfValues[1] = 0.0;
    3746      780803 :     if (dfAbsX2 <= 1.0)
    3747      780803 :         padfValues[2] = 1 - dfAbsX2;
    3748             :     else
    3749           0 :         padfValues[2] = 0.0;
    3750      780803 :     if (dfAbsX3 <= 1.0)
    3751      780781 :         padfValues[3] = 1 - dfAbsX3;
    3752             :     else
    3753          22 :         padfValues[3] = 0.0;
    3754      780803 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3755             : }
    3756             : 
    3757             : /************************************************************************/
    3758             : /*                              GWKCubic()                              */
    3759             : /************************************************************************/
    3760             : 
    3761       83022 : static double GWKCubic(double dfX)
    3762             : {
    3763       83022 :     return CubicKernel(dfX);
    3764             : }
    3765             : 
    3766     2453550 : static double GWKCubic4Values(double *padfValues)
    3767             : {
    3768     2453550 :     const double dfAbsX_0 = fabs(padfValues[0]);
    3769     2453550 :     const double dfAbsX_1 = fabs(padfValues[1]);
    3770     2453550 :     const double dfAbsX_2 = fabs(padfValues[2]);
    3771     2453550 :     const double dfAbsX_3 = fabs(padfValues[3]);
    3772     2453550 :     const double dfX2_0 = padfValues[0] * padfValues[0];
    3773     2453550 :     const double dfX2_1 = padfValues[1] * padfValues[1];
    3774     2453550 :     const double dfX2_2 = padfValues[2] * padfValues[2];
    3775     2453550 :     const double dfX2_3 = padfValues[3] * padfValues[3];
    3776             : 
    3777     2453550 :     double dfVal0 = 0.0;
    3778     2453550 :     if (dfAbsX_0 <= 1.0)
    3779      861032 :         dfVal0 = dfX2_0 * (1.5 * dfAbsX_0 - 2.5) + 1.0;
    3780     1592520 :     else if (dfAbsX_0 <= 2.0)
    3781     1592340 :         dfVal0 = dfX2_0 * (-0.5 * dfAbsX_0 + 2.5) - 4.0 * dfAbsX_0 + 2.0;
    3782             : 
    3783     2453550 :     double dfVal1 = 0.0;
    3784     2453550 :     if (dfAbsX_1 <= 1.0)
    3785     1588760 :         dfVal1 = dfX2_1 * (1.5 * dfAbsX_1 - 2.5) + 1.0;
    3786      864787 :     else if (dfAbsX_1 <= 2.0)
    3787      864787 :         dfVal1 = dfX2_1 * (-0.5 * dfAbsX_1 + 2.5) - 4.0 * dfAbsX_1 + 2.0;
    3788             : 
    3789     2453550 :     double dfVal2 = 0.0;
    3790     2453550 :     if (dfAbsX_2 <= 1.0)
    3791     1599800 :         dfVal2 = dfX2_2 * (1.5 * dfAbsX_2 - 2.5) + 1.0;
    3792      853749 :     else if (dfAbsX_2 <= 2.0)
    3793      853749 :         dfVal2 = dfX2_2 * (-0.5 * dfAbsX_2 + 2.5) - 4.0 * dfAbsX_2 + 2.0;
    3794             : 
    3795     2453550 :     double dfVal3 = 0.0;
    3796     2453550 :     if (dfAbsX_3 <= 1.0)
    3797      871793 :         dfVal3 = dfX2_3 * (1.5 * dfAbsX_3 - 2.5) + 1.0;
    3798     1581760 :     else if (dfAbsX_3 <= 2.0)
    3799     1581600 :         dfVal3 = dfX2_3 * (-0.5 * dfAbsX_3 + 2.5) - 4.0 * dfAbsX_3 + 2.0;
    3800             : 
    3801     2453550 :     padfValues[0] = dfVal0;
    3802     2453550 :     padfValues[1] = dfVal1;
    3803     2453550 :     padfValues[2] = dfVal2;
    3804     2453550 :     padfValues[3] = dfVal3;
    3805     2453550 :     return dfVal0 + dfVal1 + dfVal2 + dfVal3;
    3806             : }
    3807             : 
    3808             : /************************************************************************/
    3809             : /*                             GWKBSpline()                             */
    3810             : /************************************************************************/
    3811             : 
    3812             : // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
    3813             : // Equation 8 with (B,C)=(1,0)
    3814             : // 1/6 * ( 3 * |x|^3 -  6 * |x|^2 + 4) |x| < 1
    3815             : // 1/6 * ( -|x|^3 + 6 |x|^2  - 12|x| + 8) |x| >= 1 and |x| < 2
    3816             : 
    3817      136704 : static double GWKBSpline(double x)
    3818             : {
    3819      136704 :     const double xp2 = x + 2.0;
    3820      136704 :     const double xp1 = x + 1.0;
    3821      136704 :     const double xm1 = x - 1.0;
    3822             : 
    3823             :     // This will most likely be used, so we'll compute it ahead of time to
    3824             :     // avoid stalling the processor.
    3825      136704 :     const double xp2c = xp2 * xp2 * xp2;
    3826             : 
    3827             :     // Note that the test is computed only if it is needed.
    3828             :     // TODO(schwehr): Make this easier to follow.
    3829             :     return xp2 > 0.0
    3830      273408 :                ? ((xp1 > 0.0)
    3831      136704 :                       ? ((x > 0.0)
    3832      122310 :                              ? ((xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0) +
    3833       87812 :                                    6.0 * x * x * x
    3834             :                              : 0.0) +
    3835      122310 :                             -4.0 * xp1 * xp1 * xp1
    3836             :                       : 0.0) +
    3837             :                      xp2c
    3838      136704 :                : 0.0;  // * 0.166666666666666666666
    3839             : }
    3840             : 
    3841     1895260 : static double GWKBSpline4Values(double *padfValues)
    3842             : {
    3843     9476280 :     for (int i = 0; i < 4; i++)
    3844             :     {
    3845     7581020 :         const double x = padfValues[i];
    3846     7581020 :         const double xp2 = x + 2.0;
    3847     7581020 :         const double xp1 = x + 1.0;
    3848     7581020 :         const double xm1 = x - 1.0;
    3849             : 
    3850             :         // This will most likely be used, so we'll compute it ahead of time to
    3851             :         // avoid stalling the processor.
    3852     7581020 :         const double xp2c = xp2 * xp2 * xp2;
    3853             : 
    3854             :         // Note that the test is computed only if it is needed.
    3855             :         // TODO(schwehr): Make this easier to follow.
    3856     7581020 :         padfValues[i] =
    3857             :             (xp2 > 0.0)
    3858    15105200 :                 ? ((xp1 > 0.0)
    3859     7524190 :                        ? ((x > 0.0)
    3860     5656910 :                               ? ((xm1 > 0.0) ? -4.0 * xm1 * xm1 * xm1 : 0.0) +
    3861     3788650 :                                     6.0 * x * x * x
    3862             :                               : 0.0) +
    3863     5656910 :                              -4.0 * xp1 * xp1 * xp1
    3864             :                        : 0.0) +
    3865             :                       xp2c
    3866             :                 : 0.0;  // * 0.166666666666666666666
    3867             :     }
    3868     1895260 :     return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
    3869             : }
    3870             : /************************************************************************/
    3871             : /*                         GWKResampleWrkStruct                         */
    3872             : /************************************************************************/
    3873             : 
    3874             : typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;
    3875             : 
    3876             : typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
    3877             :                                    double dfSrcX, double dfSrcY,
    3878             :                                    double *pdfDensity, double *pdfReal,
    3879             :                                    double *pdfImag,
    3880             :                                    GWKResampleWrkStruct *psWrkStruct);
    3881             : 
    3882             : struct _GWKResampleWrkStruct
    3883             : {
    3884             :     pfnGWKResampleType pfnGWKResample;
    3885             : 
    3886             :     // Space for saved X weights.
    3887             :     double *padfWeightsX;
    3888             :     bool *pabCalcX;
    3889             : 
    3890             :     double *padfWeightsY;       // Only used by GWKResampleOptimizedLanczos.
    3891             :     int iLastSrcX;              // Only used by GWKResampleOptimizedLanczos.
    3892             :     int iLastSrcY;              // Only used by GWKResampleOptimizedLanczos.
    3893             :     double dfLastDeltaX;        // Only used by GWKResampleOptimizedLanczos.
    3894             :     double dfLastDeltaY;        // Only used by GWKResampleOptimizedLanczos.
    3895             :     double dfCosPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    3896             :     double dfSinPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    3897             :     double dfCosPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3898             :     double dfSinPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3899             :     double dfCosPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    3900             :     double dfSinPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    3901             :     double dfCosPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3902             :     double dfSinPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    3903             : 
    3904             :     // Space for saving a row of pixels.
    3905             :     double *padfRowDensity;
    3906             :     double *padfRowReal;
    3907             :     double *padfRowImag;
    3908             : };
    3909             : 
    3910             : /************************************************************************/
    3911             : /*                     GWKResampleCreateWrkStruct()                     */
    3912             : /************************************************************************/
    3913             : 
    3914             : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    3915             :                         double dfSrcY, double *pdfDensity, double *pdfReal,
    3916             :                         double *pdfImag, GWKResampleWrkStruct *psWrkStruct);
    3917             : 
    3918             : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
    3919             :                                         double dfSrcX, double dfSrcY,
    3920             :                                         double *pdfDensity, double *pdfReal,
    3921             :                                         double *pdfImag,
    3922             :                                         GWKResampleWrkStruct *psWrkStruct);
    3923             : 
    3924         401 : static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
    3925             : {
    3926         401 :     const int nXDist = (poWK->nXRadius + 1) * 2;
    3927         401 :     const int nYDist = (poWK->nYRadius + 1) * 2;
    3928             : 
    3929             :     GWKResampleWrkStruct *psWrkStruct = static_cast<GWKResampleWrkStruct *>(
    3930         401 :         CPLCalloc(1, sizeof(GWKResampleWrkStruct)));
    3931             : 
    3932             :     // Alloc space for saved X weights.
    3933         401 :     psWrkStruct->padfWeightsX =
    3934         401 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3935         401 :     psWrkStruct->pabCalcX =
    3936         401 :         static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool)));
    3937             : 
    3938         401 :     psWrkStruct->padfWeightsY =
    3939         401 :         static_cast<double *>(CPLCalloc(nYDist, sizeof(double)));
    3940         401 :     psWrkStruct->iLastSrcX = -10;
    3941         401 :     psWrkStruct->iLastSrcY = -10;
    3942         401 :     psWrkStruct->dfLastDeltaX = -10;
    3943         401 :     psWrkStruct->dfLastDeltaY = -10;
    3944             : 
    3945             :     // Alloc space for saving a row of pixels.
    3946         401 :     if (poWK->pafUnifiedSrcDensity == nullptr &&
    3947         365 :         poWK->panUnifiedSrcValid == nullptr &&
    3948         342 :         poWK->papanBandSrcValid == nullptr)
    3949             :     {
    3950         342 :         psWrkStruct->padfRowDensity = nullptr;
    3951             :     }
    3952             :     else
    3953             :     {
    3954          59 :         psWrkStruct->padfRowDensity =
    3955          59 :             static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3956             :     }
    3957         401 :     psWrkStruct->padfRowReal =
    3958         401 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3959         401 :     psWrkStruct->padfRowImag =
    3960         401 :         static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
    3961             : 
    3962         401 :     if (poWK->eResample == GRA_Lanczos)
    3963             :     {
    3964          65 :         psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos;
    3965             : 
    3966          65 :         if (poWK->dfXScale < 1)
    3967             :         {
    3968           4 :             psWrkStruct->dfCosPiXScaleOver3 = cos(M_PI / 3 * poWK->dfXScale);
    3969           4 :             psWrkStruct->dfSinPiXScaleOver3 =
    3970           4 :                 sqrt(1 - psWrkStruct->dfCosPiXScaleOver3 *
    3971           4 :                              psWrkStruct->dfCosPiXScaleOver3);
    3972             :             // "Naive":
    3973             :             // const double dfCosPiXScale = cos(  M_PI * dfXScale );
    3974             :             // const double dfSinPiXScale = sin(  M_PI * dfXScale );
    3975             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
    3976           4 :             psWrkStruct->dfCosPiXScale = (4 * psWrkStruct->dfCosPiXScaleOver3 *
    3977           4 :                                               psWrkStruct->dfCosPiXScaleOver3 -
    3978           4 :                                           3) *
    3979           4 :                                          psWrkStruct->dfCosPiXScaleOver3;
    3980           4 :             psWrkStruct->dfSinPiXScale = sqrt(
    3981           4 :                 1 - psWrkStruct->dfCosPiXScale * psWrkStruct->dfCosPiXScale);
    3982             :         }
    3983             : 
    3984          65 :         if (poWK->dfYScale < 1)
    3985             :         {
    3986          12 :             psWrkStruct->dfCosPiYScaleOver3 = cos(M_PI / 3 * poWK->dfYScale);
    3987          12 :             psWrkStruct->dfSinPiYScaleOver3 =
    3988          12 :                 sqrt(1 - psWrkStruct->dfCosPiYScaleOver3 *
    3989          12 :                              psWrkStruct->dfCosPiYScaleOver3);
    3990             :             // "Naive":
    3991             :             // const double dfCosPiYScale = cos(  M_PI * dfYScale );
    3992             :             // const double dfSinPiYScale = sin(  M_PI * dfYScale );
    3993             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
    3994          12 :             psWrkStruct->dfCosPiYScale = (4 * psWrkStruct->dfCosPiYScaleOver3 *
    3995          12 :                                               psWrkStruct->dfCosPiYScaleOver3 -
    3996          12 :                                           3) *
    3997          12 :                                          psWrkStruct->dfCosPiYScaleOver3;
    3998          12 :             psWrkStruct->dfSinPiYScale = sqrt(
    3999          12 :                 1 - psWrkStruct->dfCosPiYScale * psWrkStruct->dfCosPiYScale);
    4000             :         }
    4001             :     }
    4002             :     else
    4003         336 :         psWrkStruct->pfnGWKResample = GWKResample;
    4004             : 
    4005         401 :     return psWrkStruct;
    4006             : }
    4007             : 
    4008             : /************************************************************************/
    4009             : /*                     GWKResampleDeleteWrkStruct()                     */
    4010             : /************************************************************************/
    4011             : 
    4012         401 : static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)
    4013             : {
    4014         401 :     CPLFree(psWrkStruct->padfWeightsX);
    4015         401 :     CPLFree(psWrkStruct->padfWeightsY);
    4016         401 :     CPLFree(psWrkStruct->pabCalcX);
    4017         401 :     CPLFree(psWrkStruct->padfRowDensity);
    4018         401 :     CPLFree(psWrkStruct->padfRowReal);
    4019         401 :     CPLFree(psWrkStruct->padfRowImag);
    4020         401 :     CPLFree(psWrkStruct);
    4021         401 : }
    4022             : 
    4023             : /************************************************************************/
    4024             : /*                            GWKResample()                             */
    4025             : /************************************************************************/
    4026             : 
    4027      239383 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    4028             :                         double dfSrcY, double *pdfDensity, double *pdfReal,
    4029             :                         double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
    4030             : 
    4031             : {
    4032             :     // Save as local variables to avoid following pointers in loops.
    4033      239383 :     const int nSrcXSize = poWK->nSrcXSize;
    4034      239383 :     const int nSrcYSize = poWK->nSrcYSize;
    4035             : 
    4036      239383 :     double dfAccumulatorReal = 0.0;
    4037      239383 :     double dfAccumulatorImag = 0.0;
    4038      239383 :     double dfAccumulatorDensity = 0.0;
    4039      239383 :     double dfAccumulatorWeight = 0.0;
    4040      239383 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4041      239383 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4042      239383 :     const GPtrDiff_t iSrcOffset =
    4043      239383 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4044      239383 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4045      239383 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4046             : 
    4047      239383 :     const double dfXScale = poWK->dfXScale;
    4048      239383 :     const double dfYScale = poWK->dfYScale;
    4049             : 
    4050      239383 :     const int nXDist = (poWK->nXRadius + 1) * 2;
    4051             : 
    4052             :     // Space for saved X weights.
    4053      239383 :     double *padfWeightsX = psWrkStruct->padfWeightsX;
    4054      239383 :     bool *pabCalcX = psWrkStruct->pabCalcX;
    4055             : 
    4056             :     // Space for saving a row of pixels.
    4057      239383 :     double *padfRowDensity = psWrkStruct->padfRowDensity;
    4058      239383 :     double *padfRowReal = psWrkStruct->padfRowReal;
    4059      239383 :     double *padfRowImag = psWrkStruct->padfRowImag;
    4060             : 
    4061             :     // Mark as needing calculation (don't calculate the weights yet,
    4062             :     // because a mask may render it unnecessary).
    4063      239383 :     memset(pabCalcX, false, nXDist * sizeof(bool));
    4064             : 
    4065      239383 :     FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
    4066      239383 :     CPLAssert(pfnGetWeight);
    4067             : 
    4068             :     // Skip sampling over edge of image.
    4069      239383 :     int j = poWK->nFiltInitY;
    4070      239383 :     int jMax = poWK->nYRadius;
    4071      239383 :     if (iSrcY + j < 0)
    4072         566 :         j = -iSrcY;
    4073      239383 :     if (iSrcY + jMax >= nSrcYSize)
    4074         662 :         jMax = nSrcYSize - iSrcY - 1;
    4075             : 
    4076      239383 :     int iMin = poWK->nFiltInitX;
    4077      239383 :     int iMax = poWK->nXRadius;
    4078      239383 :     if (iSrcX + iMin < 0)
    4079         566 :         iMin = -iSrcX;
    4080      239383 :     if (iSrcX + iMax >= nSrcXSize)
    4081         659 :         iMax = nSrcXSize - iSrcX - 1;
    4082             : 
    4083      239383 :     const int bXScaleBelow1 = (dfXScale < 1.0);
    4084      239383 :     const int bYScaleBelow1 = (dfYScale < 1.0);
    4085             : 
    4086      239383 :     GPtrDiff_t iRowOffset =
    4087      239383 :         iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
    4088             : 
    4089             :     // Loop over pixel rows in the kernel.
    4090     1445930 :     for (; j <= jMax; ++j)
    4091             :     {
    4092     1206540 :         iRowOffset += nSrcXSize;
    4093             : 
    4094             :         // Get pixel values.
    4095             :         // We can potentially read extra elements after the "normal" end of the
    4096             :         // source arrays, but the contract of papabySrcImage[iBand],
    4097             :         // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
    4098             :         // is to have WARP_EXTRA_ELTS reserved at their end.
    4099     1206540 :         if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
    4100             :                             padfRowDensity, padfRowReal, padfRowImag))
    4101          72 :             continue;
    4102             : 
    4103             :         // Calculate the Y weight.
    4104             :         double dfWeight1 = (bYScaleBelow1)
    4105     1206470 :                                ? pfnGetWeight((j - dfDeltaY) * dfYScale)
    4106        1600 :                                : pfnGetWeight(j - dfDeltaY);
    4107             : 
    4108             :         // Iterate over pixels in row.
    4109     1206470 :         double dfAccumulatorRealLocal = 0.0;
    4110     1206470 :         double dfAccumulatorImagLocal = 0.0;
    4111     1206470 :         double dfAccumulatorDensityLocal = 0.0;
    4112     1206470 :         double dfAccumulatorWeightLocal = 0.0;
    4113             : 
    4114     7317420 :         for (int i = iMin; i <= iMax; ++i)
    4115             :         {
    4116             :             // Skip sampling if pixel has zero density.
    4117     6110940 :             if (padfRowDensity != nullptr &&
    4118       77277 :                 padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
    4119         546 :                 continue;
    4120             : 
    4121     6110400 :             double dfWeight2 = 0.0;
    4122             : 
    4123             :             // Make or use a cached set of weights for this row.
    4124     6110400 :             if (pabCalcX[i - iMin])
    4125             :             {
    4126             :                 // Use saved weight value instead of recomputing it.
    4127     4903920 :                 dfWeight2 = padfWeightsX[i - iMin];
    4128             :             }
    4129             :             else
    4130             :             {
    4131             :                 // Calculate & save the X weight.
    4132     1206480 :                 padfWeightsX[i - iMin] = dfWeight2 =
    4133     1206480 :                     (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
    4134        1600 :                                     : pfnGetWeight(i - dfDeltaX);
    4135             : 
    4136     1206480 :                 pabCalcX[i - iMin] = true;
    4137             :             }
    4138             : 
    4139             :             // Accumulate!
    4140     6110400 :             dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
    4141     6110400 :             dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
    4142     6110400 :             if (padfRowDensity != nullptr)
    4143       76731 :                 dfAccumulatorDensityLocal +=
    4144       76731 :                     padfRowDensity[i - iMin] * dfWeight2;
    4145     6110400 :             dfAccumulatorWeightLocal += dfWeight2;
    4146             :         }
    4147             : 
    4148     1206470 :         dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
    4149     1206470 :         dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
    4150     1206470 :         dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
    4151     1206470 :         dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
    4152             :     }
    4153             : 
    4154      239383 :     if (dfAccumulatorWeight < 0.000001 ||
    4155        1887 :         (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
    4156             :     {
    4157           0 :         *pdfDensity = 0.0;
    4158           0 :         return false;
    4159             :     }
    4160             : 
    4161             :     // Calculate the output taking into account weighting.
    4162      239383 :     if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4163             :     {
    4164      239380 :         *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
    4165      239380 :         *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
    4166      239380 :         if (padfRowDensity != nullptr)
    4167        1884 :             *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
    4168             :         else
    4169      237496 :             *pdfDensity = 1.0;
    4170             :     }
    4171             :     else
    4172             :     {
    4173           3 :         *pdfReal = dfAccumulatorReal;
    4174           3 :         *pdfImag = dfAccumulatorImag;
    4175           3 :         if (padfRowDensity != nullptr)
    4176           3 :             *pdfDensity = dfAccumulatorDensity;
    4177             :         else
    4178           0 :             *pdfDensity = 1.0;
    4179             :     }
    4180             : 
    4181      239383 :     return true;
    4182             : }
    4183             : 
    4184             : /************************************************************************/
    4185             : /*                    GWKResampleOptimizedLanczos()                     */
    4186             : /************************************************************************/
    4187             : 
    4188      634574 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
    4189             :                                         double dfSrcX, double dfSrcY,
    4190             :                                         double *pdfDensity, double *pdfReal,
    4191             :                                         double *pdfImag,
    4192             :                                         GWKResampleWrkStruct *psWrkStruct)
    4193             : 
    4194             : {
    4195             :     // Save as local variables to avoid following pointers in loops.
    4196      634574 :     const int nSrcXSize = poWK->nSrcXSize;
    4197      634574 :     const int nSrcYSize = poWK->nSrcYSize;
    4198             : 
    4199      634574 :     double dfAccumulatorReal = 0.0;
    4200      634574 :     double dfAccumulatorImag = 0.0;
    4201      634574 :     double dfAccumulatorDensity = 0.0;
    4202      634574 :     double dfAccumulatorWeight = 0.0;
    4203      634574 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4204      634574 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4205      634574 :     const GPtrDiff_t iSrcOffset =
    4206      634574 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4207      634574 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4208      634574 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4209             : 
    4210      634574 :     const double dfXScale = poWK->dfXScale;
    4211      634574 :     const double dfYScale = poWK->dfYScale;
    4212             : 
    4213             :     // Space for saved X weights.
    4214      634574 :     double *const padfWeightsXShifted =
    4215      634574 :         psWrkStruct->padfWeightsX - poWK->nFiltInitX;
    4216      634574 :     double *const padfWeightsYShifted =
    4217      634574 :         psWrkStruct->padfWeightsY - poWK->nFiltInitY;
    4218             : 
    4219             :     // Space for saving a row of pixels.
    4220      634574 :     double *const padfRowDensity = psWrkStruct->padfRowDensity;
    4221      634574 :     double *const padfRowReal = psWrkStruct->padfRowReal;
    4222      634574 :     double *const padfRowImag = psWrkStruct->padfRowImag;
    4223             : 
    4224             :     // Skip sampling over edge of image.
    4225      634574 :     int jMin = poWK->nFiltInitY;
    4226      634574 :     int jMax = poWK->nYRadius;
    4227      634574 :     if (iSrcY + jMin < 0)
    4228       17334 :         jMin = -iSrcY;
    4229      634574 :     if (iSrcY + jMax >= nSrcYSize)
    4230        5638 :         jMax = nSrcYSize - iSrcY - 1;
    4231             : 
    4232      634574 :     int iMin = poWK->nFiltInitX;
    4233      634574 :     int iMax = poWK->nXRadius;
    4234      634574 :     if (iSrcX + iMin < 0)
    4235       19595 :         iMin = -iSrcX;
    4236      634574 :     if (iSrcX + iMax >= nSrcXSize)
    4237        6817 :         iMax = nSrcXSize - iSrcX - 1;
    4238             : 
    4239      634574 :     if (dfXScale < 1.0)
    4240             :     {
    4241      462945 :         while ((iMin - dfDeltaX) * dfXScale < -3.0)
    4242      260083 :             iMin++;
    4243      263534 :         while ((iMax - dfDeltaX) * dfXScale > 3.0)
    4244       60672 :             iMax--;
    4245             : 
    4246             :         // clang-format off
    4247             :         /*
    4248             :         Naive version:
    4249             :         for (int i = iMin; i <= iMax; ++i)
    4250             :         {
    4251             :             psWrkStruct->padfWeightsXShifted[i] =
    4252             :                 GWKLanczosSinc((i - dfDeltaX) * dfXScale);
    4253             :         }
    4254             : 
    4255             :         but given that:
    4256             : 
    4257             :         GWKLanczosSinc(x):
    4258             :             if (dfX == 0.0)
    4259             :                 return 1.0;
    4260             : 
    4261             :             const double dfPIX = M_PI * dfX;
    4262             :             const double dfPIXoverR = dfPIX / 3;
    4263             :             const double dfPIX2overR = dfPIX * dfPIXoverR;
    4264             :             return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
    4265             : 
    4266             :         and
    4267             :             sin (a + b) = sin a cos b + cos a sin b.
    4268             :             cos (a + b) = cos a cos b - sin a sin b.
    4269             : 
    4270             :         we can skip any sin() computation within the loop
    4271             :         */
    4272             :         // clang-format on
    4273             : 
    4274      202862 :         if (iSrcX != psWrkStruct->iLastSrcX ||
    4275      131072 :             dfDeltaX != psWrkStruct->dfLastDeltaX)
    4276             :         {
    4277       71790 :             double dfX = (iMin - dfDeltaX) * dfXScale;
    4278             : 
    4279       71790 :             double dfPIXover3 = M_PI / 3 * dfX;
    4280       71790 :             double dfCosOver3 = cos(dfPIXover3);
    4281       71790 :             double dfSinOver3 = sin(dfPIXover3);
    4282             : 
    4283             :             // "Naive":
    4284             :             // double dfSin = sin( M_PI * dfX );
    4285             :             // double dfCos = cos( M_PI * dfX );
    4286             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
    4287       71790 :             double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
    4288       71790 :             double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
    4289             : 
    4290       71790 :             const double dfCosPiXScaleOver3 = psWrkStruct->dfCosPiXScaleOver3;
    4291       71790 :             const double dfSinPiXScaleOver3 = psWrkStruct->dfSinPiXScaleOver3;
    4292       71790 :             const double dfCosPiXScale = psWrkStruct->dfCosPiXScale;
    4293       71790 :             const double dfSinPiXScale = psWrkStruct->dfSinPiXScale;
    4294       71790 :             constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
    4295       71790 :             padfWeightsXShifted[iMin] =
    4296       71790 :                 dfX == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfX * dfX);
    4297      683646 :             for (int i = iMin + 1; i <= iMax; ++i)
    4298             :             {
    4299      611856 :                 dfX += dfXScale;
    4300      611856 :                 const double dfNewSin =
    4301      611856 :                     dfSin * dfCosPiXScale + dfCos * dfSinPiXScale;
    4302      611856 :                 const double dfNewSinOver3 = dfSinOver3 * dfCosPiXScaleOver3 +
    4303      611856 :                                              dfCosOver3 * dfSinPiXScaleOver3;
    4304      611856 :                 padfWeightsXShifted[i] =
    4305             :                     dfX == 0
    4306      611856 :                         ? 1.0
    4307      611856 :                         : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfX * dfX);
    4308      611856 :                 const double dfNewCos =
    4309      611856 :                     dfCos * dfCosPiXScale - dfSin * dfSinPiXScale;
    4310      611856 :                 const double dfNewCosOver3 = dfCosOver3 * dfCosPiXScaleOver3 -
    4311      611856 :                                              dfSinOver3 * dfSinPiXScaleOver3;
    4312      611856 :                 dfSin = dfNewSin;
    4313      611856 :                 dfCos = dfNewCos;
    4314      611856 :                 dfSinOver3 = dfNewSinOver3;
    4315      611856 :                 dfCosOver3 = dfNewCosOver3;
    4316             :             }
    4317             : 
    4318       71790 :             psWrkStruct->iLastSrcX = iSrcX;
    4319       71790 :             psWrkStruct->dfLastDeltaX = dfDeltaX;
    4320             :         }
    4321             :     }
    4322             :     else
    4323             :     {
    4324      789372 :         while (iMin - dfDeltaX < -3.0)
    4325      357660 :             iMin++;
    4326      431712 :         while (iMax - dfDeltaX > 3.0)
    4327           0 :             iMax--;
    4328             : 
    4329      431712 :         if (iSrcX != psWrkStruct->iLastSrcX ||
    4330      225330 :             dfDeltaX != psWrkStruct->dfLastDeltaX)
    4331             :         {
    4332             :             // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
    4333             :             // following trigonometric formulas.
    4334             : 
    4335             :             // TODO(schwehr): Move this somewhere where it can be rendered as
    4336             :             // LaTeX.
    4337             :             // clang-format off
    4338             :             // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) +
    4339             :             //                            cos(M_PI * dfBase) * sin(M_PI * k)
    4340             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
    4341             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
    4342             :             // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
    4343             : 
    4344             :             // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
    4345             :             //                                  cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
    4346             :             // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
    4347             :             // clang-format on
    4348             : 
    4349      420092 :             const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
    4350      420092 :             const double dfSin2PIDeltaXOver3 =
    4351             :                 dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
    4352             :             // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
    4353      420092 :             const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
    4354      420092 :             const double dfSinPIDeltaX =
    4355      420092 :                 (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
    4356      420092 :             const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    4357      420092 :             const double dfInvPI2Over3xSinPIDeltaX =
    4358             :                 dfInvPI2Over3 * dfSinPIDeltaX;
    4359      420092 :             const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
    4360      420092 :                 -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
    4361      420092 :             const double dfSinPIOver3 = 0.8660254037844386;
    4362      420092 :             const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
    4363      420092 :                 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
    4364             :             const double padfCst[] = {
    4365      420092 :                 dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
    4366      420092 :                 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
    4367             :                     dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
    4368      420092 :                 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
    4369      420092 :                     dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
    4370             : 
    4371     2974940 :             for (int i = iMin; i <= iMax; ++i)
    4372             :             {
    4373     2554850 :                 const double dfX = i - dfDeltaX;
    4374     2554850 :                 if (dfX == 0.0)
    4375       58282 :                     padfWeightsXShifted[i] = 1.0;
    4376             :                 else
    4377     2496570 :                     padfWeightsXShifted[i] = padfCst[(i + 3) % 3] / (dfX * dfX);
    4378             : #if DEBUG_VERBOSE
    4379             :                 // TODO(schwehr): AlmostEqual.
    4380             :                 // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
    4381             :                 //               GWKLanczosSinc(dfX, 3.0)) < 1e-10);
    4382             : #endif
    4383             :             }
    4384             : 
    4385      420092 :             psWrkStruct->iLastSrcX = iSrcX;
    4386      420092 :             psWrkStruct->dfLastDeltaX = dfDeltaX;
    4387             :         }
    4388             :     }
    4389             : 
    4390      634574 :     if (dfYScale < 1.0)
    4391             :     {
    4392       15754 :         while ((jMin - dfDeltaY) * dfYScale < -3.0)
    4393        9500 :             jMin++;
    4394        9854 :         while ((jMax - dfDeltaY) * dfYScale > 3.0)
    4395        3600 :             jMax--;
    4396             : 
    4397             :         // clang-format off
    4398             :         /*
    4399             :         Naive version:
    4400             :         for (int j = jMin; j <= jMax; ++j)
    4401             :         {
    4402             :             padfWeightsYShifted[j] =
    4403             :                 GWKLanczosSinc((j - dfDeltaY) * dfYScale);
    4404             :         }
    4405             :         */
    4406             :         // clang-format on
    4407             : 
    4408        6254 :         if (iSrcY != psWrkStruct->iLastSrcY ||
    4409        6127 :             dfDeltaY != psWrkStruct->dfLastDeltaY)
    4410             :         {
    4411         127 :             double dfY = (jMin - dfDeltaY) * dfYScale;
    4412             : 
    4413         127 :             double dfPIYover3 = M_PI / 3 * dfY;
    4414         127 :             double dfCosOver3 = cos(dfPIYover3);
    4415         127 :             double dfSinOver3 = sin(dfPIYover3);
    4416             : 
    4417             :             // "Naive":
    4418             :             // double dfSin = sin( M_PI * dfY );
    4419             :             // double dfCos = cos( M_PI * dfY );
    4420             :             // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
    4421         127 :             double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
    4422         127 :             double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
    4423             : 
    4424         127 :             const double dfCosPiYScaleOver3 = psWrkStruct->dfCosPiYScaleOver3;
    4425         127 :             const double dfSinPiYScaleOver3 = psWrkStruct->dfSinPiYScaleOver3;
    4426         127 :             const double dfCosPiYScale = psWrkStruct->dfCosPiYScale;
    4427         127 :             const double dfSinPiYScale = psWrkStruct->dfSinPiYScale;
    4428         127 :             constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
    4429         127 :             padfWeightsYShifted[jMin] =
    4430         127 :                 dfY == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfY * dfY);
    4431        1210 :             for (int j = jMin + 1; j <= jMax; ++j)
    4432             :             {
    4433        1083 :                 dfY += dfYScale;
    4434        1083 :                 const double dfNewSin =
    4435        1083 :                     dfSin * dfCosPiYScale + dfCos * dfSinPiYScale;
    4436        1083 :                 const double dfNewSinOver3 = dfSinOver3 * dfCosPiYScaleOver3 +
    4437        1083 :                                              dfCosOver3 * dfSinPiYScaleOver3;
    4438        1083 :                 padfWeightsYShifted[j] =
    4439             :                     dfY == 0
    4440        1083 :                         ? 1.0
    4441        1083 :                         : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfY * dfY);
    4442        1083 :                 const double dfNewCos =
    4443        1083 :                     dfCos * dfCosPiYScale - dfSin * dfSinPiYScale;
    4444        1083 :                 const double dfNewCosOver3 = dfCosOver3 * dfCosPiYScaleOver3 -
    4445        1083 :                                              dfSinOver3 * dfSinPiYScaleOver3;
    4446        1083 :                 dfSin = dfNewSin;
    4447        1083 :                 dfCos = dfNewCos;
    4448        1083 :                 dfSinOver3 = dfNewSinOver3;
    4449        1083 :                 dfCosOver3 = dfNewCosOver3;
    4450             :             }
    4451             : 
    4452         127 :             psWrkStruct->iLastSrcY = iSrcY;
    4453         127 :             psWrkStruct->dfLastDeltaY = dfDeltaY;
    4454             :         }
    4455             :     }
    4456             :     else
    4457             :     {
    4458     1106550 :         while (jMin - dfDeltaY < -3.0)
    4459      478232 :             jMin++;
    4460      628320 :         while (jMax - dfDeltaY > 3.0)
    4461           0 :             jMax--;
    4462             : 
    4463      628320 :         if (iSrcY != psWrkStruct->iLastSrcY ||
    4464      627488 :             dfDeltaY != psWrkStruct->dfLastDeltaY)
    4465             :         {
    4466        7198 :             const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
    4467        7198 :             const double dfSin2PIDeltaYOver3 =
    4468             :                 dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
    4469             :             // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
    4470        7198 :             const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
    4471        7198 :             const double dfSinPIDeltaY =
    4472        7198 :                 (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
    4473        7198 :             const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
    4474        7198 :             const double dfInvPI2Over3xSinPIDeltaY =
    4475             :                 dfInvPI2Over3 * dfSinPIDeltaY;
    4476        7198 :             const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
    4477        7198 :                 -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
    4478        7198 :             const double dfSinPIOver3 = 0.8660254037844386;
    4479        7198 :             const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
    4480        7198 :                 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
    4481             :             const double padfCst[] = {
    4482        7198 :                 dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
    4483        7198 :                 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
    4484             :                     dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
    4485        7198 :                 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
    4486        7198 :                     dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
    4487             : 
    4488       47777 :             for (int j = jMin; j <= jMax; ++j)
    4489             :             {
    4490       40579 :                 const double dfY = j - dfDeltaY;
    4491       40579 :                 if (dfY == 0.0)
    4492         468 :                     padfWeightsYShifted[j] = 1.0;
    4493             :                 else
    4494       40111 :                     padfWeightsYShifted[j] = padfCst[(j + 3) % 3] / (dfY * dfY);
    4495             : #if DEBUG_VERBOSE
    4496             :                 // TODO(schwehr): AlmostEqual.
    4497             :                 // CPLAssert(fabs(padfWeightsYShifted[j] -
    4498             :                 //               GWKLanczosSinc(dfY, 3.0)) < 1e-10);
    4499             : #endif
    4500             :             }
    4501             : 
    4502        7198 :             psWrkStruct->iLastSrcY = iSrcY;
    4503        7198 :             psWrkStruct->dfLastDeltaY = dfDeltaY;
    4504             :         }
    4505             :     }
    4506             : 
    4507             :     // If we have no density information, we can simply compute the
    4508             :     // accumulated weight.
    4509      634574 :     if (padfRowDensity == nullptr)
    4510             :     {
    4511      634574 :         double dfRowAccWeight = 0.0;
    4512     5159250 :         for (int i = iMin; i <= iMax; ++i)
    4513             :         {
    4514     4524680 :             dfRowAccWeight += padfWeightsXShifted[i];
    4515             :         }
    4516      634574 :         double dfColAccWeight = 0.0;
    4517     4564130 :         for (int j = jMin; j <= jMax; ++j)
    4518             :         {
    4519     3929550 :             dfColAccWeight += padfWeightsYShifted[j];
    4520             :         }
    4521      634574 :         dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
    4522             :     }
    4523             : 
    4524             :     // Loop over pixel rows in the kernel.
    4525             : 
    4526      634574 :     if (poWK->eWorkingDataType == GDT_UInt8 && !poWK->panUnifiedSrcValid &&
    4527      633954 :         !poWK->papanBandSrcValid && !poWK->pafUnifiedSrcDensity &&
    4528             :         !padfRowDensity)
    4529             :     {
    4530             :         // Optimization for Byte case without any masking/alpha
    4531             : 
    4532      633954 :         if (dfAccumulatorWeight < 0.000001)
    4533             :         {
    4534           0 :             *pdfDensity = 0.0;
    4535           0 :             return false;
    4536             :         }
    4537             : 
    4538      633954 :         const GByte *pSrc =
    4539      633954 :             reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
    4540      633954 :         pSrc += iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
    4541             : 
    4542             : #if defined(USE_SSE2)
    4543      633954 :         if (iMax - iMin + 1 == 6)
    4544             :         {
    4545             :             // This is just an optimized version of the general case in
    4546             :             // the else clause.
    4547             : 
    4548      359916 :             pSrc += iMin;
    4549      359916 :             int j = jMin;
    4550             :             const auto fourXWeights =
    4551      359916 :                 XMMReg4Double::Load4Val(padfWeightsXShifted + iMin);
    4552             : 
    4553             :             // Process 2 lines at the same time.
    4554     1424180 :             for (; j < jMax; j += 2)
    4555             :             {
    4556             :                 const XMMReg4Double v_acc =
    4557     1064270 :                     XMMReg4Double::Load4Val(pSrc) * fourXWeights;
    4558             :                 const XMMReg4Double v_acc2 =
    4559     1064270 :                     XMMReg4Double::Load4Val(pSrc + nSrcXSize) * fourXWeights;
    4560     1064270 :                 const double dfRowAcc = v_acc.GetHorizSum();
    4561     1064270 :                 const double dfRowAccEnd =
    4562     1064270 :                     pSrc[4] * padfWeightsXShifted[iMin + 4] +
    4563     1064270 :                     pSrc[5] * padfWeightsXShifted[iMin + 5];
    4564     1064270 :                 dfAccumulatorReal +=
    4565     1064270 :                     (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
    4566     1064270 :                 const double dfRowAcc2 = v_acc2.GetHorizSum();
    4567     1064270 :                 const double dfRowAcc2End =
    4568     1064270 :                     pSrc[nSrcXSize + 4] * padfWeightsXShifted[iMin + 4] +
    4569     1064270 :                     pSrc[nSrcXSize + 5] * padfWeightsXShifted[iMin + 5];
    4570     1064270 :                 dfAccumulatorReal +=
    4571     1064270 :                     (dfRowAcc2 + dfRowAcc2End) * padfWeightsYShifted[j + 1];
    4572     1064270 :                 pSrc += 2 * nSrcXSize;
    4573             :             }
    4574      359916 :             if (j == jMax)
    4575             :             {
    4576             :                 // Process last line if there's an odd number of them.
    4577             : 
    4578             :                 const XMMReg4Double v_acc =
    4579       90039 :                     XMMReg4Double::Load4Val(pSrc) * fourXWeights;
    4580       90039 :                 const double dfRowAcc = v_acc.GetHorizSum();
    4581       90039 :                 const double dfRowAccEnd =
    4582       90039 :                     pSrc[4] * padfWeightsXShifted[iMin + 4] +
    4583       90039 :                     pSrc[5] * padfWeightsXShifted[iMin + 5];
    4584       90039 :                 dfAccumulatorReal +=
    4585       90039 :                     (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
    4586             :             }
    4587             :         }
    4588             :         else
    4589             : #endif
    4590             :         {
    4591     1982080 :             for (int j = jMin; j <= jMax; ++j)
    4592             :             {
    4593     1708040 :                 int i = iMin;
    4594     1708040 :                 double dfRowAcc1 = 0.0;
    4595     1708040 :                 double dfRowAcc2 = 0.0;
    4596             :                 // A bit of loop unrolling
    4597     8474620 :                 for (; i < iMax; i += 2)
    4598             :                 {
    4599     6766580 :                     dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
    4600     6766580 :                     dfRowAcc2 += pSrc[i + 1] * padfWeightsXShifted[i + 1];
    4601             :                 }
    4602     1708040 :                 if (i == iMax)
    4603             :                 {
    4604             :                     // Process last column if there's an odd number of them.
    4605     1188570 :                     dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
    4606             :                 }
    4607             : 
    4608     1708040 :                 dfAccumulatorReal +=
    4609     1708040 :                     (dfRowAcc1 + dfRowAcc2) * padfWeightsYShifted[j];
    4610     1708040 :                 pSrc += nSrcXSize;
    4611             :             }
    4612             :         }
    4613             : 
    4614             :         // Calculate the output taking into account weighting.
    4615      633954 :         if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4616             :         {
    4617      579748 :             const double dfInvAcc = 1.0 / dfAccumulatorWeight;
    4618      579748 :             *pdfReal = dfAccumulatorReal * dfInvAcc;
    4619      579748 :             *pdfDensity = 1.0;
    4620             :         }
    4621             :         else
    4622             :         {
    4623       54206 :             *pdfReal = dfAccumulatorReal;
    4624       54206 :             *pdfDensity = 1.0;
    4625             :         }
    4626             : 
    4627      633954 :         return true;
    4628             :     }
    4629             : 
    4630         620 :     GPtrDiff_t iRowOffset =
    4631         620 :         iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
    4632             : 
    4633         620 :     const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
    4634             : 
    4635        3560 :     for (int j = jMin; j <= jMax; ++j)
    4636             :     {
    4637        2940 :         iRowOffset += nSrcXSize;
    4638             : 
    4639             :         // Get pixel values.
    4640             :         // We can potentially read extra elements after the "normal" end of the
    4641             :         // source arrays, but the contract of papabySrcImage[iBand],
    4642             :         // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
    4643             :         // is to have WARP_EXTRA_ELTS reserved at their end.
    4644        2940 :         if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
    4645             :                             padfRowDensity, padfRowReal, padfRowImag))
    4646           0 :             continue;
    4647             : 
    4648        2940 :         const double dfWeight1 = padfWeightsYShifted[j];
    4649             : 
    4650             :         // Iterate over pixels in row.
    4651        2940 :         if (padfRowDensity != nullptr)
    4652             :         {
    4653           0 :             for (int i = iMin; i <= iMax; ++i)
    4654             :             {
    4655             :                 // Skip sampling if pixel has zero density.
    4656           0 :                 if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
    4657           0 :                     continue;
    4658             : 
    4659             :                 //  Use a cached set of weights for this row.
    4660           0 :                 const double dfWeight2 = dfWeight1 * padfWeightsXShifted[i];
    4661             : 
    4662             :                 // Accumulate!
    4663           0 :                 dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
    4664           0 :                 dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
    4665           0 :                 dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
    4666           0 :                 dfAccumulatorWeight += dfWeight2;
    4667             :             }
    4668             :         }
    4669        2940 :         else if (bIsNonComplex)
    4670             :         {
    4671        1764 :             double dfRowAccReal = 0.0;
    4672       10560 :             for (int i = iMin; i <= iMax; ++i)
    4673             :             {
    4674        8796 :                 const double dfWeight2 = padfWeightsXShifted[i];
    4675             : 
    4676             :                 // Accumulate!
    4677        8796 :                 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
    4678             :             }
    4679             : 
    4680        1764 :             dfAccumulatorReal += dfRowAccReal * dfWeight1;
    4681             :         }
    4682             :         else
    4683             :         {
    4684        1176 :             double dfRowAccReal = 0.0;
    4685        1176 :             double dfRowAccImag = 0.0;
    4686        7040 :             for (int i = iMin; i <= iMax; ++i)
    4687             :             {
    4688        5864 :                 const double dfWeight2 = padfWeightsXShifted[i];
    4689             : 
    4690             :                 // Accumulate!
    4691        5864 :                 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
    4692        5864 :                 dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
    4693             :             }
    4694             : 
    4695        1176 :             dfAccumulatorReal += dfRowAccReal * dfWeight1;
    4696        1176 :             dfAccumulatorImag += dfRowAccImag * dfWeight1;
    4697             :         }
    4698             :     }
    4699             : 
    4700         620 :     if (dfAccumulatorWeight < 0.000001)
    4701             :     {
    4702           0 :         *pdfDensity = 0.0;
    4703           0 :         return false;
    4704             :     }
    4705         620 :     else if (padfRowDensity)
    4706             :     {
    4707           0 :         if (dfAccumulatorDensity < 0.000001)
    4708             :         {
    4709           0 :             *pdfDensity = 0.0;
    4710           0 :             return false;
    4711             :         }
    4712             : 
    4713             :         // TODO: previously we returned *pdfDensity when
    4714             :         // nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)
    4715             :         // that was initially introduced in
    4716             :         // https://github.com/OSGeo/gdal/commit/b68d31418f4826402dc44b52152c0493b682fea8
    4717             :         // but in scenarios like https://github.com/OSGeo/gdal/issues/14560
    4718             :         // this led to almost full removal of text printed on transparent
    4719             :         // background. It is not clear what we should do.
    4720             :         //
    4721             :         // Wisdom from https://mastodon.social/@martinfleis@fosstodon.org/116568957538009577
    4722             :         // LJW: "It is a fundamental change of support problem with no closed
    4723             :         // solution. We looked into bootstrapping to solve it, but never
    4724             :         // published. Basic idea was to bootstrap a constant sample size, set
    4725             :         // the weight of candidates as a kernel function on the distance from
    4726             :         // the target, and set the bandwidth needed at each pixel as that
    4727             :         // which maximizes the entropy of a histogram of sample weights.
    4728             :         // Best you can do is define some loss and optimize the resampling
    4729             :         // against it."
    4730             :     }
    4731             : 
    4732             :     // Calculate the output taking into account weighting.
    4733         620 :     if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
    4734             :     {
    4735           0 :         const double dfInvAcc = 1.0 / dfAccumulatorWeight;
    4736           0 :         *pdfReal = dfAccumulatorReal * dfInvAcc;
    4737           0 :         *pdfImag = dfAccumulatorImag * dfInvAcc;
    4738           0 :         if (padfRowDensity != nullptr)
    4739           0 :             *pdfDensity = dfAccumulatorDensity * dfInvAcc;
    4740             :         else
    4741           0 :             *pdfDensity = 1.0;
    4742             :     }
    4743             :     else
    4744             :     {
    4745         620 :         *pdfReal = dfAccumulatorReal;
    4746         620 :         *pdfImag = dfAccumulatorImag;
    4747         620 :         if (padfRowDensity != nullptr)
    4748           0 :             *pdfDensity = dfAccumulatorDensity;
    4749             :         else
    4750         620 :             *pdfDensity = 1.0;
    4751             :     }
    4752             : 
    4753         620 :     return true;
    4754             : }
    4755             : 
    4756             : /************************************************************************/
    4757             : /*                         GWKComputeWeights()                          */
    4758             : /************************************************************************/
    4759             : 
                       // Computes the separable (horizontal x vertical) resampling weights
                       // for one kernel window.
                       //
                       // The filter callbacks are looked up from apfGWKFilter[eResample] and
                       // apfGWKFilter4Values[eResample]; both are asserted to be non-null.
                       //
                       // For each horizontal tap i in [iMin, iMax] the filter argument is
                       // (i - dfDeltaX) * dfXScale; for each vertical tap j in [jMin, jMax]
                       // it is (j - dfDeltaY) * dfYScale.
                       //
                       // Outputs:
                       //  - padfWeightsHorizontal / padfWeightsVertical are written starting
                       //    at index 0 (entry 0 corresponds to tap iMin / jMin), so they are
                       //    indexed by tap position within the window, not by i / j.
                       //  - dfInvWeights receives
                       //    1 / (horizontal weight sum * vertical weight sum), where each sum
                       //    is seeded with cpl::NumericLimits<double>::min() instead of 0.
                       //
                       // NOTE(review): in the 4-at-a-time loops the arrays are filled with
                       // filter *arguments* before pfnGetWeight4Values() is called on them;
                       // presumably that callback overwrites the four arguments with their
                       // weights in place and returns their sum -- confirm against the
                       // FilterFunc4ValuesType implementations.
    4760     1091070 : static void GWKComputeWeights(GDALResampleAlg eResample, int iMin, int iMax,
    4761             :                               double dfDeltaX, double dfXScale, int jMin,
    4762             :                               int jMax, double dfDeltaY, double dfYScale,
    4763             :                               double *padfWeightsHorizontal,
    4764             :                               double *padfWeightsVertical, double &dfInvWeights)
    4765             : {
    4766             : 
                       // Select the scalar and the 4-values variants of the filter for the
                       // requested resampling algorithm.
    4767     1091070 :     const FilterFuncType pfnGetWeight = apfGWKFilter[eResample];
    4768     1091070 :     CPLAssert(pfnGetWeight);
    4769     1091070 :     const FilterFunc4ValuesType pfnGetWeight4Values =
    4770     1091070 :         apfGWKFilter4Values[eResample];
    4771     1091070 :     CPLAssert(pfnGetWeight4Values);
    4772             : 
                       // i is the tap offset, iC the matching 0-based index in the output
                       // array; both are shared by the two loops below.
    4773     1091070 :     int i = iMin;  // Used after for.
    4774     1091070 :     int iC = 0;    // Used after for.
    4775             :     // Not zero, but as close as possible to it, to avoid potential division by
    4776             :     // zero at end of function
    4777     1091070 :     double dfAccumulatorWeightHorizontal = cpl::NumericLimits<double>::min();
                       // Horizontal taps, four at a time. "i + 2 < iMax" is equivalent to
                       // "i + 3 <= iMax", i.e. taps i..i+3 all lie within [iMin, iMax].
                       // The arguments of taps i+1..i+3 are obtained by repeatedly adding
                       // dfXScale rather than being recomputed from the tap index.
    4778     2403700 :     for (; i + 2 < iMax; i += 4, iC += 4)
    4779             :     {
    4780     1312620 :         padfWeightsHorizontal[iC] = (i - dfDeltaX) * dfXScale;
    4781     1312620 :         padfWeightsHorizontal[iC + 1] = padfWeightsHorizontal[iC] + dfXScale;
    4782     1312620 :         padfWeightsHorizontal[iC + 2] =
    4783     1312620 :             padfWeightsHorizontal[iC + 1] + dfXScale;
    4784     1312620 :         padfWeightsHorizontal[iC + 3] =
    4785     1312620 :             padfWeightsHorizontal[iC + 2] + dfXScale;
    4786     1312620 :         dfAccumulatorWeightHorizontal +=
    4787     1312620 :             pfnGetWeight4Values(padfWeightsHorizontal + iC);
    4788             :     }
                       // Remaining horizontal taps (fewer than four), one at a time with the
                       // scalar filter; the weight itself is stored and accumulated.
    4789     1145700 :     for (; i <= iMax; ++i, ++iC)
    4790             :     {
    4791       54623 :         const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
    4792       54623 :         padfWeightsHorizontal[iC] = dfWeight;
    4793       54623 :         dfAccumulatorWeightHorizontal += dfWeight;
    4794             :     }
    4795             : 
                       // Same scheme for the vertical taps: j is the tap offset, jC the
                       // 0-based index in padfWeightsVertical.
    4796     1091070 :     int j = jMin;  // Used after for.
    4797     1091070 :     int jC = 0;    // Used after for.
    4798             :     // Not zero, but as close as possible to it, to avoid potential division by
    4799             :     // zero at end of function
    4800     1091070 :     double dfAccumulatorWeightVertical = cpl::NumericLimits<double>::min();
                       // Vertical taps, four at a time (taps j..j+3 are within [jMin, jMax]).
    4801     2332840 :     for (; j + 2 < jMax; j += 4, jC += 4)
    4802             :     {
    4803     1241770 :         padfWeightsVertical[jC] = (j - dfDeltaY) * dfYScale;
    4804     1241770 :         padfWeightsVertical[jC + 1] = padfWeightsVertical[jC] + dfYScale;
    4805     1241770 :         padfWeightsVertical[jC + 2] = padfWeightsVertical[jC + 1] + dfYScale;
    4806     1241770 :         padfWeightsVertical[jC + 3] = padfWeightsVertical[jC + 2] + dfYScale;
    4807     1241770 :         dfAccumulatorWeightVertical +=
    4808     1241770 :             pfnGetWeight4Values(padfWeightsVertical + jC);
    4809             :     }
                       // Remaining vertical taps (fewer than four), one at a time.
    4810     1152230 :     for (; j <= jMax; ++j, ++jC)
    4811             :     {
    4812       61154 :         const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
    4813       61154 :         padfWeightsVertical[jC] = dfWeight;
    4814       61154 :         dfAccumulatorWeightVertical += dfWeight;
    4815             :     }
    4816             : 
                       // Reciprocal of the product of the two accumulated sums, returned
                       // through dfInvWeights.
    4817     1091070 :     dfInvWeights =
    4818     1091070 :         1. / (dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical);
    4819     1091070 : }
    4820             : 
    4821             : /************************************************************************/
    4822             : /*                        GWKResampleNoMasksT()                         */
    4823             : /************************************************************************/
    4824             : // Scalar resampler for one band that consults no mask: sums source pixels around (dfSrcX, dfSrcY) with separate horizontal and vertical weights and stores the clamped result in *pValue.
    4825             : template <class T>
    4826             : static bool
    4827             : GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
    4828             :                     double dfSrcY, T *pValue, double *padfWeightsHorizontal,
    4829             :                     double *padfWeightsVertical, double &dfInvWeights)
    4830             : // The weight buffers and dfInvWeights are filled only when iBand == 0 and reused as-is otherwise. Returns true on the main path, else whatever the bilinear fallback returns.
    4831             : {
    4832             :     // Commonly used; save locally.
    4833             :     const int nSrcXSize = poWK->nSrcXSize;
    4834             :     const int nSrcYSize = poWK->nSrcYSize;
    4835             :     // Anchor pixel: floor of the position shifted by -0.5 (floor, so negative positions round down), plus its linear offset in the band.
    4836             :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4837             :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4838             :     const GPtrDiff_t iSrcOffset =
    4839             :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4840             : 
    4841             :     const int nXRadius = poWK->nXRadius;
    4842             :     const int nYRadius = poWK->nYRadius;
    4843             : 
    4844             :     // Politely refuse to process invalid coordinates or obscenely small image.
    4845             :     if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
    4846             :         nYRadius > nSrcYSize)
    4847             :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    4848             :                                                   pValue);
    4849             :     // Typed view of the band buffer, and the sub-pixel offset of the sample from the anchor pixel.
    4850             :     T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
    4851             :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4852             :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4853             :     // The scales handed to GWKComputeWeights() are capped at 1.0.
    4854             :     const double dfXScale = std::min(poWK->dfXScale, 1.0);
    4855             :     const double dfYScale = std::min(poWK->dfYScale, 1.0);
    4856             :     // Column range [iMin, iMax] relative to iSrcX, trimmed so that iSrcX + i stays within [0, nSrcXSize - 1].
    4857             :     int iMin = 1 - nXRadius;
    4858             :     if (iSrcX + iMin < 0)
    4859             :         iMin = -iSrcX;
    4860             :     int iMax = nXRadius;
    4861             :     if (iSrcX + iMax >= nSrcXSize - 1)
    4862             :         iMax = nSrcXSize - 1 - iSrcX;
    4863             :     // Row range [jMin, jMax] relative to iSrcY, trimmed the same way against nSrcYSize.
    4864             :     int jMin = 1 - nYRadius;
    4865             :     if (iSrcY + jMin < 0)
    4866             :         jMin = -iSrcY;
    4867             :     int jMax = nYRadius;
    4868             :     if (iSrcY + jMax >= nSrcYSize - 1)
    4869             :         jMax = nSrcYSize - 1 - iSrcY;
    4870             :     // Weights are computed on band 0 only; other bands reuse the buffers (assumes the caller handles band 0 first for each pixel - TODO confirm).
    4871             :     if (iBand == 0)
    4872             :     {
    4873             :         GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
    4874             :                           jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
    4875             :                           padfWeightsVertical, dfInvWeights);
    4876             :     }
    4877             : 
    4878             :     // Loop over all rows in the kernel.
    4879             :     double dfAccumulator = 0.0;
    4880             :     for (int jC = 0, j = jMin; j <= jMax; ++j, ++jC)
    4881             :     {
    4882             :         const GPtrDiff_t iSampJ =
    4883             :             iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
    4884             : 
    4885             :         // Loop over all pixels in the row.
    4886             :         double dfAccumulatorLocal = 0.0;
    4887             :         double dfAccumulatorLocal2 = 0.0;
    4888             :         int iC = 0;
    4889             :         int i = iMin;
    4890             :         // Process by chunk of 4 cols.
    4891             :         for (; i + 2 < iMax; i += 4, iC += 4)  // i + 3 <= iMax: four columns remain
    4892             :         {
    4893             :             // Retrieve the pixel & accumulate.
    4894             :             dfAccumulatorLocal +=
    4895             :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    4896             :             dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
    4897             :                                   padfWeightsHorizontal[iC + 1];
    4898             :             dfAccumulatorLocal2 += double(pSrcBand[i + 2 + iSampJ]) *
    4899             :                                    padfWeightsHorizontal[iC + 2];
    4900             :             dfAccumulatorLocal2 += double(pSrcBand[i + 3 + iSampJ]) *
    4901             :                                    padfWeightsHorizontal[iC + 3];
    4902             :         }
    4903             :         dfAccumulatorLocal += dfAccumulatorLocal2;  // merge the two partial sums
    4904             :         if (i < iMax)  // at least two columns remain
    4905             :         {
    4906             :             dfAccumulatorLocal +=
    4907             :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    4908             :             dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
    4909             :                                   padfWeightsHorizontal[iC + 1];
    4910             :             i += 2;
    4911             :             iC += 2;
    4912             :         }
    4913             :         if (i == iMax)  // exactly one column remains
    4914             :         {
    4915             :             dfAccumulatorLocal +=
    4916             :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    4917             :         }
    4918             :         // Apply this row's vertical weight to the horizontal sum.
    4919             :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
    4920             :     }
    4921             :     // Scale by dfInvWeights (set by GWKComputeWeights() to 1 / (horizontal weight sum * vertical weight sum)) and clamp to T.
    4922             :     *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
    4923             : 
    4924             :     return true;
    4925             : }
    4926             : 
    4927             : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
    4928             : /* Could possibly be used too on 32bit, but we would need to check at runtime */
    4929             : #if defined(USE_SSE2)
    4930             : 
    4931             : /************************************************************************/
    4932             : /*                     GWKResampleNoMasks_SSE2_T()                      */
    4933             : /************************************************************************/
    4934             : // Same setup and weighted sum as the scalar GWKResampleNoMasksT() template, but accumulating with XMMReg4Double/XMMReg2Double and handling four kernel rows per outer iteration.
    4935             : template <class T>
    4936     1382149 : static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
    4937             :                                       double dfSrcX, double dfSrcY, T *pValue,
    4938             :                                       double *padfWeightsHorizontal,
    4939             :                                       double *padfWeightsVertical,
    4940             :                                       double &dfInvWeights)
    4941             : {
    4942             :     // Commonly used; save locally.
    4943     1382149 :     const int nSrcXSize = poWK->nSrcXSize;
    4944     1382149 :     const int nSrcYSize = poWK->nSrcYSize;
    4945             :     // Anchor pixel: floor of the position shifted by -0.5, plus its linear offset in the band.
    4946     1382149 :     const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
    4947     1382149 :     const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
    4948     1382149 :     const GPtrDiff_t iSrcOffset =
    4949     1382149 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    4950     1382149 :     const int nXRadius = poWK->nXRadius;
    4951     1382149 :     const int nYRadius = poWK->nYRadius;
    4952             : 
    4953             :     // Politely refuse to process invalid coordinates or obscenely small image.
    4954     1382149 :     if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
    4955             :         nYRadius > nSrcYSize)
    4956           3 :         return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    4957           3 :                                                   pValue);
    4958             : 
    4959     1382146 :     const T *pSrcBand =
    4960     1382146 :         reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    4961             :     // Sub-pixel offset of the sample from the anchor pixel; the scales passed on are capped at 1.0.
    4962     1382146 :     const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
    4963     1382146 :     const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
    4964     1382146 :     const double dfXScale = std::min(poWK->dfXScale, 1.0);
    4965     1382146 :     const double dfYScale = std::min(poWK->dfYScale, 1.0);
    4966             :     // Column range [iMin, iMax] relative to iSrcX, trimmed so that iSrcX + i stays within [0, nSrcXSize - 1].
    4967     1382146 :     int iMin = 1 - nXRadius;
    4968     1382146 :     if (iSrcX + iMin < 0)
    4969       20312 :         iMin = -iSrcX;
    4970     1382146 :     int iMax = nXRadius;
    4971     1382146 :     if (iSrcX + iMax >= nSrcXSize - 1)
    4972        7970 :         iMax = nSrcXSize - 1 - iSrcX;
    4973             :     // Row range [jMin, jMax] relative to iSrcY, trimmed the same way against nSrcYSize.
    4974     1382146 :     int jMin = 1 - nYRadius;
    4975     1382146 :     if (iSrcY + jMin < 0)
    4976       22209 :         jMin = -iSrcY;
    4977     1382146 :     int jMax = nYRadius;
    4978     1382146 :     if (iSrcY + jMax >= nSrcYSize - 1)
    4979        9295 :         jMax = nSrcYSize - 1 - iSrcY;
    4980             :     // Weights are computed on band 0 only; other bands reuse the buffers (assumes the caller handles band 0 first for each pixel - TODO confirm).
    4981     1382146 :     if (iBand == 0)
    4982             :     {
    4983     1091074 :         GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
    4984             :                           jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
    4985             :                           padfWeightsVertical, dfInvWeights);
    4986             :     }
    4987             :     // iSampJ is the offset of the current kernel row at column iSrcX; it is advanced by the row loops below.
    4988     1382146 :     GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
    4989             :     // Process by chunk of 4 rows.
    4990     1382146 :     int jC = 0;
    4991     1382146 :     int j = jMin;
    4992     1382146 :     double dfAccumulator = 0.0;
    4993     3068580 :     for (; j + 2 < jMax; j += 4, iSampJ += 4 * nSrcXSize, jC += 4)  // j + 3 <= jMax: four rows remain
    4994             :     {
    4995             :         // Loop over all pixels in the row.
    4996     1686436 :         int iC = 0;
    4997     1686436 :         int i = iMin;
    4998             :         // Process by chunk of 4 cols.
    4999     1686436 :         XMMReg4Double v_acc_1 = XMMReg4Double::Zero();  // one accumulator per row of the group
    5000     1686436 :         XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
    5001     1686436 :         XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
    5002     1686436 :         XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
    5003     4251632 :         for (; i + 2 < iMax; i += 4, iC += 4)  // i + 3 <= iMax: four columns remain
    5004             :         {
    5005             :             // Retrieve the pixel & accumulate.
    5006     2565196 :             XMMReg4Double v_pixels_1 =
    5007     2565196 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
    5008     2565196 :             XMMReg4Double v_pixels_2 =
    5009     2565196 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
    5010     2565196 :             XMMReg4Double v_pixels_3 =
    5011     2565196 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
    5012     2565196 :             XMMReg4Double v_pixels_4 =
    5013     2565196 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
    5014             :             // The same horizontal weights are applied to all four rows.
    5015     2565196 :             XMMReg4Double v_padfWeight =
    5016     2565196 :                 XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
    5017             : 
    5018     2565196 :             v_acc_1 += v_pixels_1 * v_padfWeight;
    5019     2565196 :             v_acc_2 += v_pixels_2 * v_padfWeight;
    5020     2565196 :             v_acc_3 += v_pixels_3 * v_padfWeight;
    5021     2565196 :             v_acc_4 += v_pixels_4 * v_padfWeight;
    5022             :         }
    5023             :         // At least two columns remain: handle a pair per row with the 2-wide register type.
    5024     1686436 :         if (i < iMax)
    5025             :         {
    5026       25512 :             XMMReg2Double v_pixels_1 =
    5027       25512 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
    5028       25512 :             XMMReg2Double v_pixels_2 =
    5029       25512 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
    5030       25512 :             XMMReg2Double v_pixels_3 =
    5031       25512 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
    5032       25512 :             XMMReg2Double v_pixels_4 =
    5033       25512 :                 XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
    5034             : 
    5035       25512 :             XMMReg2Double v_padfWeight =
    5036       25512 :                 XMMReg2Double::Load2Val(padfWeightsHorizontal + iC);
    5037             :             // NOTE(review): AddToLow presumably adds the pair into the low two lanes of the 4-wide accumulator - confirm in the XMMReg4Double definition.
    5038       25512 :             v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
    5039       25512 :             v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
    5040       25512 :             v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
    5041       25512 :             v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
    5042             : 
    5043       25512 :             i += 2;
    5044       25512 :             iC += 2;
    5045             :         }
    5046             :         // Reduce each row's vector accumulator to a scalar (GetHorizSum presumably sums the lanes - confirm).
    5047     1686436 :         double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
    5048     1686436 :         double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
    5049     1686436 :         double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
    5050     1686436 :         double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
    5051             :         // Exactly one column remains: add it in scalar form for each of the four rows.
    5052     1686436 :         if (i == iMax)
    5053             :         {
    5054       27557 :             dfAccumulatorLocal_1 += static_cast<double>(pSrcBand[i + iSampJ]) *
    5055       27557 :                                     padfWeightsHorizontal[iC];
    5056       27557 :             dfAccumulatorLocal_2 +=
    5057       27557 :                 static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
    5058       27557 :                 padfWeightsHorizontal[iC];
    5059       27557 :             dfAccumulatorLocal_3 +=
    5060       27557 :                 static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
    5061       27557 :                 padfWeightsHorizontal[iC];
    5062       27557 :             dfAccumulatorLocal_4 +=
    5063       27557 :                 static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
    5064       27557 :                 padfWeightsHorizontal[iC];
    5065             :         }
    5066             :         // Apply the vertical weight of each of the four rows.
    5067     1686436 :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal_1;
    5068     1686436 :         dfAccumulator += padfWeightsVertical[jC + 1] * dfAccumulatorLocal_2;
    5069     1686436 :         dfAccumulator += padfWeightsVertical[jC + 2] * dfAccumulatorLocal_3;
    5070     1686436 :         dfAccumulator += padfWeightsVertical[jC + 3] * dfAccumulatorLocal_4;
    5071             :     }
    5072     1456100 :     for (; j <= jMax; ++j, iSampJ += nSrcXSize, ++jC)  // remaining rows (fewer than four), one at a time
    5073             :     {
    5074             :         // Loop over all pixels in the row.
    5075       73954 :         int iC = 0;
    5076       73954 :         int i = iMin;
    5077             :         // Process by chunk of 4 cols.
    5078       73954 :         XMMReg4Double v_acc = XMMReg4Double::Zero();
    5079      172926 :         for (; i + 2 < iMax; i += 4, iC += 4)
    5080             :         {
    5081             :             // Retrieve the pixel & accumulate.
    5082       98972 :             XMMReg4Double v_pixels =
    5083       98972 :                 XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
    5084       98972 :             XMMReg4Double v_padfWeight =
    5085       98972 :                 XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
    5086             : 
    5087       98972 :             v_acc += v_pixels * v_padfWeight;
    5088             :         }
    5089             : 
    5090       73954 :         double dfAccumulatorLocal = v_acc.GetHorizSum();
    5091             :         // Leftover columns are handled in scalar form: first a pair, then a single one.
    5092       73954 :         if (i < iMax)
    5093             :         {
    5094        1862 :             dfAccumulatorLocal +=
    5095        1862 :                 double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
    5096        1862 :             dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
    5097        1862 :                                   padfWeightsHorizontal[iC + 1];
    5098        1862 :             i += 2;
    5099        1862 :             iC += 2;
    5100             :         }
    5101       73954 :         if (i == iMax)
    5102             :         {
    5103        1803 :             dfAccumulatorLocal += static_cast<double>(pSrcBand[i + iSampJ]) *
    5104        1803 :                                   padfWeightsHorizontal[iC];
    5105             :         }
    5106             : 
    5107       73954 :         dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
    5108             :     }
    5109             :     // Scale by dfInvWeights (set by GWKComputeWeights() to 1 / (horizontal weight sum * vertical weight sum)) and clamp to T.
    5110     1382146 :     *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
    5111             : 
    5112     1382146 :     return true;
    5113             : }
    5114             : 
    5115             : /************************************************************************/
    5116             : /*                     GWKResampleNoMasksT<GByte>()                     */
    5117             : /************************************************************************/
    5118             : // GByte specialisation (compiled only when USE_SSE2 is defined): forwards all arguments unchanged to GWKResampleNoMasks_SSE2_T() and returns its result.
    5119             : template <>
    5120      877023 : bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
    5121             :                                 double dfSrcX, double dfSrcY, GByte *pValue,
    5122             :                                 double *padfWeightsHorizontal,
    5123             :                                 double *padfWeightsVertical,
    5124             :                                 double &dfInvWeights)
    5125             : {
    5126      877023 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    5127             :                                      padfWeightsHorizontal, padfWeightsVertical,
    5128      877023 :                                      dfInvWeights);
    5129             : }
    5130             : 
    5131             : /************************************************************************/
    5132             : /*                    GWKResampleNoMasksT<GInt16>()                     */
    5133             : /************************************************************************/
    5134             : // GInt16 specialisation (compiled only when USE_SSE2 is defined): forwards all arguments unchanged to GWKResampleNoMasks_SSE2_T() and returns its result.
    5135             : template <>
    5136      252563 : bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
    5137             :                                  double dfSrcX, double dfSrcY, GInt16 *pValue,
    5138             :                                  double *padfWeightsHorizontal,
    5139             :                                  double *padfWeightsVertical,
    5140             :                                  double &dfInvWeights)
    5141             : {
    5142      252563 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    5143             :                                      padfWeightsHorizontal, padfWeightsVertical,
    5144      252563 :                                      dfInvWeights);
    5145             : }
    5146             : 
    5147             : /************************************************************************/
    5148             : /*                    GWKResampleNoMasksT<GUInt16>()                    */
    5149             : /************************************************************************/
    5150             : // GUInt16 specialisation (compiled only when USE_SSE2 is defined): forwards all arguments unchanged to GWKResampleNoMasks_SSE2_T() and returns its result.
    5151             : template <>
    5152      250063 : bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
    5153             :                                   double dfSrcX, double dfSrcY, GUInt16 *pValue,
    5154             :                                   double *padfWeightsHorizontal,
    5155             :                                   double *padfWeightsVertical,
    5156             :                                   double &dfInvWeights)
    5157             : {
    5158      250063 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    5159             :                                      padfWeightsHorizontal, padfWeightsVertical,
    5160      250063 :                                      dfInvWeights);
    5161             : }
    5162             : 
    5163             : /************************************************************************/
    5164             : /*                     GWKResampleNoMasksT<float>()                     */
    5165             : /************************************************************************/
    5166             : // float specialisation (compiled only when USE_SSE2 is defined): forwards all arguments unchanged to GWKResampleNoMasks_SSE2_T() and returns its result.
    5167             : template <>
    5168        2500 : bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
    5169             :                                 double dfSrcX, double dfSrcY, float *pValue,
    5170             :                                 double *padfWeightsHorizontal,
    5171             :                                 double *padfWeightsVertical,
    5172             :                                 double &dfInvWeights)
    5173             : {
    5174        2500 :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    5175             :                                      padfWeightsHorizontal, padfWeightsVertical,
    5176        2500 :                                      dfInvWeights);
    5177             : }
    5178             : 
    5179             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    5180             : 
    5181             : /************************************************************************/
    5182             : /*                    GWKResampleNoMasksT<double>()                     */
    5183             : /************************************************************************/
    5184             : // double specialisation (compiled only when both USE_SSE2 and INSTANTIATE_FLOAT64_SSE2_IMPL are defined): forwards all arguments unchanged to GWKResampleNoMasks_SSE2_T() and returns its result.
    5185             : template <>
    5186             : bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
    5187             :                                  double dfSrcX, double dfSrcY, double *pValue,
    5188             :                                  double *padfWeightsHorizontal,
    5189             :                                  double *padfWeightsVertical,
    5190             :                                  double &dfInvWeights)
    5191             : {
    5192             :     return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
    5193             :                                      padfWeightsHorizontal, padfWeightsVertical,
    5194             :                                      dfInvWeights);
    5195             : }
    5196             : 
    5197             : #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
    5198             : 
    5199             : #endif /* defined(USE_SSE2) */
    5200             : 
    5201             : /************************************************************************/
    5202             : /*                     GWKRoundSourceCoordinates()                      */
    5203             : /************************************************************************/
    5204             : // Snaps the nDstXSize source coordinates in padfX/padfY to the nearest multiple of dfSrcCoordPrecision; points moved by more than a threshold are re-transformed one at a time with pfnTransformer and snapped again.
    5205        1000 : static void GWKRoundSourceCoordinates(
    5206             :     int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess,
    5207             :     double dfSrcCoordPrecision, double dfErrorThreshold,
    5208             :     GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff,
    5209             :     double dfDstY)
    5210             : {
    5211        1000 :     double dfPct = 0.8;  // default fraction of the half-precision step
    5212        1000 :     if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
    5213             :     {
    5214        1000 :         dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);  // 0.8 at a ratio of 10, approaching 1.0 as the ratio grows
    5215             :     }
    5216        1000 :     const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
    5217             : 
    5218      501000 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5219             :     {
    5220      500000 :         const double dfXBefore = padfX[iDstX];  // kept to measure how far rounding moves the point
    5221      500000 :         const double dfYBefore = padfY[iDstX];
    5222      500000 :         padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
    5223             :                        dfSrcCoordPrecision;
    5224      500000 :         padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
    5225             :                        dfSrcCoordPrecision;
    5226             : 
    5227             :         // If we are in an uncertainty zone, go to non-approximated
    5228             :         // transformation.
    5229             :         // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
    5230             :         // be at least 10 times greater than the approximation error.
    5231      500000 :         if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
    5232      399914 :             fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
    5233             :         {
    5234      180090 :             padfX[iDstX] = iDstX + dfDstXOff;  // reset the point to its destination coordinate before transforming it alone
    5235      180090 :             padfY[iDstX] = dfDstY;
    5236      180090 :             padfZ[iDstX] = 0.0;
    5237      180090 :             pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,  // single-point call; return value unchecked, pabSuccess[iDstX] is passed for the callee to fill
    5238      180090 :                            padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX);
    5239      180090 :             padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
    5240             :                            dfSrcCoordPrecision;
    5241      180090 :             padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
    5242             :                            dfSrcCoordPrecision;
    5243             :         }
    5244             :     }
    5245        1000 : }
    5246             : 
    5247             : /************************************************************************/
    5248             : /*                    GWKCheckAndComputeSrcOffsets()                    */
    5249             : /************************************************************************/
    5250             : static CPL_INLINE bool  // Validates the source coordinate of destination pixel _iDstX and, on success, stores its linear pixel offset in iSrcOffset; returns false when the point must be skipped.
    5251   190187000 : GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
    5252             :                              int _iDstY, double *_padfX, double *_padfY,
    5253             :                              int _nSrcXSize, int _nSrcYSize,
    5254             :                              GPtrDiff_t &iSrcOffset)
    5255             : {
    5256   190187000 :     const GDALWarpKernel *_poWK = psJob->poWK;
    5257   196802000 :     for (int iTry = 0; iTry < 2; ++iTry)  // at most two passes: the coordinate as given, then one single-point retry
    5258             :     {
    5259   196802000 :         if (iTry == 1)
    5260             :         {
    5261             :             // If the source coordinate is slightly outside of the source raster
    5262             :             // retry to transform it alone, so that the exact coordinate
    5263             :             // transformer is used.
    5264             : 
    5265     6614120 :             _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff;  // destination pixel centre
    5266     6614120 :             _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff;
    5267     6614120 :             double dfZ = 0;
    5268     6614120 :             _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1,
    5269     6614120 :                                   _padfX + _iDstX, _padfY + _iDstX, &dfZ,
    5270     6614120 :                                   _pabSuccess + _iDstX);
    5271             :         }
    5272   196802000 :         if (!_pabSuccess[_iDstX])
    5273     3619620 :             return false;
    5274             : 
    5275             :         // If this happens this is likely the symptom of a bug somewhere.
    5276   193182000 :         if (std::isnan(_padfX[_iDstX]) || std::isnan(_padfY[_iDstX]))
    5277             :         {
    5278             :             static bool bNanCoordFound = false;  // function-local flag so the debug message is emitted only the first time; plain bool, no synchronisation visible here
    5279           0 :             if (!bNanCoordFound)
    5280             :             {
    5281           0 :                 CPLDebug("WARP",
    5282             :                          "GWKCheckAndComputeSrcOffsets(): "
    5283             :                          "NaN coordinate found on point %d.",
    5284             :                          _iDstX);
    5285           0 :                 bNanCoordFound = true;
    5286             :             }
    5287           0 :             return false;
    5288             :         }
    5289             : 
    5290             :         /* --------------------------------------------------------------------
    5291             :          */
    5292             :         /*      Figure out what pixel we want in our source raster, and skip */
    5293             :         /*      further processing if it is well off the source image. */
    5294             :         /* --------------------------------------------------------------------
    5295             :          */
    5296             :         /* We test against the value before casting to avoid the */
    5297             :         /* problem of asymmetric truncation effects around zero.  That is */
    5298             :         /* -0.5 will be 0 when cast to an int. */
    5299   193182000 :         if (_padfX[_iDstX] < _poWK->nSrcXOff)
    5300             :         {
    5301             :             // If the source coordinate is slightly outside of the source raster
    5302             :             // retry to transform it alone, so that the exact coordinate
    5303             :             // transformer is used.
    5304    17441500 :             if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1)  // less than one pixel outside on the first pass
    5305     2892850 :                 continue;
    5306    14548600 :             return false;
    5307             :         }
    5308             : 
    5309   175740000 :         if (_padfY[_iDstX] < _poWK->nSrcYOff)
    5310             :         {
    5311             :             // If the source coordinate is slightly outside of the source raster
    5312             :             // retry to transform it alone, so that the exact coordinate
    5313             :             // transformer is used.
    5314     8491820 :             if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1)
    5315      638882 :                 continue;
    5316     7852940 :             return false;
    5317             :         }
    5318             : 
    5319             :         // Check for potential overflow when casting from float to int, (if
    5320             :         // operating outside natural projection area, padfX/Y can be a very huge
    5321             :         // positive number before doing the actual conversion), as such cast is
    5322             :         // undefined behavior that can trigger exception with some compilers
    5323             :         // (see #6753)
    5324   167249000 :         if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff)
    5325             :         {
    5326             :             // If the source coordinate is slightly outside of the source raster
    5327             :             // retry to transform it alone, so that the exact coordinate
    5328             :             // transformer is used.
    5329    13456100 :             if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1)
    5330     2714910 :                 continue;
    5331    10741100 :             return false;
    5332             :         }
    5333   153793000 :         if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff)
    5334             :         {
    5335             :             // If the source coordinate is slightly outside of the source raster
    5336             :             // retry to transform it alone, so that the exact coordinate
    5337             :             // transformer is used.
    5338     5815260 :             if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1)
    5339      367484 :                 continue;
    5340     5447770 :             return false;
    5341             :         }
    5342             : 
    5343   147977000 :         break;  // every range check passed
    5344             :     }
    5345             :     // Truncate to pixel indices relative to the source window origin; the same 1e-10 nudge as in the range checks is applied first.
    5346   147977000 :     int iSrcX = static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;
    5347   147977000 :     int iSrcY = static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
    5348   147977000 :     if (iSrcX == _nSrcXSize)  // an index equal to the size is pulled back to the last column
    5349           0 :         iSrcX--;
    5350   147977000 :     if (iSrcY == _nSrcYSize)  // likewise for the last row
    5351           0 :         iSrcY--;
    5352             : 
    5353             :     // Those checks should normally be OK given the previous ones.
    5354   147977000 :     CPLAssert(iSrcX >= 0);
    5355   147977000 :     CPLAssert(iSrcY >= 0);
    5356   147977000 :     CPLAssert(iSrcX < _nSrcXSize);
    5357   147977000 :     CPLAssert(iSrcY < _nSrcYSize);
    5358             :     // Row-major offset; the multiplication is done in GPtrDiff_t rather than int.
    5359   147977000 :     iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;
    5360             : 
    5361   147977000 :     return true;
    5362             : }
    5363             : 
    5364             : /************************************************************************/
    5365             : /*                 GWKOneSourceCornerFailsToReproject()                 */
    5366             : /************************************************************************/
    5367             : 
    5368         939 : static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)
    5369             : {
    5370         939 :     GDALWarpKernel *poWK = psJob->poWK;
    5371        2805 :     for (int iY = 0; iY <= 1; ++iY)
    5372             :     {
    5373        5605 :         for (int iX = 0; iX <= 1; ++iX)
    5374             :         {
    5375        3739 :             double dfXTmp = poWK->nSrcXOff + iX * poWK->nSrcXSize;
    5376        3739 :             double dfYTmp = poWK->nSrcYOff + iY * poWK->nSrcYSize;
    5377        3739 :             double dfZTmp = 0;
    5378        3739 :             int nSuccess = FALSE;
    5379        3739 :             poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp,
    5380             :                                  &dfYTmp, &dfZTmp, &nSuccess);
    5381        3739 :             if (!nSuccess)
    5382           7 :                 return true;
    5383             :         }
    5384             :     }
    5385         932 :     return false;
    5386             : }
    5387             : 
    5388             : /************************************************************************/
    5389             : /*                      GWKAdjustSrcOffsetOnEdge()                      */
    5390             : /************************************************************************/
    5391             : 
    5392        9714 : static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,
    5393             :                                      GPtrDiff_t &iSrcOffset)
    5394             : {
    5395        9714 :     GDALWarpKernel *poWK = psJob->poWK;
    5396        9714 :     const int nSrcXSize = poWK->nSrcXSize;
    5397        9714 :     const int nSrcYSize = poWK->nSrcYSize;
    5398             : 
    5399             :     // Check if the computed source position slightly altered
    5400             :     // fails to reproject. If so, then we are at the edge of
    5401             :     // the validity area, and it is worth checking neighbour
    5402             :     // source pixels for validity.
    5403        9714 :     int nSuccess = FALSE;
    5404             :     {
    5405        9714 :         double dfXTmp =
    5406        9714 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5407        9714 :         double dfYTmp =
    5408        9714 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5409        9714 :         double dfZTmp = 0;
    5410        9714 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5411             :                              &dfZTmp, &nSuccess);
    5412             :     }
    5413        9714 :     if (nSuccess)
    5414             :     {
    5415        6996 :         double dfXTmp =
    5416        6996 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5417        6996 :         double dfYTmp =
    5418        6996 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
    5419        6996 :         double dfZTmp = 0;
    5420        6996 :         nSuccess = FALSE;
    5421        6996 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5422             :                              &dfZTmp, &nSuccess);
    5423             :     }
    5424        9714 :     if (nSuccess)
    5425             :     {
    5426        5624 :         double dfXTmp =
    5427        5624 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
    5428        5624 :         double dfYTmp =
    5429        5624 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5430        5624 :         double dfZTmp = 0;
    5431        5624 :         nSuccess = FALSE;
    5432        5624 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5433             :                              &dfZTmp, &nSuccess);
    5434             :     }
    5435             : 
    5436       14166 :     if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
    5437        4452 :         CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
    5438             :     {
    5439        1860 :         iSrcOffset++;
    5440        1860 :         return true;
    5441             :     }
    5442       10290 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
    5443        2436 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
    5444             :     {
    5445        1334 :         iSrcOffset += nSrcXSize;
    5446        1334 :         return true;
    5447             :     }
    5448        7838 :     else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
    5449        1318 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
    5450             :     {
    5451         956 :         iSrcOffset--;
    5452         956 :         return true;
    5453             :     }
    5454        5924 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
    5455         360 :              CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
    5456             :     {
    5457         340 :         iSrcOffset -= nSrcXSize;
    5458         340 :         return true;
    5459             :     }
    5460             : 
    5461        5224 :     return false;
    5462             : }
    5463             : 
    5464             : /************************************************************************/
    5465             : /*             GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity()              */
    5466             : /************************************************************************/
    5467             : 
    5468           0 : static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,
    5469             :                                                       GPtrDiff_t &iSrcOffset)
    5470             : {
    5471           0 :     GDALWarpKernel *poWK = psJob->poWK;
    5472           0 :     const int nSrcXSize = poWK->nSrcXSize;
    5473           0 :     const int nSrcYSize = poWK->nSrcYSize;
    5474             : 
    5475             :     // Check if the computed source position slightly altered
    5476             :     // fails to reproject. If so, then we are at the edge of
    5477             :     // the validity area, and it is worth checking neighbour
    5478             :     // source pixels for validity.
    5479           0 :     int nSuccess = FALSE;
    5480             :     {
    5481           0 :         double dfXTmp =
    5482           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5483           0 :         double dfYTmp =
    5484           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5485           0 :         double dfZTmp = 0;
    5486           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5487             :                              &dfZTmp, &nSuccess);
    5488             :     }
    5489           0 :     if (nSuccess)
    5490             :     {
    5491           0 :         double dfXTmp =
    5492           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
    5493           0 :         double dfYTmp =
    5494           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
    5495           0 :         double dfZTmp = 0;
    5496           0 :         nSuccess = FALSE;
    5497           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5498             :                              &dfZTmp, &nSuccess);
    5499             :     }
    5500           0 :     if (nSuccess)
    5501             :     {
    5502           0 :         double dfXTmp =
    5503           0 :             poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
    5504           0 :         double dfYTmp =
    5505           0 :             poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
    5506           0 :         double dfZTmp = 0;
    5507           0 :         nSuccess = FALSE;
    5508           0 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
    5509             :                              &dfZTmp, &nSuccess);
    5510             :     }
    5511             : 
    5512           0 :     if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
    5513           0 :         poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >=
    5514             :             SRC_DENSITY_THRESHOLD_FLOAT)
    5515             :     {
    5516           0 :         iSrcOffset++;
    5517           0 :         return true;
    5518             :     }
    5519           0 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
    5520           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
    5521             :                  SRC_DENSITY_THRESHOLD_FLOAT)
    5522             :     {
    5523           0 :         iSrcOffset += nSrcXSize;
    5524           0 :         return true;
    5525             :     }
    5526           0 :     else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
    5527           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
    5528             :                  SRC_DENSITY_THRESHOLD_FLOAT)
    5529             :     {
    5530           0 :         iSrcOffset--;
    5531           0 :         return true;
    5532             :     }
    5533           0 :     else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
    5534           0 :              poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
    5535             :                  SRC_DENSITY_THRESHOLD_FLOAT)
    5536             :     {
    5537           0 :         iSrcOffset -= nSrcXSize;
    5538           0 :         return true;
    5539             :     }
    5540             : 
    5541           0 :     return false;
    5542             : }
    5543             : 
    5544             : /************************************************************************/
    5545             : /*                           GWKGeneralCase()                           */
    5546             : /*                                                                      */
    5547             : /*      This is the most general case.  It attempts to handle all       */
    5548             : /*      possible features with relatively little concern for            */
    5549             : /*      efficiency.                                                     */
    5550             : /************************************************************************/
    5551             : 
    5552         239 : static void GWKGeneralCaseThread(void *pData)
    5553             : {
    5554         239 :     GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);
    5555         239 :     GDALWarpKernel *poWK = psJob->poWK;
    5556         239 :     const int iYMin = psJob->iYMin;
    5557         239 :     const int iYMax = psJob->iYMax;
    5558             :     const double dfMultFactorVerticalShiftPipeline =
    5559         239 :         poWK->bApplyVerticalShift
    5560         239 :             ? CPLAtof(CSLFetchNameValueDef(
    5561           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5562             :                   "1.0"))
    5563         239 :             : 0.0;
    5564             :     const bool bAvoidNoDataSingleBand =
    5565         239 :         poWK->nBands == 1 ||
    5566           0 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    5567         239 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    5568             : 
    5569         239 :     int nDstXSize = poWK->nDstXSize;
    5570         239 :     int nSrcXSize = poWK->nSrcXSize;
    5571         239 :     int nSrcYSize = poWK->nSrcYSize;
    5572             : 
    5573             :     /* -------------------------------------------------------------------- */
    5574             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5575             :     /*      scanlines worth of positions.                                   */
    5576             :     /* -------------------------------------------------------------------- */
    5577             :     // For x, 2 *, because we cache the precomputed values at the end.
    5578             :     double *padfX =
    5579         239 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5580             :     double *padfY =
    5581         239 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5582             :     double *padfZ =
    5583         239 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5584         239 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5585             : 
    5586         239 :     const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
    5587             : 
    5588         239 :     GWKResampleWrkStruct *psWrkStruct = nullptr;
    5589         239 :     if (poWK->eResample != GRA_NearestNeighbour)
    5590             :     {
    5591         220 :         psWrkStruct = GWKResampleCreateWrkStruct(poWK);
    5592             :     }
    5593         239 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5594         239 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5595         239 :     const double dfErrorThreshold = CPLAtof(
    5596         239 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5597             : 
    5598             :     const bool bOneSourceCornerFailsToReproject =
    5599         239 :         GWKOneSourceCornerFailsToReproject(psJob);
    5600             : 
    5601             :     // Precompute values.
    5602        6469 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5603        6230 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5604             : 
    5605             :     /* ==================================================================== */
    5606             :     /*      Loop over output lines.                                         */
    5607             :     /* ==================================================================== */
    5608        6469 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    5609             :     {
    5610             :         /* --------------------------------------------------------------------
    5611             :          */
    5612             :         /*      Setup points to transform to source image space. */
    5613             :         /* --------------------------------------------------------------------
    5614             :          */
    5615        6230 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    5616        6230 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    5617      242390 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5618      236160 :             padfY[iDstX] = dfY;
    5619        6230 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    5620             : 
    5621             :         /* --------------------------------------------------------------------
    5622             :          */
    5623             :         /*      Transform the points from destination pixel/line coordinates */
    5624             :         /*      to source pixel/line coordinates. */
    5625             :         /* --------------------------------------------------------------------
    5626             :          */
    5627        6230 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    5628             :                              padfY, padfZ, pabSuccess);
    5629        6230 :         if (dfSrcCoordPrecision > 0.0)
    5630             :         {
    5631           0 :             GWKRoundSourceCoordinates(
    5632             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    5633             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    5634           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    5635             :         }
    5636             : 
    5637             :         /* ====================================================================
    5638             :          */
    5639             :         /*      Loop over pixels in output scanline. */
    5640             :         /* ====================================================================
    5641             :          */
    5642      242390 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5643             :         {
    5644      236160 :             GPtrDiff_t iSrcOffset = 0;
    5645      236160 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    5646             :                                               padfX, padfY, nSrcXSize,
    5647             :                                               nSrcYSize, iSrcOffset))
    5648           0 :                 continue;
    5649             : 
    5650             :             /* --------------------------------------------------------------------
    5651             :              */
    5652             :             /*      Do not try to apply transparent/invalid source pixels to the
    5653             :              */
    5654             :             /*      destination.  This currently ignores the multi-pixel input
    5655             :              */
    5656             :             /*      of bilinear and cubic resamples. */
    5657             :             /* --------------------------------------------------------------------
    5658             :              */
    5659      236160 :             double dfDensity = 1.0;
    5660             : 
    5661      236160 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    5662             :             {
    5663        1200 :                 dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5664        1200 :                 if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
    5665             :                 {
    5666           0 :                     if (!bOneSourceCornerFailsToReproject)
    5667             :                     {
    5668           0 :                         continue;
    5669             :                     }
    5670           0 :                     else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
    5671             :                                  psJob, iSrcOffset))
    5672             :                     {
    5673           0 :                         dfDensity =
    5674           0 :                             double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5675             :                     }
    5676             :                     else
    5677             :                     {
    5678           0 :                         continue;
    5679             :                     }
    5680             :                 }
    5681             :             }
    5682             : 
    5683      236160 :             if (poWK->panUnifiedSrcValid != nullptr &&
    5684           0 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    5685             :             {
    5686           0 :                 if (!bOneSourceCornerFailsToReproject)
    5687             :                 {
    5688           0 :                     continue;
    5689             :                 }
    5690           0 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    5691             :                 {
    5692           0 :                     continue;
    5693             :                 }
    5694             :             }
    5695             : 
    5696             :             /* ====================================================================
    5697             :              */
    5698             :             /*      Loop processing each band. */
    5699             :             /* ====================================================================
    5700             :              */
    5701      236160 :             bool bHasFoundDensity = false;
    5702             : 
    5703      236160 :             const GPtrDiff_t iDstOffset =
    5704      236160 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    5705      472320 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5706             :             {
    5707      236160 :                 double dfBandDensity = 0.0;
    5708      236160 :                 double dfValueReal = 0.0;
    5709      236160 :                 double dfValueImag = 0.0;
    5710             : 
    5711             :                 /* --------------------------------------------------------------------
    5712             :                  */
    5713             :                 /*      Collect the source value. */
    5714             :                 /* --------------------------------------------------------------------
    5715             :                  */
    5716      236160 :                 if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
    5717             :                     nSrcYSize == 1)
    5718             :                 {
    5719             :                     // FALSE is returned if dfBandDensity == 0, which is
    5720             :                     // checked below.
    5721         568 :                     CPL_IGNORE_RET_VAL(GWKGetPixelValue(
    5722             :                         poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
    5723             :                         &dfValueImag));
    5724             :                 }
    5725      235592 :                 else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
    5726             :                 {
    5727         248 :                     GWKBilinearResample4Sample(
    5728         248 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5729         248 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5730             :                         &dfValueReal, &dfValueImag);
    5731             :                 }
    5732      235344 :                 else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
    5733             :                 {
    5734         248 :                     GWKCubicResample4Sample(
    5735         248 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5736         248 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5737             :                         &dfValueReal, &dfValueImag);
    5738             :                 }
    5739             :                 else
    5740             : #ifdef DEBUG
    5741             :                     // Only useful for clang static analyzer.
    5742      235096 :                     if (psWrkStruct != nullptr)
    5743             : #endif
    5744             :                     {
    5745      235096 :                         psWrkStruct->pfnGWKResample(
    5746      235096 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    5747      235096 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    5748             :                             &dfValueReal, &dfValueImag, psWrkStruct);
    5749             :                     }
    5750             : 
    5751             :                 // If we didn't find any valid inputs skip to next band.
    5752      236160 :                 if (dfBandDensity < BAND_DENSITY_THRESHOLD)
    5753           0 :                     continue;
    5754             : 
    5755      236160 :                 if (poWK->bApplyVerticalShift)
    5756             :                 {
    5757           0 :                     if (!std::isfinite(padfZ[iDstX]))
    5758           0 :                         continue;
    5759             :                     // Subtract padfZ[] since the coordinate transformation is
    5760             :                     // from target to source
    5761           0 :                     dfValueReal =
    5762           0 :                         dfValueReal * poWK->dfMultFactorVerticalShift -
    5763           0 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    5764             :                 }
    5765             : 
    5766      236160 :                 bHasFoundDensity = true;
    5767             : 
    5768             :                 /* --------------------------------------------------------------------
    5769             :                  */
    5770             :                 /*      We have a computed value from the source.  Now apply it
    5771             :                  * to      */
    5772             :                 /*      the destination pixel. */
    5773             :                 /* --------------------------------------------------------------------
    5774             :                  */
    5775      236160 :                 GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    5776             :                                  dfValueReal, dfValueImag,
    5777             :                                  bAvoidNoDataSingleBand);
    5778             :             }
    5779             : 
    5780      236160 :             if (!bHasFoundDensity)
    5781           0 :                 continue;
    5782             : 
    5783      236160 :             if (!bAvoidNoDataSingleBand)
    5784             :             {
    5785           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    5786             :             }
    5787             : 
    5788             :             /* --------------------------------------------------------------------
    5789             :              */
    5790             :             /*      Update destination density/validity masks. */
    5791             :             /* --------------------------------------------------------------------
    5792             :              */
    5793      236160 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    5794             : 
    5795      236160 :             if (poWK->panDstValid != nullptr)
    5796             :             {
    5797           0 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    5798             :             }
    5799             :         } /* Next iDstX */
    5800             : 
    5801             :         /* --------------------------------------------------------------------
    5802             :          */
    5803             :         /*      Report progress to the user, and optionally cancel out. */
    5804             :         /* --------------------------------------------------------------------
    5805             :          */
    5806        6230 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    5807           0 :             break;
    5808             :     }
    5809             : 
    5810             :     /* -------------------------------------------------------------------- */
    5811             :     /*      Cleanup and return.                                             */
    5812             :     /* -------------------------------------------------------------------- */
    5813         239 :     CPLFree(padfX);
    5814         239 :     CPLFree(padfY);
    5815         239 :     CPLFree(padfZ);
    5816         239 :     CPLFree(pabSuccess);
    5817         239 :     if (psWrkStruct)
    5818         220 :         GWKResampleDeleteWrkStruct(psWrkStruct);
    5819         239 : }
    5820             : 
    5821         239 : static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
    5822             : {
    5823         239 :     return GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);
    5824             : }
    5825             : 
    5826             : /************************************************************************/
    5827             : /*                            GWKRealCase()                             */
    5828             : /*                                                                      */
    5829             : /*      General case for non-complex data types.                        */
    5830             : /************************************************************************/
    5831             : 
    5832         223 : static void GWKRealCaseThread(void *pData)  // pData is a GWKJobStruct*; warps destination lines [iYMin, iYMax) of that job.
    5833             : 
    5834             : {
    5835         223 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    5836         223 :     GDALWarpKernel *poWK = psJob->poWK;
    5837         223 :     const int iYMin = psJob->iYMin;
    5838         223 :     const int iYMax = psJob->iYMax;
    5839             : 
    5840         223 :     const int nDstXSize = poWK->nDstXSize;
    5841         223 :     const int nSrcXSize = poWK->nSrcXSize;
    5842         223 :     const int nSrcYSize = poWK->nSrcYSize;
    5843             :     const double dfMultFactorVerticalShiftPipeline =  // 0.0 unless bApplyVerticalShift; else the option value, default "1.0".
    5844         223 :         poWK->bApplyVerticalShift
    5845         223 :             ? CPLAtof(CSLFetchNameValueDef(
    5846           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    5847             :                   "1.0"))
    5848         223 :             : 0.0;
    5849             :     const bool bAvoidNoDataSingleBand =  // True for a single band, or when UNIFIED_SRC_NODATA is not enabled.
    5850         305 :         poWK->nBands == 1 ||
    5851          82 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    5852         223 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    5853             : 
    5854             :     /* -------------------------------------------------------------------- */
    5855             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    5856             :     /*      scanline's worth of positions.                                  */
    5857             :     /* -------------------------------------------------------------------- */
    5858             : 
    5859             :     // For x, 2 *, because we cache the precomputed values at the end.
    5860             :     double *padfX =
    5861         223 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    5862             :     double *padfY =
    5863         223 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5864             :     double *padfZ =
    5865         223 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    5866         223 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    5867             : 
    5868         223 :     const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
    5869             : 
    5870         223 :     GWKResampleWrkStruct *psWrkStruct = nullptr;  // Stays null for nearest neighbour; released at the end otherwise.
    5871         223 :     if (poWK->eResample != GRA_NearestNeighbour)
    5872             :     {
    5873         181 :         psWrkStruct = GWKResampleCreateWrkStruct(poWK);
    5874             :     }
    5875         223 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    5876         223 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    5877         223 :     const double dfErrorThreshold = CPLAtof(
    5878         223 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    5879             : 
    5880         638 :     const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&  // Of the three source masks tested, only the unified density one is set.
    5881         415 :                                    poWK->papanBandSrcValid == nullptr &&
    5882         192 :                                    poWK->pafUnifiedSrcDensity != nullptr;
    5883             : 
    5884             :     const bool bOneSourceCornerFailsToReproject =
    5885         223 :         GWKOneSourceCornerFailsToReproject(psJob);
    5886             : 
    5887             :     // Precompute the destination X pixel centres in the upper half of padfX.
    5888       24657 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5889       24434 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    5890             : 
    5891             :     /* ==================================================================== */
    5892             :     /*      Loop over output lines.                                         */
    5893             :     /* ==================================================================== */
    5894       25909 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    5895             :     {
    5896             :         /* --------------------------------------------------------------------
    5897             :          */
    5898             :         /*      Setup points to transform to source image space. */
    5899             :         /* --------------------------------------------------------------------
    5900             :          */
    5901       25686 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);  // Reload cached destination X centres; padfX is handed to the transformer below.
    5902       25686 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    5903    44594200 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5904    44568500 :             padfY[iDstX] = dfY;
    5905       25686 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    5906             : 
    5907             :         /* --------------------------------------------------------------------
    5908             :          */
    5909             :         /*      Transform the points from destination pixel/line coordinates */
    5910             :         /*      to source pixel/line coordinates. */
    5911             :         /* --------------------------------------------------------------------
    5912             :          */
    5913       25686 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    5914             :                              padfY, padfZ, pabSuccess);
    5915       25686 :         if (dfSrcCoordPrecision > 0.0)  // Rounding only runs when SRC_COORD_PRECISION is positive.
    5916             :         {
    5917           0 :             GWKRoundSourceCoordinates(
    5918             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    5919             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    5920           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    5921             :         }
    5922             : 
    5923             :         /* ====================================================================
    5924             :          */
    5925             :         /*      Loop over pixels in output scanline. */
    5926             :         /* ====================================================================
    5927             :          */
    5928    44594200 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    5929             :         {
    5930    44568500 :             GPtrDiff_t iSrcOffset = 0;
    5931    44568500 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    5932             :                                               padfX, padfY, nSrcXSize,
    5933             :                                               nSrcYSize, iSrcOffset))
    5934    43823900 :                 continue;  // Helper returned false: skip this destination pixel.
    5935             : 
    5936             :             /* --------------------------------------------------------------------
    5937             :              */
    5938             :             /*      Do not try to apply transparent/invalid source pixels to the
    5939             :              */
    5940             :             /*      destination.  This currently ignores the multi-pixel input
    5941             :              */
    5942             :             /*      of bilinear and cubic resamples. */
    5943             :             /* --------------------------------------------------------------------
    5944             :              */
    5945    31812400 :             double dfDensity = 1.0;
    5946             : 
    5947    31812400 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    5948             :             {
    5949     1669560 :                 dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5950     1669560 :                 if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
    5951             :                 {
    5952     1538480 :                     if (!bOneSourceCornerFailsToReproject)
    5953             :                     {
    5954     1538480 :                         continue;
    5955             :                     }
    5956           0 :                     else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(  // NOTE(review): helper presumably updates iSrcOffset; density is re-read below.
    5957             :                                  psJob, iSrcOffset))
    5958             :                     {
    5959           0 :                         dfDensity =
    5960           0 :                             double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    5961             :                     }
    5962             :                     else
    5963             :                     {
    5964           0 :                         continue;
    5965             :                     }
    5966             :                 }
    5967             :             }
    5968             : 
    5969    59903100 :             if (poWK->panUnifiedSrcValid != nullptr &&
    5970    29629200 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    5971             :             {
    5972    29531600 :                 if (!bOneSourceCornerFailsToReproject)
    5973             :                 {
    5974    29529300 :                     continue;
    5975             :                 }
    5976        2229 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    5977             :                 {
    5978           0 :                     continue;
    5979             :                 }
    5980             :             }
    5981             : 
    5982             :             /* ====================================================================
    5983             :              */
    5984             :             /*      Loop processing each band. */
    5985             :             /* ====================================================================
    5986             :              */
    5987      744578 :             bool bHasFoundDensity = false;  // Set once any band yields a usable value.
    5988             : 
    5989      744578 :             const GPtrDiff_t iDstOffset =
    5990      744578 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    5991     2092550 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    5992             :             {
    5993     1347980 :                 double dfBandDensity = 0.0;
    5994     1347980 :                 double dfValueReal = 0.0;
    5995             : 
    5996             :                 /* --------------------------------------------------------------------
    5997             :                  */
    5998             :                 /*      Collect the source value. */
    5999             :                 /* --------------------------------------------------------------------
    6000             :                  */
    6001     1347980 :                 if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
    6002             :                     nSrcYSize == 1)  // Sources one pixel wide or high also take the single-pixel path.
    6003             :                 {
    6004             :                     // FALSE is returned if dfBandDensity == 0, which is
    6005             :                     // checked below.
    6006       15516 :                     CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
    6007             :                         poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
    6008             :                 }
    6009     1332460 :                 else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
    6010             :                 {
    6011        2046 :                     double dfValueImagIgnored = 0.0;
    6012        2046 :                     GWKBilinearResample4Sample(
    6013        2046 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6014        2046 :                         padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6015        2046 :                         &dfValueReal, &dfValueImagIgnored);
    6016             :                 }
    6017     1330410 :                 else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
    6018             :                 {
    6019      691552 :                     if (bSrcMaskIsDensity)  // Density-only masking: dispatch on the working data type.
    6020             :                     {
    6021      389755 :                         if (poWK->eWorkingDataType == GDT_UInt8)
    6022             :                         {
    6023      389755 :                             GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
    6024      389755 :                                 poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6025      389755 :                                 padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6026             :                                 &dfValueReal);
    6027             :                         }
    6028           0 :                         else if (poWK->eWorkingDataType == GDT_UInt16)
    6029             :                         {
    6030             :                             GWKCubicResampleSrcMaskIsDensity4SampleRealT<
    6031           0 :                                 GUInt16>(poWK, iBand,
    6032           0 :                                          padfX[iDstX] - poWK->nSrcXOff,
    6033           0 :                                          padfY[iDstX] - poWK->nSrcYOff,
    6034             :                                          &dfBandDensity, &dfValueReal);
    6035             :                         }
    6036             :                         else
    6037             :                         {
    6038           0 :                             GWKCubicResampleSrcMaskIsDensity4SampleReal(
    6039           0 :                                 poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6040           0 :                                 padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6041             :                                 &dfValueReal);
    6042             :                         }
    6043             :                     }
    6044             :                     else
    6045             :                     {
    6046      301797 :                         double dfValueImagIgnored = 0.0;
    6047      301797 :                         GWKCubicResample4Sample(
    6048      301797 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6049      301797 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6050             :                             &dfValueReal, &dfValueImagIgnored);
    6051      691552 :                     }
    6052             :                 }
    6053             :                 else
    6054             : #ifdef DEBUG
    6055             :                     // Only useful for clang static analyzer.
    6056      638861 :                     if (psWrkStruct != nullptr)
    6057             : #endif
    6058             :                     {
    6059      638861 :                         double dfValueImagIgnored = 0.0;
    6060      638861 :                         psWrkStruct->pfnGWKResample(
    6061      638861 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6062      638861 :                             padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
    6063             :                             &dfValueReal, &dfValueImagIgnored, psWrkStruct);
    6064             :                     }
    6065             : 
    6066             :                 // If we didn't find any valid inputs skip to next band.
    6067     1347980 :                 if (dfBandDensity < BAND_DENSITY_THRESHOLD)
    6068           0 :                     continue;
    6069             : 
    6070     1347980 :                 if (poWK->bApplyVerticalShift)
    6071             :                 {
    6072           0 :                     if (!std::isfinite(padfZ[iDstX]))
    6073           0 :                         continue;  // Non-finite Z shift: leave this band unwritten.
    6074             :                     // Subtract padfZ[] since the coordinate transformation is
    6075             :                     // from target to source
    6076           0 :                     dfValueReal =
    6077           0 :                         dfValueReal * poWK->dfMultFactorVerticalShift -
    6078           0 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    6079             :                 }
    6080             : 
    6081     1347980 :                 bHasFoundDensity = true;
    6082             : 
    6083             :                 /* --------------------------------------------------------------------
    6084             :                  */
    6085             :                 /*      We have a computed value from the source.  Now apply it
    6086             :                  * to      */
    6087             :                 /*      the destination pixel. */
    6088             :                 /* --------------------------------------------------------------------
    6089             :                  */
    6090     1347980 :                 GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
    6091             :                                      dfValueReal, bAvoidNoDataSingleBand);
    6092             :             }
    6093             : 
    6094      744578 :             if (!bHasFoundDensity)
    6095           0 :                 continue;  // No band was written: skip the density/validity updates.
    6096             : 
    6097      744578 :             if (!bAvoidNoDataSingleBand)
    6098             :             {
    6099      100295 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    6100             :             }
    6101             : 
    6102             :             /* --------------------------------------------------------------------
    6103             :              */
    6104             :             /*      Update destination density/validity masks. */
    6105             :             /* --------------------------------------------------------------------
    6106             :              */
    6107      744578 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    6108             : 
    6109      744578 :             if (poWK->panDstValid != nullptr)
    6110             :             {
    6111      104586 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    6112             :             }
    6113             :         }  // Next iDstX.
    6114             : 
    6115             :         /* --------------------------------------------------------------------
    6116             :          */
    6117             :         /*      Report progress to the user, and optionally cancel out. */
    6118             :         /* --------------------------------------------------------------------
    6119             :          */
    6120       25686 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6121           0 :             break;  // Non-zero progress return: stop processing further lines.
    6122             :     }
    6123             : 
    6124             :     /* -------------------------------------------------------------------- */
    6125             :     /*      Cleanup and return.                                             */
    6126             :     /* -------------------------------------------------------------------- */
    6127         223 :     CPLFree(padfX);
    6128         223 :     CPLFree(padfY);
    6129         223 :     CPLFree(padfZ);
    6130         223 :     CPLFree(pabSuccess);
    6131         223 :     if (psWrkStruct)
    6132         181 :         GWKResampleDeleteWrkStruct(psWrkStruct);
    6133         223 : }
    6134             : 
    6135         223 : static CPLErr GWKRealCase(GDALWarpKernel *poWK)
    6136             : {  // Entry point: hands GWKRealCaseThread to GWKRun() and returns its CPLErr.
    6137         223 :     return GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);
    6138             : }
    6139             : 
    6140             : /************************************************************************/
    6141             : /*                 GWKCubicResampleNoMasks4MultiBandT()                 */
    6142             : /************************************************************************/
    6143             : 
    6144             : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
    6145             : /* and enough SSE registers */
    6146             : #if defined(USE_SSE2)
    6147             : 
    6148   142421000 : static inline float Convolute4x4(const __m128 row0, const __m128 row1,
    6149             :                                  const __m128 row2, const __m128 row3,
    6150             :                                  const __m128 weightsXY0,
    6151             :                                  const __m128 weightsXY1,
    6152             :                                  const __m128 weightsXY2,
    6153             :                                  const __m128 weightsXY3)
    6154             : {  // Adds the lane-wise products rowN * weightsXYN for N = 0..3, then reduces with XMMHorizontalAdd() (presumably a sum of the 4 lanes).
    6155   996949000 :     return XMMHorizontalAdd(_mm_add_ps(
    6156             :         _mm_add_ps(_mm_mul_ps(row0, weightsXY0), _mm_mul_ps(row1, weightsXY1)),
    6157             :         _mm_add_ps(_mm_mul_ps(row2, weightsXY2),
    6158   142421000 :                    _mm_mul_ps(row3, weightsXY3))));
    6159             : }
    6160             : 
    6161             : template <class T>
    6162    48891642 : static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,
    6163             :                                                double dfSrcX, double dfSrcY,
    6164             :                                                const GPtrDiff_t iDstOffset)
    6165             : {  // Cubic-resamples every band at source position (dfSrcX, dfSrcY) into iDstOffset; no source mask is read.
    6166    48891642 :     const double dfSrcXShifted = dfSrcX - 0.5;
    6167    48891642 :     const int iSrcX = static_cast<int>(dfSrcXShifted);
    6168    48891642 :     const double dfSrcYShifted = dfSrcY - 0.5;
    6169    48891642 :     const int iSrcY = static_cast<int>(dfSrcYShifted);
    6170    48891642 :     const GPtrDiff_t iSrcOffset =
    6171    48891642 :         iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
    6172             : 
    6173             :     // Fall back to bilinear when the 4x4 window would cross an image border.
    6174    48891642 :     if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
    6175    47609162 :         iSrcY + 2 >= poWK->nSrcYSize)
    6176             :     {
    6177     5671540 :         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6178             :         {
    6179             :             T value;
    6180     4253650 :             GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
    6181             :                                                &value);
    6182     4253650 :             reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
    6183             :                 value;
    6184     1417880 :         }
    6185             :     }
    6186             :     else
    6187             :     {
    6188    47473762 :         const float fDeltaX = static_cast<float>(dfSrcXShifted) - iSrcX;
    6189    47473762 :         const float fDeltaY = static_cast<float>(dfSrcYShifted) - iSrcY;
    6190             : 
    6191             :         float afCoeffsX[4];
    6192             :         float afCoeffsY[4];
    6193    47473762 :         GWKCubicComputeWeights(fDeltaX, afCoeffsX);
    6194    47473762 :         GWKCubicComputeWeights(fDeltaY, afCoeffsY);
    6195    47473762 :         const auto weightsX = _mm_loadu_ps(afCoeffsX);
    6196             :         const auto weightsXY0 =  // weightsXYn: afCoeffsY[n] broadcast to all lanes, times the four X weights.
    6197    94947524 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[0]), weightsX);
    6198             :         const auto weightsXY1 =
    6199    94947524 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[1]), weightsX);
    6200             :         const auto weightsXY2 =
    6201    94947524 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[2]), weightsX);
    6202             :         const auto weightsXY3 =
    6203    47473762 :             _mm_mul_ps(_mm_load1_ps(&afCoeffsY[3]), weightsX);
    6204             : 
    6205    47473762 :         const GPtrDiff_t iOffset = iSrcOffset - poWK->nSrcXSize - 1;  // Top-left sample of the 4x4 window: one row up, one column left.
    6206             : 
    6207    47473762 :         int iBand = 0;
    6208             :         // Process 2 bands at a time
    6209    94947524 :         for (; iBand + 1 < poWK->nBands; iBand += 2)
    6210             :         {
    6211    47473762 :             const T *CPL_RESTRICT pBand0 =
    6212    47473762 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    6213    47473762 :             const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);
    6214             :             const auto row1_0 =
    6215    47473762 :                 XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
    6216             :             const auto row2_0 =
    6217    47473762 :                 XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
    6218             :             const auto row3_0 =
    6219    47473762 :                 XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
    6220             : 
    6221    47473762 :             const T *CPL_RESTRICT pBand1 =
    6222    47473762 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
    6223    47473762 :             const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
    6224             :             const auto row1_1 =
    6225    47473762 :                 XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
    6226             :             const auto row2_1 =
    6227    47473762 :                 XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
    6228             :             const auto row3_1 =
    6229    47473762 :                 XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
    6230             : 
    6231             :             const float fValue_0 =
    6232    47473762 :                 Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsXY0,
    6233             :                              weightsXY1, weightsXY2, weightsXY3);
    6234             : 
    6235             :             const float fValue_1 =
    6236    47473762 :                 Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsXY0,
    6237             :                              weightsXY1, weightsXY2, weightsXY3);
    6238             : 
    6239    47473762 :             T *CPL_RESTRICT pDstBand0 =
    6240    47473762 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    6241    47473762 :             pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);
    6242             : 
    6243    47473762 :             T *CPL_RESTRICT pDstBand1 =
    6244    47473762 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
    6245    47473762 :             pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
    6246             :         }
    6247    47473762 :         if (iBand < poWK->nBands)  // Odd band count: one band is left after the pairwise loop.
    6248             :         {
    6249    47473762 :             const T *CPL_RESTRICT pBand0 =
    6250    47473762 :                 reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
    6251    47473762 :             const auto row0 = XMMLoad4Values(pBand0 + iOffset);
    6252             :             const auto row1 =
    6253    47473762 :                 XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
    6254             :             const auto row2 =
    6255    47473762 :                 XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
    6256             :             const auto row3 =
    6257    47473762 :                 XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
    6258             : 
    6259             :             const float fValue =
    6260    47473762 :                 Convolute4x4(row0, row1, row2, row3, weightsXY0, weightsXY1,
    6261             :                              weightsXY2, weightsXY3);
    6262             : 
    6263    47473762 :             T *CPL_RESTRICT pDstBand =
    6264    47473762 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
    6265    47473762 :             pDstBand[iDstOffset] = GWKClampValueT<T>(fValue);
    6266             :         }
    6267             :     }
    6268             : 
    6269    48891642 :     if (poWK->pafDstDensity)  // The destination density buffer is optional; when present the pixel is marked 1.0.
    6270    46737601 :         poWK->pafDstDensity[iDstOffset] = 1.0f;
    6271    48891642 : }
    6272             : 
    6273             : #endif  // defined(USE_SSE2)
    6274             : 
    6275             : /************************************************************************/
    6276             : /*          GWKResampleNoMasksOrDstDensityOnlyThreadInternal()          */
    6277             : /************************************************************************/
    6278             : 
    6279             : template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
    6280        2036 : static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
    6281             : 
    6282             : {
    6283        2036 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6284        2036 :     GDALWarpKernel *poWK = psJob->poWK;
    6285        2036 :     const int iYMin = psJob->iYMin;
    6286        2036 :     const int iYMax = psJob->iYMax;
    6287        2018 :     const double dfMultFactorVerticalShiftPipeline =
    6288        2036 :         poWK->bApplyVerticalShift
    6289          18 :             ? CPLAtof(CSLFetchNameValueDef(
    6290          18 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6291             :                   "1.0"))
    6292             :             : 0.0;
    6293             : 
    6294        2036 :     const int nDstXSize = poWK->nDstXSize;
    6295        2036 :     const int nSrcXSize = poWK->nSrcXSize;
    6296        2036 :     const int nSrcYSize = poWK->nSrcYSize;
    6297             : 
    6298             :     /* -------------------------------------------------------------------- */
    6299             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    6300             :     /*      scanlines worth of positions.                                   */
    6301             :     /* -------------------------------------------------------------------- */
    6302             : 
    6303             :     // For x, 2 *, because we cache the precomputed values at the end.
    6304             :     double *padfX =
    6305        2036 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    6306             :     double *padfY =
    6307        2036 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6308             :     double *padfZ =
    6309        2036 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6310        2036 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6311             : 
    6312        2036 :     const int nXRadius = poWK->nXRadius;
    6313             :     double *padfWeightsX =
    6314        2036 :         static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
    6315             :     double *padfWeightsY = static_cast<double *>(
    6316        2036 :         CPLCalloc(1 + poWK->nYRadius * 2, sizeof(double)));
    6317        2036 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6318        2036 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6319        2036 :     const double dfErrorThreshold = CPLAtof(
    6320        2036 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6321             : 
    6322             :     // Precompute values.
    6323      504244 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6324      502208 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    6325             : 
    6326             :     /* ==================================================================== */
    6327             :     /*      Loop over output lines.                                         */
    6328             :     /* ==================================================================== */
    6329      324090 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6330             :     {
    6331             :         /* --------------------------------------------------------------------
    6332             :          */
    6333             :         /*      Setup points to transform to source image space. */
    6334             :         /* --------------------------------------------------------------------
    6335             :          */
    6336      322055 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    6337      322055 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    6338   111932457 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6339   111610449 :             padfY[iDstX] = dfY;
    6340      322055 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    6341             : 
    6342             :         /* --------------------------------------------------------------------
    6343             :          */
    6344             :         /*      Transform the points from destination pixel/line coordinates */
    6345             :         /*      to source pixel/line coordinates. */
    6346             :         /* --------------------------------------------------------------------
    6347             :          */
    6348      322055 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    6349             :                              padfY, padfZ, pabSuccess);
    6350      322055 :         if (dfSrcCoordPrecision > 0.0)
    6351             :         {
    6352        1000 :             GWKRoundSourceCoordinates(
    6353             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6354             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6355        1000 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    6356             :         }
    6357             : 
    6358             :         /* ====================================================================
    6359             :          */
    6360             :         /*      Loop over pixels in output scanline. */
    6361             :         /* ====================================================================
    6362             :          */
    6363   111932457 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6364             :         {
    6365   111610449 :             GPtrDiff_t iSrcOffset = 0;
    6366   111610449 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    6367             :                                               padfX, padfY, nSrcXSize,
    6368             :                                               nSrcYSize, iSrcOffset))
    6369    63085888 :                 continue;
    6370             : 
    6371             :             /* ====================================================================
    6372             :              */
    6373             :             /*      Loop processing each band. */
    6374             :             /* ====================================================================
    6375             :              */
    6376    97416161 :             const GPtrDiff_t iDstOffset =
    6377    97416161 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6378             : 
    6379             : #if defined(USE_SSE2)
    6380             :             if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
    6381             :                           (std::is_same<T, GByte>::value ||
    6382             :                            std::is_same<T, GUInt16>::value))
    6383             :             {
    6384    49957541 :                 if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
    6385             :                 {
    6386    48891642 :                     GWKCubicResampleNoMasks4MultiBandT<T>(
    6387    48891642 :                         poWK, padfX[iDstX] - poWK->nSrcXOff,
    6388    48891642 :                         padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
    6389             : 
    6390    48891642 :                     continue;
    6391             :                 }
    6392             :             }
    6393             : #endif  // defined(USE_SSE2)
    6394             : 
    6395    48524518 :             [[maybe_unused]] double dfInvWeights = 0;
    6396   133360926 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6397             :             {
    6398    84836804 :                 T value = 0;
    6399             :                 if constexpr (eResample == GRA_NearestNeighbour)
    6400             :                 {
    6401    77111130 :                     value = reinterpret_cast<T *>(
    6402    77111130 :                         poWK->papabySrcImage[iBand])[iSrcOffset];
    6403             :                 }
    6404             :                 else if constexpr (bUse4SamplesFormula)
    6405             :                 {
    6406             :                     if constexpr (eResample == GRA_Bilinear)
    6407     4042275 :                         GWKBilinearResampleNoMasks4SampleT(
    6408     4042275 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6409     4042275 :                             padfY[iDstX] - poWK->nSrcYOff, &value);
    6410             :                     else
    6411     2301250 :                         GWKCubicResampleNoMasks4SampleT(
    6412     2301250 :                             poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6413     2301250 :                             padfY[iDstX] - poWK->nSrcYOff, &value);
    6414             :                 }
    6415             :                 else
    6416             :                 {
    6417     1382149 :                     GWKResampleNoMasksT(
    6418     1382149 :                         poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
    6419     1382149 :                         padfY[iDstX] - poWK->nSrcYOff, &value, padfWeightsX,
    6420             :                         padfWeightsY, dfInvWeights);
    6421             :                 }
    6422             : 
    6423    84836804 :                 if (poWK->bApplyVerticalShift)
    6424             :                 {
    6425         818 :                     if (!std::isfinite(padfZ[iDstX]))
    6426           0 :                         continue;
    6427             :                     // Subtract padfZ[] since the coordinate transformation is
    6428             :                     // from target to source
    6429         818 :                     value = GWKClampValueT<T>(
    6430         818 :                         double(value) * poWK->dfMultFactorVerticalShift -
    6431         818 :                         padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
    6432             :                 }
    6433             : 
    6434    84836804 :                 if (poWK->pafDstDensity)
    6435    10261231 :                     poWK->pafDstDensity[iDstOffset] = 1.0f;
    6436             : 
    6437    84836804 :                 reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
    6438             :                     value;
    6439             :             }
    6440             :         }
    6441             : 
    6442             :         /* --------------------------------------------------------------------
    6443             :          */
    6444             :         /*      Report progress to the user, and optionally cancel out. */
    6445             :         /* --------------------------------------------------------------------
    6446             :          */
    6447      322055 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6448           1 :             break;
    6449             :     }
    6450             : 
    6451             :     /* -------------------------------------------------------------------- */
    6452             :     /*      Cleanup and return.                                             */
    6453             :     /* -------------------------------------------------------------------- */
    6454        2036 :     CPLFree(padfX);
    6455        2036 :     CPLFree(padfY);
    6456        2036 :     CPLFree(padfZ);
    6457        2036 :     CPLFree(pabSuccess);
    6458        2036 :     CPLFree(padfWeightsX);
    6459        2036 :     CPLFree(padfWeightsY);
    6460        2036 : }
    6461             : 
    6462             : template <class T, GDALResampleAlg eResample>
    6463        1005 : static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)
    6464             : {
                           // Thin entry point taking the opaque job pointer: forwards pData
                           // unchanged to the shared implementation with FALSE as the third
                           // template argument. The Has4Sample entry point is the one that
                           // may select TRUE instead.
    6465        1005 :     GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
    6466             :         pData);
    6467        1005 : }
    6468             : 
    6469             : template <class T, GDALResampleAlg eResample>
    6470        1031 : static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)
    6471             : 
    6472             : {
                           // Entry point that chooses, per job, between the two compile-time
                           // variants of the shared implementation (third template argument
                           // TRUE or FALSE).
    6473        1031 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6474        1031 :     GDALWarpKernel *poWK = psJob->poWK;
                           // This entry point may only be instantiated for bilinear or cubic.
    6475             :     static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);
                           // Runtime decision on the kernel; each branch below calls a
                           // different template instantiation with the same pData.
    6476        1031 :     const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
    6477        1031 :     if (bUse4SamplesFormula)
    6478         976 :         GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(
    6479             :             pData);
    6480             :     else
    6481          55 :         GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
    6482             :             pData);
    6483        1031 : }
    6484             : 
    6485         954 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6486             : {
                           // Calls GWKRun() on poWK with the <GByte, GRA_NearestNeighbour>
                           // instantiation of GWKResampleNoMasksOrDstDensityOnlyThread as the
                           // function argument and returns GWKRun()'s CPLErr unchanged. The
                           // string argument repeats this function's name.
    6487         954 :     return GWKRun(
    6488             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyByte",
    6489         954 :         GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>);
    6490             : }
    6491             : 
    6492         132 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6493             : {
                           // Calls GWKRun() on poWK with the <GByte, GRA_Bilinear>
                           // instantiation of GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread
                           // as the function argument and returns GWKRun()'s CPLErr unchanged.
                           // The string argument repeats this function's name.
    6494         132 :     return GWKRun(
    6495             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte",
    6496             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
    6497         132 :                                                            GRA_Bilinear>);
    6498             : }
    6499             : 
    6500         852 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6501             : {
                           // Calls GWKRun() on poWK with the <GByte, GRA_Cubic> instantiation
                           // of GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread as the
                           // function argument and returns GWKRun()'s CPLErr unchanged. The
                           // string argument repeats this function's name.
    6502         852 :     return GWKRun(
    6503             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyByte",
    6504         852 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>);
    6505             : }
    6506             : 
    6507           9 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6508             : {
                           // Calls GWKRun() on poWK with the <float, GRA_Cubic> instantiation
                           // of GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread as the
                           // function argument and returns GWKRun()'s CPLErr unchanged. The
                           // string argument repeats this function's name.
    6509           9 :     return GWKRun(
    6510             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat",
    6511           9 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>);
    6512             : }
    6513             : 
    6514             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    6515             : 
    6516             : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
    6517             : {
                           // Only compiled when INSTANTIATE_FLOAT64_SSE2_IMPL is defined (see
                           // the enclosing #ifdef). Calls GWKRun() on poWK with the
                           // <double, GRA_Cubic> instantiation of
                           // GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread as the function
                           // argument and returns GWKRun()'s CPLErr unchanged.
    6518             :     return GWKRun(
    6519             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble",
    6520             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>);
    6521             : }
    6522             : #endif
    6523             : 
    6524          12 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
    6525             : {
                           // Calls GWKRun() on poWK with the <GByte, GRA_CubicSpline>
                           // instantiation of GWKResampleNoMasksOrDstDensityOnlyThread as the
                           // function argument and returns GWKRun()'s CPLErr unchanged. The
                           // string argument repeats this function's name.
    6526          12 :     return GWKRun(
    6527             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
    6528          12 :         GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>);
    6529             : }
    6530             : 
    6531             : /************************************************************************/
    6532             : /*                          GWKNearestThread()                          */
    6533             : /*                                                                      */
    6534             : /*      Nearest neighbour resampling using valid flags, for sample      */
    6535             : /*      type T. Should be as fast as possible for this particular       */
    6536             : /*      transformation type.                                            */
    6537             : /************************************************************************/
    6538             : 
    6539         477 : template <class T> static void GWKNearestThread(void *pData)
    6540             : 
    6541             : {
                           // pData is the opaque job pointer; this call handles destination
                           // lines iYMin (inclusive) to iYMax (exclusive) of that job.
    6542         477 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    6543         477 :     GDALWarpKernel *poWK = psJob->poWK;
    6544         477 :     const int iYMin = psJob->iYMin;
    6545         477 :     const int iYMax = psJob->iYMax;
                           // Factor applied to the transformed Z in the vertical shift below.
                           // The warp option is only parsed when bApplyVerticalShift is set;
                           // otherwise the value is 0.0 and never used.
    6546         476 :     const double dfMultFactorVerticalShiftPipeline =
    6547         477 :         poWK->bApplyVerticalShift
    6548           1 :             ? CPLAtof(CSLFetchNameValueDef(
    6549           1 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    6550             :                   "1.0"))
    6551             :             : 0.0;
                           // True for single-band kernels, or when UNIFIED_SRC_NODATA is not
                           // enabled (it defaults to FALSE here). When false,
                           // GWKAvoidNoDataMultiBand() is called once per pixel after the
                           // band loop.
    6552         477 :     const bool bAvoidNoDataSingleBand =
    6553         546 :         poWK->nBands == 1 ||
    6554          69 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    6555             :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    6556             : 
    6557         477 :     const int nDstXSize = poWK->nDstXSize;
    6558         477 :     const int nSrcXSize = poWK->nSrcXSize;
    6559         477 :     const int nSrcYSize = poWK->nSrcYSize;
    6560             : 
    6561             :     /* -------------------------------------------------------------------- */
    6562             :     /*      Allocate x,y,z coordinate arrays for transformation ... one     */
    6563             :     /*      scanlines worth of positions.                                   */
    6564             :     /* -------------------------------------------------------------------- */
    6565             : 
    6566             :     // For x, 2 *, because we cache the precomputed values at the end.
    6567             :     double *padfX =
    6568         477 :         static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
    6569             :     double *padfY =
    6570         477 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6571             :     double *padfZ =
    6572         477 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    6573         477 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    6574             : 
                           // Both options default to "0"; a positive SRC_COORD_PRECISION
                           // enables the GWKRoundSourceCoordinates() call in the line loop.
    6575         477 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    6576         477 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    6577         477 :     const double dfErrorThreshold = CPLAtof(
    6578         477 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    6579             : 
                           // When true, a pixel whose source offset is flagged invalid is not
                           // dropped immediately: GWKAdjustSrcOffsetOnEdge() is tried first.
    6580             :     const bool bOneSourceCornerFailsToReproject =
    6581         477 :         GWKOneSourceCornerFailsToReproject(psJob);
    6582             : 
    6583             :     // Precompute values.
    6584       80557 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6585       80080 :         padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
    6586             : 
    6587             :     /* ==================================================================== */
    6588             :     /*      Loop over output lines.                                         */
    6589             :     /* ==================================================================== */
    6590       64713 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    6591             :     {
    6592             : 
    6593             :         /* --------------------------------------------------------------------
    6594             :          */
    6595             :         /*      Setup points to transform to source image space. */
    6596             :         /* --------------------------------------------------------------------
    6597             :          */
                               // Refill the first half of padfX from the destination X centres
                               // cached in its second half; the first half is what gets handed
                               // to the transformer on every line.
    6598       64236 :         memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
    6599       64236 :         const double dfY = iDstY + 0.5 + poWK->nDstYOff;
    6600    33836599 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6601    33772442 :             padfY[iDstX] = dfY;
    6602       64236 :         memset(padfZ, 0, sizeof(double) * nDstXSize);
    6603             : 
    6604             :         /* --------------------------------------------------------------------
    6605             :          */
    6606             :         /*      Transform the points from destination pixel/line coordinates */
    6607             :         /*      to source pixel/line coordinates. */
    6608             :         /* --------------------------------------------------------------------
    6609             :          */
    6610       64236 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
    6611             :                              padfY, padfZ, pabSuccess);
    6612       64236 :         if (dfSrcCoordPrecision > 0.0)
    6613             :         {
    6614           0 :             GWKRoundSourceCoordinates(
    6615             :                 nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
    6616             :                 dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6617           0 :                 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
    6618             :         }
    6619             :         /* ====================================================================
    6620             :          */
    6621             :         /*      Loop over pixels in output scanline. */
    6622             :         /* ====================================================================
    6623             :          */
    6624    33836599 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6625             :         {
    6626    33772442 :             GPtrDiff_t iSrcOffset = 0;
    6627    33772442 :             if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
    6628             :                                               padfX, padfY, nSrcXSize,
    6629             :                                               nSrcYSize, iSrcOffset))
    6630    21383643 :                 continue;
    6631             : 
    6632             :             /* --------------------------------------------------------------------
    6633             :              */
    6634             :             /*      Do not try to apply invalid source pixels to the dest. */
    6635             :             /* --------------------------------------------------------------------
    6636             :              */
    6637    25227006 :             if (poWK->panUnifiedSrcValid != nullptr &&
    6638     6714445 :                 !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    6639             :             {
    6640     5120982 :                 if (!bOneSourceCornerFailsToReproject)
    6641             :                 {
    6642     5113496 :                     continue;
    6643             :                 }
                                       // iSrcOffset is passed on to the helper; the pixel is
                                       // only kept when the helper returns true.
    6644        7485 :                 else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
    6645             :                 {
    6646        5224 :                     continue;
    6647             :                 }
    6648             :             }
    6649             : 
    6650             :             /* --------------------------------------------------------------------
    6651             :              */
    6652             :             /*      Do not try to apply transparent source pixels to the
    6653             :              * destination.*/
    6654             :             /* --------------------------------------------------------------------
    6655             :              */
    6656    13393881 :             double dfDensity = 1.0;
    6657             : 
    6658    13393881 :             if (poWK->pafUnifiedSrcDensity != nullptr)
    6659             :             {
    6660     1557335 :                 dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    6661     1557335 :                 if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
    6662     1005075 :                     continue;
    6663             :             }
    6664             : 
    6665             :             /* ====================================================================
    6666             :              */
    6667             :             /*      Loop processing each band. */
    6668             :             /* ====================================================================
    6669             :              */
    6670             : 
                                   // Destination offset is computed from the job-relative line
                                   // index iDstY and the destination window width.
    6671    12388799 :             const GPtrDiff_t iDstOffset =
    6672    12388799 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    6673             : 
    6674    27339660 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    6675             :             {
    6676    14950961 :                 T value = 0;
    6677    14950961 :                 double dfBandDensity = 0.0;
    6678             : 
    6679             :                 /* --------------------------------------------------------------------
    6680             :                  */
    6681             :                 /*      Collect the source value. */
    6682             :                 /* --------------------------------------------------------------------
    6683             :                  */
    6684    14950961 :                 if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,
    6685             :                                  &value))
    6686             :                 {
    6687             : 
    6688    14950861 :                     if (poWK->bApplyVerticalShift)
    6689             :                     {
                                               // Non-finite Z: skip writing this band only. The
                                               // continue targets the band loop, so the density
                                               // and validity updates after it still run.
    6690           1 :                         if (!std::isfinite(padfZ[iDstX]))
    6691           0 :                             continue;
    6692             :                         // Subtract padfZ[] since the coordinate transformation
    6693             :                         // is from target to source
    6694           1 :                         value = GWKClampValueT<T>(
    6695           1 :                             double(value) * poWK->dfMultFactorVerticalShift -
    6696           1 :                             padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
    6697             :                     }
    6698             : 
    6699    14950861 :                     GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
    6700             :                                           dfBandDensity, value,
    6701             :                                           bAvoidNoDataSingleBand);
    6702             :                 }
    6703             :             }
    6704             : 
    6705             :             /* --------------------------------------------------------------------
    6706             :              */
    6707             :             /*      Mark this pixel valid/opaque in the output. */
    6708             :             /* --------------------------------------------------------------------
    6709             :              */
    6710             : 
    6711    12388799 :             if (!bAvoidNoDataSingleBand)
    6712             :             {
    6713      424278 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    6714             :             }
    6715             : 
    6716    12388799 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    6717             : 
    6718    12388799 :             if (poWK->panDstValid != nullptr)
    6719             :             {
    6720    11118346 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    6721             :             }
    6722             :         } /* Next iDstX */
    6723             : 
    6724             :         /* --------------------------------------------------------------------
    6725             :          */
    6726             :         /*      Report progress to the user, and optionally cancel out. */
    6727             :         /* --------------------------------------------------------------------
    6728             :          */
                               // A non-zero return from the job's progress callback stops the
                               // line loop; the buffers are still freed below.
    6729       64236 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    6730           0 :             break;
    6731             :     }
    6732             : 
    6733             :     /* -------------------------------------------------------------------- */
    6734             :     /*      Cleanup and return.                                             */
    6735             :     /* -------------------------------------------------------------------- */
    6736         477 :     CPLFree(padfX);
    6737         477 :     CPLFree(padfY);
    6738         477 :     CPLFree(padfZ);
    6739         477 :     CPLFree(pabSuccess);
    6740         477 : }
    6741             : 
    6742         363 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK)
    6743             : {
                           // Calls GWKRun() on poWK with GWKNearestThread<GByte> as the
                           // function argument and returns GWKRun()'s CPLErr unchanged.
    6744         363 :     return GWKRun(poWK, "GWKNearestByte", GWKNearestThread<GByte>);
    6745             : }
    6746             : 
    6747          14 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6748             : {
                           // Calls GWKRun() on poWK with the <GInt16, GRA_NearestNeighbour>
                           // instantiation of GWKResampleNoMasksOrDstDensityOnlyThread as the
                           // function argument and returns GWKRun()'s CPLErr unchanged.
    6749          14 :     return GWKRun(
    6750             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
    6751          14 :         GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);
    6752             : }
    6753             : 
    6754           5 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6755             : {
                           // Calls GWKRun() on poWK with the <GInt16, GRA_Bilinear>
                           // instantiation of GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread
                           // as the function argument and returns GWKRun()'s CPLErr unchanged.
    6756           5 :     return GWKRun(
    6757             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
    6758             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
    6759           5 :                                                            GRA_Bilinear>);
    6760             : }
    6761             : 
    6762           7 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6763             : {
                           // Calls GWKRun() on poWK with the <GUInt16, GRA_Bilinear>
                           // instantiation of GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread
                           // as the function argument and returns GWKRun()'s CPLErr unchanged.
    6764           7 :     return GWKRun(
    6765             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
    6766             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
    6767           7 :                                                            GRA_Bilinear>);
    6768             : }
    6769             : 
    6770           4 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6771             : {
                           // Calls GWKRun() on poWK with the <float, GRA_Bilinear>
                           // instantiation of GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread
                           // as the function argument and returns GWKRun()'s CPLErr unchanged.
    6772           4 :     return GWKRun(
    6773             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
    6774             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
    6775           4 :                                                            GRA_Bilinear>);
    6776             : }
    6777             : 
    6778             : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
    6779             : 
    6780             : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
    6781             : {
                           // Only compiled when INSTANTIATE_FLOAT64_SSE2_IMPL is defined (see
                           // the enclosing #ifdef). Calls GWKRun() on poWK with the
                           // <double, GRA_Bilinear> instantiation of
                           // GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread as the function
                           // argument and returns GWKRun()'s CPLErr unchanged.
    6782             :     return GWKRun(
    6783             :         poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
    6784             :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
    6785             :                                                            GRA_Bilinear>);
    6786             : }
    6787             : #endif
    6788             : 
    6789           5 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6790             : {
                           // Calls GWKRun() on poWK with the <GInt16, GRA_Cubic> instantiation
                           // of GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread as the
                           // function argument and returns GWKRun()'s CPLErr unchanged.
    6791           5 :     return GWKRun(
    6792             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
    6793           5 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);
    6794             : }
    6795             : 
    6796          14 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6797             : {
                           // Calls GWKRun() on poWK with the <GUInt16, GRA_Cubic> instantiation
                           // of GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread as the
                           // function argument and returns GWKRun()'s CPLErr unchanged.
    6798          14 :     return GWKRun(
    6799             :         poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
    6800          14 :         GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);
    6801             : }
    6802             : 
    6803           6 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
    6804             : {
                           // Calls GWKRun() on poWK with the <GInt16, GRA_CubicSpline>
                           // instantiation of GWKResampleNoMasksOrDstDensityOnlyThread as the
                           // function argument and returns GWKRun()'s CPLErr unchanged.
    6805           6 :     return GWKRun(
    6806             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
    6807           6 :         GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);
    6808             : }
    6809             : 
    6810           5 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
    6811             : {
                           // Calls GWKRun() on poWK with the <GUInt16, GRA_CubicSpline>
                           // instantiation of GWKResampleNoMasksOrDstDensityOnlyThread as the
                           // function argument and returns GWKRun()'s CPLErr unchanged.
    6812           5 :     return GWKRun(
    6813             :         poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
    6814           5 :         GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);
    6815             : }
    6816             : 
    6817           9 : static CPLErr GWKNearestInt8(GDALWarpKernel *poWK)
    6818             : {
                           // Calls GWKRun() on poWK with GWKNearestThread<int8_t> as the
                           // function argument and returns GWKRun()'s CPLErr unchanged.
    6819           9 :     return GWKRun(poWK, "GWKNearestInt8", GWKNearestThread<int8_t>);
    6820             : }
    6821             : 
    6822          40 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK)
    6823             : {
                           // Calls GWKRun() on poWK with GWKNearestThread<GInt16> as the
                           // function argument and returns GWKRun()'s CPLErr unchanged.
    6824          40 :     return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
    6825             : }
    6826             : 
    6827          10 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK)
    6828             : {
                           // Calls GWKRun() on poWK with GWKNearestThread<GUInt16> as the
                           // function argument and returns GWKRun()'s CPLErr unchanged.
    6829          10 :     return GWKRun(poWK, "GWKNearestUnsignedShort", GWKNearestThread<GUInt16>);
    6830             : }
    6831             : 
    6832          11 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
    6833             : {
                           // Calls GWKRun() on poWK with the <float, GRA_NearestNeighbour>
                           // instantiation of GWKResampleNoMasksOrDstDensityOnlyThread as the
                           // function argument and returns GWKRun()'s CPLErr unchanged.
    6834          11 :     return GWKRun(
    6835             :         poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
    6836          11 :         GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);
    6837             : }
    6838             : 
    6839          51 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)
    6840             : {
                           // Calls GWKRun() on poWK with GWKNearestThread<float> as the
                           // function argument and returns GWKRun()'s CPLErr unchanged.
    6841          51 :     return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
    6842             : }
    6843             : 
    6844             : /************************************************************************/
    6845             : /*                           GWKAverageOrMode()                         */
    6846             : /*                                                                      */
    6847             : /************************************************************************/
    6848             : 
    6849             : #define COMPUTE_WEIGHT_Y(iSrcY)                                                \
    6850             :     ((iSrcY == iSrcYMin)                                                       \
    6851             :          ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin))        \
    6852             :      : (iSrcY + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax)                       \
    6853             :                                : 1.0)  // Weight of source row iSrcY: 1.0 for a single-row window or an interior row; the first row gets 1 - (dfYMin - iSrcYMin) and the last row 1 - (iSrcYMax - dfYMax). Reads iSrcYMin, iSrcYMax, dfYMin and dfYMax from the expansion site; the argument is not parenthesized, so pass a plain identifier.
    6854             : 
    6855             : #define COMPUTE_WEIGHT(iSrcX, dfWeightY)                                       \
    6856             :     ((iSrcX == iSrcXMin)       ? ((iSrcXMin + 1 == iSrcXMax)                   \
    6857             :                                       ? dfWeightY                              \
    6858             :                                       : dfWeightY * (1 - (dfXMin - iSrcXMin))) \
    6859             :      : (iSrcX + 1 == iSrcXMax) ? dfWeightY * (1 - (iSrcXMax - dfXMax))         \
    6860             :                                : dfWeightY)  // Weight of source pixel iSrcX: dfWeightY scaled by 1 - (dfXMin - iSrcXMin) on the first column and by 1 - (iSrcXMax - dfXMax) on the last; unscaled for interior columns or a single-column window. Reads iSrcXMin, iSrcXMax, dfXMin and dfXMax from the expansion site; arguments are not parenthesized, so pass plain identifiers.
    6861             : 
    6862             : static void GWKAverageOrModeThread(void *pData);  // Forward declaration: the worker is referenced below before its definition.
    6863             : 
    6864         246 : static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)
    6865             : {  // Hands GWKAverageOrModeThread to GWKRun and returns GWKRun's CPLErr unchanged.
    6866         246 :     return GWKRun(poWK, "GWKAverageOrMode", GWKAverageOrModeThread);
    6867             : }
    6868             : 
    6869             : /************************************************************************/
    6870             : /*                 GWKAverageOrModeComputeLineCoords()                  */
    6871             : /************************************************************************/
    6872             : 
    6873       28663 : static void GWKAverageOrModeComputeLineCoords(
    6874             :     const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
    6875             :     double *padfY2, double *padfZ, double *padfZ2, int *pabSuccess,
    6876             :     int *pabSuccess2, int iDstY, double dfSrcCoordPrecision,
    6877             :     double dfErrorThreshold)
    6878             : {  // For destination row iDstY, fills two coordinate sets of nDstXSize entries each and runs both through the kernel's transformer. All array arguments are written in place and must hold at least nDstXSize entries.
    6879       28663 :     const GDALWarpKernel *poWK = psJob->poWK;
    6880       28663 :     const int nDstXSize = poWK->nDstXSize;
    6881             : 
    6882             :     // Set up points to transform to source image space: (padfX, padfY) is the (iDstX, iDstY) corner of each pixel and (padfX2, padfY2) the (iDstX + 1, iDstY + 1) corner, both shifted by nDstXOff/nDstYOff; Z is always 0.
    6883     7360890 :     for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    6884             :     {
    6885     7332220 :         padfX[iDstX] = iDstX + poWK->nDstXOff;
    6886     7332220 :         padfY[iDstX] = iDstY + poWK->nDstYOff;
    6887     7332220 :         padfZ[iDstX] = 0.0;
    6888     7332220 :         padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
    6889     7332220 :         padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
    6890     7332220 :         padfZ2[iDstX] = 0.0;
    6891             :     }
    6892             : 
    6893             :     /* ----------------------------------------------------------------- */
    6894             :     /*      Transform the points from destination pixel/line coordinates */
    6895             :     /*      to source pixel/line coordinates.                            */
    6896             :     /* ----------------------------------------------------------------- */
    6897       28663 :     poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX, padfY,
    6898             :                          padfZ, pabSuccess);  // pabSuccess is passed as the transformer's last argument (presumably per-point status flags; this function never reads them).
    6899       28663 :     poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
    6900             :                          padfY2, padfZ2, pabSuccess2);  // Same call for the second corner set.
    6901             : 
    6902       28663 :     if (dfSrcCoordPrecision > 0.0)  // Optional post-processing of both sets, requested through a positive precision.
    6903             :     {
    6904           0 :         GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, pabSuccess,
    6905             :                                   dfSrcCoordPrecision, dfErrorThreshold,
    6906           0 :                                   poWK->pfnTransformer, psJob->pTransformerArg,
    6907           0 :                                   poWK->nDstXOff, iDstY + poWK->nDstYOff);
    6908           0 :         GWKRoundSourceCoordinates(
    6909             :             nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2, dfSrcCoordPrecision,
    6910           0 :             dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
    6911           0 :             1.0 + poWK->nDstXOff, iDstY + 1.0 + poWK->nDstYOff);  // Last two arguments are shifted by 1.0, matching how the second set was initialised above.
    6912             :     }
    6913       28663 : }
    6914             : 
    6915             : /************************************************************************/
    6916             : /*                GWKAverageOrModeComputeSourceCoords()                 */
    6917             : /************************************************************************/
    6918             : 
    6919     7332220 : static bool GWKAverageOrModeComputeSourceCoords(
    6920             :     const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
    6921             :     double *padfY2, int iDstX, int iDstY, int nXMargin, int nYMargin,
    6922             :     // Output:
    6923             :     bool &bWrapOverX, double &dfXMin, double &dfYMin, double &dfXMax,
    6924             :     double &dfYMax, int &iSrcXMin, int &iSrcYMin, int &iSrcXMax, int &iSrcYMax)
    6925             : {  // Derives the source window of destination pixel (iDstX, iDstY): fractional bounds dfXMin..dfXMax / dfYMin..dfYMax and half-open integer bounds [iSrcXMin, iSrcXMax) x [iSrcYMin, iSrcYMax). Returns false when the pixel must be skipped; outputs may then be only partly written. May reorder or shift padfX/padfX2/padfY/padfY2 at index iDstX.
    6926     7332220 :     const GDALWarpKernel *poWK = psJob->poWK;
    6927     7332220 :     const int nSrcXSize = poWK->nSrcXSize;
    6928     7332220 :     const int nSrcYSize = poWK->nSrcYSize;
    6929             : 
    6930             :     // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
    6931             :     // reasonable range (https://github.com/OSGeo/gdal/issues/2365). The negated conjunction also rejects NaN coordinates, since every comparison involving NaN is false.
    6932     7332220 :     if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
    6933     6814810 :           padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
    6934     6814810 :           padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
    6935     6532210 :           padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
    6936     6532210 :           padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
    6937     5870420 :           padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
    6938     5865780 :           padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
    6939     5350790 :           padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
    6940             :     {
    6941     1985190 :         return false;
    6942             :     }
    6943             : 
    6944             :     // Compute corners in source crs.
    6945             : 
    6946             :     // The transformation might not have preserved ordering of
    6947             :     // coordinates so do the necessary swapping (#5433).
    6948             :     // NOTE: this is really an approximate fix. To do something
    6949             :     // more precise we would for example need to compute the
    6950             :     // transformation of coordinates in the
    6951             :     // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
    6952             :     // coordinates, and take the bounding box of the resulting source
    6953             :     // coordinates.
    6954             : 
    6955     5347040 :     if (padfX[iDstX] > padfX2[iDstX])
    6956      269148 :         std::swap(padfX[iDstX], padfX2[iDstX]);
    6957             : 
    6958             :     // Detect situations where the target pixel is close to the
    6959             :     // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
    6960             :     // close to the left-most and right-most columns of the source
    6961             :     // raster. The 2 value below was experimentally determined to
    6962             :     // avoid false-positives and false-negatives.
    6963             :     // Addresses https://github.com/OSGeo/gdal/issues/6478
    6964     5347040 :     bWrapOverX = false;
    6965     5347040 :     const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);  // Integer division: the threshold is 0 for nSrcXSize < 10, 1 for 10..19, and 2 from 20 upwards.
    6966     5347040 :     if (poWK->nSrcXOff == 0 && iDstX + 1 < poWK->nDstXSize &&
    6967     3298690 :         2 * padfX[iDstX] - padfX[iDstX + 1] < nThresholdWrapOverX &&  // 2*x[i] - x[i+1] == x[i] - (x[i+1] - x[i]): one step extrapolated to the left of this pixel.
    6968       55362 :         nSrcXSize - padfX2[iDstX] < nThresholdWrapOverX)  // Distance from the larger X to the right edge of the source.
    6969             :     {
    6970             :         // Check there is a discontinuity by checking at mid-pixel.
    6971             :         // NOTE: all this remains fragile. To confidently
    6972             :         // detect antimeridian warping we should probably try to access
    6973             :         // georeferenced coordinates, and not rely only on tests on
    6974             :         // image space coordinates. But accessing georeferenced
    6975             :         // coordinates from here is not trivial, and we would for example
    6976             :         // have to handle both geographic, Mercator, etc.
    6977             :         // Let's hope this heuristic is good enough for now.
    6978        1610 :         double x = iDstX + 0.5 + poWK->nDstXOff;  // Mid-pixel in X only; Y below uses the row's top edge (no + 0.5).
    6979        1610 :         double y = iDstY + poWK->nDstYOff;
    6980        1610 :         double z = 0;
    6981        1610 :         int bSuccess = FALSE;
    6982        1610 :         poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &x, &y, &z,
    6983             :                              &bSuccess);
    6984        1610 :         if (bSuccess && x < padfX[iDstX])  // The mid-pixel lands left of the (already ordered) smaller X: treat as a wrap-around.
    6985             :         {
    6986        1596 :             bWrapOverX = true;
    6987        1596 :             std::swap(padfX[iDstX], padfX2[iDstX]);
    6988        1596 :             padfX2[iDstX] += nSrcXSize;  // The interval now runs past the right edge; callers fold columns back with a modulo on nSrcXSize.
    6989             :         }
    6990             :     }
    6991             : 
    6992     5347040 :     dfXMin = padfX[iDstX] - poWK->nSrcXOff;
    6993     5347040 :     dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
    6994     5347040 :     constexpr double EPSILON = 1e-10;
    6995             :     // Check that [dfXMin, dfXMax] intersect with [0,nSrcXSize] with a tolerance
    6996     5347040 :     if (!(dfXMax > -EPSILON && dfXMin < nSrcXSize + EPSILON))
    6997       15528 :         return false;
    6998     5331510 :     iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPSILON), 0.0));  // Lower bound clamped at column 0.
    6999     5331510 :     iSrcXMax = static_cast<int>(
    7000     5331510 :         std::min(ceil(dfXMax - EPSILON), static_cast<double>(INT_MAX)));  // Clamped to INT_MAX before the conversion to int.
    7001     5331510 :     if (!bWrapOverX)
    7002     5329910 :         iSrcXMax = std::min(iSrcXMax, nSrcXSize);  // Only the non-wrapping case is clamped to the raster width.
    7003     5331510 :     if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
    7004         472 :         iSrcXMax++;  // Widen an empty range to one column when that stays inside the raster.
    7005             : 
    7006     5331510 :     if (padfY[iDstX] > padfY2[iDstX])
    7007      270117 :         std::swap(padfY[iDstX], padfY2[iDstX]);  // Same ordering fix as for X; there is no wrap-around handling in Y.
    7008     5331510 :     dfYMin = padfY[iDstX] - poWK->nSrcYOff;
    7009     5331510 :     dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
    7010             :     // Check that [dfYMin, dfYMax] intersect with [0,nSrcYSize] with a tolerance
    7011     5331510 :     if (!(dfYMax > -EPSILON && dfYMin < nSrcYSize + EPSILON))
    7012       13334 :         return false;
    7013     5318180 :     iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPSILON), 0.0));
    7014     5318180 :     iSrcYMax = std::min(static_cast<int>(ceil(dfYMax - EPSILON)), nSrcYSize);  // Unlike X, converted to int before clamping; the margin check at the top bounds dfYMax by nSrcYSize + nYMargin.
    7015     5318180 :     if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
    7016           0 :         iSrcYMax++;  // Widen an empty range to one row when that stays inside the raster.
    7017             : 
    7018     5318180 :     return true;
    7019             : }
    7020             : 
    7021             : /************************************************************************/
    7022             : /*                          GWKModeRealType()                           */
    7023             : /************************************************************************/
    7024             : 
    7025       17780 : template <class T> static inline bool IsSame(T a, T b)
    7026             : {  // Generic case: plain operator== on T. The explicit specializations of IsSame additionally treat two NaN values as equal.
    7027       17780 :     return a == b;
    7028             : }
    7029             : 
    7030           0 : template <> bool IsSame<GFloat16>(GFloat16 a, GFloat16 b)
    7031             : {  // GFloat16 case: equal by operator==, or both NaN (NaN never compares equal to itself). Uses CPLIsNan where the float/double versions use std::isnan.
    7032           0 :     return a == b || (CPLIsNan(a) && CPLIsNan(b));
    7033             : }
    7034             : 
    7035          18 : template <> bool IsSame<float>(float a, float b)
    7036             : {  // float case: equal by operator==, or both NaN (NaN never compares equal to itself).
    7037          18 :     return a == b || (std::isnan(a) && std::isnan(b));
    7038             : }
    7039             : 
    7040          56 : template <> bool IsSame<double>(double a, double b)
    7041             : {  // double case: equal by operator==, or both NaN (NaN never compares equal to itself).
    7042          56 :     return a == b || (std::isnan(a) && std::isnan(b));
    7043             : }
    7044             : 
    7045          19 : template <class T> static void GWKModeRealType(GWKJobStruct *psJob)
    7046             : {  // Mode resampling for value type T over destination rows [iYMin, iYMax): for each destination pixel and band, writes the source value whose accumulated weight over the pixel's source window is largest.
    7047          19 :     const GDALWarpKernel *poWK = psJob->poWK;
    7048          19 :     const int iYMin = psJob->iYMin;
    7049          19 :     const int iYMax = psJob->iYMax;
    7050          19 :     const int nDstXSize = poWK->nDstXSize;
    7051          19 :     const int nSrcXSize = poWK->nSrcXSize;
    7052          19 :     const int nSrcYSize = poWK->nSrcYSize;
    7053          19 :     const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
    7054             : 
    7055          19 :     T *pVals = nullptr;  // Bin values: pVals[i] is the i-th distinct value seen in the current window.
    7056          19 :     float *pafCounts = nullptr;  // Bin weights: pafCounts[i] is the weight accumulated for pVals[i].
    7057             : 
    7058          19 :     if (nSrcXSize > 0 && nSrcYSize > 0)  // Otherwise both scratch buffers stay null.
    7059             :     {
    7060             :         pVals = static_cast<T *>(
    7061          19 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(T)));  // One slot per source pixel.
    7062             :         pafCounts = static_cast<float *>(
    7063          19 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
    7064          19 :         if (pVals == nullptr || pafCounts == nullptr)
    7065             :         {
    7066           0 :             VSIFree(pVals);
    7067           0 :             VSIFree(pafCounts);
    7068           0 :             return;  // Returns without processing any row; the function is void, so the failure is not reported to the caller from here.
    7069             :         }
    7070             :     }
    7071             : 
    7072             :     /* -------------------------------------------------------------------- */
    7073             :     /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    7074             :     /*      scanlines worth of positions.                                   */
    7075             :     /* -------------------------------------------------------------------- */
    7076             : 
    7077             :     double *padfX =
    7078          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7079             :     double *padfY =
    7080          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7081             :     double *padfZ =
    7082          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7083             :     double *padfX2 =
    7084          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7085             :     double *padfY2 =
    7086          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7087             :     double *padfZ2 =
    7088          19 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7089          19 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7090          19 :     int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7091             : 
    7092          19 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    7093          19 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    7094          19 :     const double dfErrorThreshold = CPLAtof(
    7095          19 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    7096          19 :     const bool bAvoidNoDataSingleBand =  // True for a single band, or when the UNIFIED_SRC_NODATA option does not test true (default "FALSE").
    7097          19 :         poWK->nBands == 1 ||
    7098           0 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7099             :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    7100             : 
    7101          19 :     const int nXMargin =  // Tolerance used by GWKAverageOrModeComputeSourceCoords: at least 2, growing with 1 / dfXScale.
    7102          19 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    7103          19 :     const int nYMargin =
    7104          19 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
    7105             : 
    7106             :     /* ==================================================================== */
    7107             :     /*      Loop over output lines.                                         */
    7108             :     /* ==================================================================== */
    7109         116 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    7110             :     {
    7111          97 :         GWKAverageOrModeComputeLineCoords(
    7112             :             psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
    7113             :             pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
    7114             : 
    7115             :         // Loop over pixels in output scanline.
    7116        3514 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    7117             :         {
    7118        3417 :             GPtrDiff_t iSrcOffset = 0;
    7119        3417 :             double dfDensity = 1.0;  // Never modified below; passed as-is to GWKOverlayDensity.
    7120        3417 :             bool bHasFoundDensity = false;  // NOTE(review): per pixel, not per band. Once one band finds a mode, later bands reach GWKSetPixelValueRealT even if they found none; confirm the helper tolerates that.
    7121             : 
    7122        3417 :             bool bWrapOverX = false;
    7123        3417 :             double dfXMin = 0;
    7124        3417 :             double dfYMin = 0;
    7125        3417 :             double dfXMax = 0;
    7126        3417 :             double dfYMax = 0;
    7127        3417 :             int iSrcXMin = 0;
    7128        3417 :             int iSrcYMin = 0;
    7129        3417 :             int iSrcXMax = 0;
    7130        3417 :             int iSrcYMax = 0;
    7131        3417 :             if (!GWKAverageOrModeComputeSourceCoords(
    7132             :                     psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
    7133             :                     nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
    7134             :                     iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
    7135             :             {
    7136           0 :                 continue;  // No usable source window: this destination pixel is left untouched.
    7137             :             }
    7138             : 
    7139        3417 :             const GPtrDiff_t iDstOffset =
    7140        3417 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    7141             : 
    7142             :             // Loop processing each band.
    7143        6834 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7144             :             {
    7145        3417 :                 double dfBandDensity = 0.0;
    7146             : 
    7147        3417 :                 int nBins = 0;  // Bins are rebuilt from scratch for every band of every pixel.
    7148        3417 :                 int iModeIndex = -1;  // Index of the current best bin; -1 until the first bin is created.
    7149        3417 :                 T nVal{};
    7150             : 
    7151       10248 :                 for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7152             :                 {
    7153        6831 :                     const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7154        6831 :                     iSrcOffset =
    7155        6831 :                         iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7156       20530 :                     for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7157             :                          iSrcX++, iSrcOffset++)
    7158             :                     {
    7159       13699 :                         if (bWrapOverX)  // Columns at or past nSrcXSize are folded back to the start of the row.
    7160           0 :                             iSrcOffset =
    7161           0 :                                 (iSrcX % nSrcXSize) +
    7162           0 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7163             : 
    7164       13699 :                         if (poWK->panUnifiedSrcValid != nullptr &&
    7165           0 :                             !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    7166           0 :                             continue;  // Source pixel is not set in the unified validity mask.
    7167             : 
    7168       13699 :                         if (GWKGetPixelT(poWK, iBand, iSrcOffset,
    7169       27398 :                                          &dfBandDensity, &nVal) &&  // Presumably fills dfBandDensity and nVal for this source pixel.
    7170       13699 :                             dfBandDensity > BAND_DENSITY_THRESHOLD)
    7171             :                         {
    7172       13699 :                             const double dfWeight =
    7173       13699 :                                 COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7174             : 
    7175             :                             // Check array for existing entry (linear scan over the bins created so far).
    7176       13699 :                             int i = 0;
    7177       29194 :                             for (i = 0; i < nBins; ++i)
    7178             :                             {
    7179       17807 :                                 if (IsSame(pVals[i], nVal))
    7180             :                                 {
    7181             : 
    7182        2312 :                                     pafCounts[i] +=
    7183        2312 :                                         static_cast<float>(dfWeight);  // Weights are accumulated in single precision.
    7184        2312 :                                     bool bValIsMaxCount =
    7185        2312 :                                         (pafCounts[i] > pafCounts[iModeIndex]);  // iModeIndex >= 0 here because nBins > 0.
    7186             : 
    7187        2312 :                                     if (!bValIsMaxCount &&
    7188        1498 :                                         pafCounts[i] == pafCounts[iModeIndex])  // Exact tie (also true when i is already the mode bin).
    7189             :                                     {
    7190        1490 :                                         switch (eTieStrategy)
    7191             :                                         {
    7192        1477 :                                             case GWKTS_First:  // Keep the bin that reached this weight first.
    7193        1477 :                                                 break;
    7194           6 :                                             case GWKTS_Min:  // Prefer the smaller value.
    7195           6 :                                                 bValIsMaxCount =
    7196           6 :                                                     nVal < pVals[iModeIndex];
    7197           6 :                                                 break;
    7198           7 :                                             case GWKTS_Max:  // Prefer the larger value.
    7199           7 :                                                 bValIsMaxCount =
    7200           7 :                                                     nVal > pVals[iModeIndex];
    7201           7 :                                                 break;
    7202             :                                         }
    7203             :                                     }
    7204             : 
    7205        2312 :                                     if (bValIsMaxCount)
    7206             :                                     {
    7207         817 :                                         iModeIndex = i;
    7208             :                                     }
    7209             : 
    7210        2312 :                                     break;
    7211             :                                 }
    7212             :                             }
    7213             : 
    7214             :                             // Add to arr if entry not already there. A new bin only becomes the mode when no mode exists yet; it is not compared against the current best weight.
    7215       13699 :                             if (i == nBins)
    7216             :                             {
    7217       11387 :                                 pVals[i] = nVal;
    7218       11387 :                                 pafCounts[i] = static_cast<float>(dfWeight);
    7219             : 
    7220       11387 :                                 if (iModeIndex < 0)
    7221        3417 :                                     iModeIndex = i;
    7222             : 
    7223       11387 :                                 ++nBins;
    7224             :                             }
    7225             :                         }
    7226             :                     }
    7227             :                 }
    7228             : 
    7229        3417 :                 if (iModeIndex != -1)  // At least one source pixel contributed to this band.
    7230             :                 {
    7231        3417 :                     nVal = pVals[iModeIndex];
    7232        3417 :                     dfBandDensity = 1;
    7233        3417 :                     bHasFoundDensity = true;
    7234             :                 }
    7235             : 
    7236             :                 // We have a computed value from the source.  Now apply it
    7237             :                 // to the destination pixel
    7238        3417 :                 if (bHasFoundDensity)
    7239             :                 {
    7240        3417 :                     GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
    7241             :                                           dfBandDensity, nVal,
    7242             :                                           bAvoidNoDataSingleBand);
    7243             :                 }
    7244             :             }
    7245             : 
    7246        3417 :             if (!bHasFoundDensity)
    7247           0 :                 continue;  // No band produced a value: masks and density are left untouched.
    7248             : 
    7249        3417 :             if (!bAvoidNoDataSingleBand)
    7250             :             {
    7251           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    7252             :             }
    7253             : 
    7254             :             /* --------------------------------------------------------------------
    7255             :              */
    7256             :             /*      Update destination density/validity masks. */
    7257             :             /* --------------------------------------------------------------------
    7258             :              */
    7259        3417 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    7260             : 
    7261        3417 :             if (poWK->panDstValid != nullptr)
    7262             :             {
    7263           0 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    7264             :             }
    7265             :         } /* Next iDstX */
    7266             : 
    7267             :         /* --------------------------------------------------------------------
    7268             :          */
    7269             :         /*      Report progress to the user, and optionally cancel out. */
    7270             :         /* --------------------------------------------------------------------
    7271             :          */
    7272          97 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))  // A non-zero return from the callback stops the row loop; cleanup below still runs.
    7273           0 :             break;
    7274             :     }
    7275             : 
    7276             :     /* -------------------------------------------------------------------- */
    7277             :     /*      Cleanup and return.                                             */
    7278             :     /* -------------------------------------------------------------------- */
    7279          19 :     CPLFree(padfX);
    7280          19 :     CPLFree(padfY);
    7281          19 :     CPLFree(padfZ);
    7282          19 :     CPLFree(padfX2);
    7283          19 :     CPLFree(padfY2);
    7284          19 :     CPLFree(padfZ2);
    7285          19 :     CPLFree(pabSuccess);
    7286          19 :     CPLFree(pabSuccess2);
    7287          19 :     VSIFree(pVals);
    7288          19 :     VSIFree(pafCounts);
    7289             : }
    7290             : 
    7291             : /************************************************************************/
    7292             : /*                         GWKModeComplexType()                         */
    7293             : /************************************************************************/
    7294             : 
    7295           8 : static void GWKModeComplexType(GWKJobStruct *psJob)
    7296             : {
    7297           8 :     const GDALWarpKernel *poWK = psJob->poWK;
    7298           8 :     const int iYMin = psJob->iYMin;
    7299           8 :     const int iYMax = psJob->iYMax;
    7300           8 :     const int nDstXSize = poWK->nDstXSize;
    7301           8 :     const int nSrcXSize = poWK->nSrcXSize;
    7302           8 :     const int nSrcYSize = poWK->nSrcYSize;
    7303           8 :     const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
    7304             :     const double dfMultFactorVerticalShiftPipeline =
    7305           8 :         poWK->bApplyVerticalShift
    7306           8 :             ? CPLAtof(CSLFetchNameValueDef(
    7307           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    7308             :                   "1.0"))
    7309           8 :             : 0.0;
    7310             :     const bool bAvoidNoDataSingleBand =
    7311           8 :         poWK->nBands == 1 ||
    7312           0 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7313           8 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    7314             : 
    7315           8 :     double *padfRealVals = nullptr;
    7316           8 :     double *padfImagVals = nullptr;
    7317           8 :     float *pafCounts = nullptr;
    7318             : 
    7319           8 :     if (nSrcXSize > 0 && nSrcYSize > 0)
    7320             :     {
    7321             :         padfRealVals = static_cast<double *>(
    7322           8 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
    7323             :         padfImagVals = static_cast<double *>(
    7324           8 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
    7325             :         pafCounts = static_cast<float *>(
    7326           8 :             VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
    7327           8 :         if (padfRealVals == nullptr || padfImagVals == nullptr ||
    7328             :             pafCounts == nullptr)
    7329             :         {
    7330           0 :             VSIFree(padfRealVals);
    7331           0 :             VSIFree(padfImagVals);
    7332           0 :             VSIFree(pafCounts);
    7333           0 :             return;
    7334             :         }
    7335             :     }
    7336             : 
    7337             :     /* -------------------------------------------------------------------- */
    7338             :     /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    7339             :     /*      scanlines worth of positions.                                   */
    7340             :     /* -------------------------------------------------------------------- */
    7341             : 
    7342             :     double *padfX =
    7343           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7344             :     double *padfY =
    7345           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7346             :     double *padfZ =
    7347           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7348             :     double *padfX2 =
    7349           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7350             :     double *padfY2 =
    7351           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7352             :     double *padfZ2 =
    7353           8 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7354           8 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7355           8 :     int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7356             : 
    7357           8 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    7358           8 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    7359           8 :     const double dfErrorThreshold = CPLAtof(
    7360           8 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    7361             : 
    7362             :     const int nXMargin =
    7363           8 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    7364             :     const int nYMargin =
    7365           8 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
    7366             : 
    7367             :     /* ==================================================================== */
    7368             :     /*      Loop over output lines.                                         */
    7369             :     /* ==================================================================== */
    7370          16 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    7371             :     {
    7372           8 :         GWKAverageOrModeComputeLineCoords(
    7373             :             psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
    7374             :             pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
    7375             : 
    7376             :         // Loop over pixels in output scanline.
    7377          16 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    7378             :         {
    7379           8 :             GPtrDiff_t iSrcOffset = 0;
    7380           8 :             double dfDensity = 1.0;
    7381           8 :             bool bHasFoundDensity = false;
    7382             : 
    7383           8 :             bool bWrapOverX = false;
    7384           8 :             double dfXMin = 0;
    7385           8 :             double dfYMin = 0;
    7386           8 :             double dfXMax = 0;
    7387           8 :             double dfYMax = 0;
    7388           8 :             int iSrcXMin = 0;
    7389           8 :             int iSrcYMin = 0;
    7390           8 :             int iSrcXMax = 0;
    7391           8 :             int iSrcYMax = 0;
    7392           8 :             if (!GWKAverageOrModeComputeSourceCoords(
    7393             :                     psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
    7394             :                     nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
    7395             :                     iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
    7396             :             {
    7397           0 :                 continue;
    7398             :             }
    7399             : 
    7400           8 :             const GPtrDiff_t iDstOffset =
    7401           8 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    7402             : 
    7403             :             // Loop processing each band.
    7404          16 :             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7405             :             {
    7406           8 :                 double dfBandDensity = 0.0;
    7407             : 
    7408           8 :                 int nBins = 0;
    7409           8 :                 int iModeIndex = -1;
    7410           8 :                 double dfValueReal = 0;
    7411           8 :                 double dfValueImag = 0;
    7412             : 
    7413          16 :                 for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7414             :                 {
    7415           8 :                     const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7416           8 :                     iSrcOffset =
    7417           8 :                         iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7418          38 :                     for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7419             :                          iSrcX++, iSrcOffset++)
    7420             :                     {
    7421          30 :                         if (bWrapOverX)
    7422           0 :                             iSrcOffset =
    7423           0 :                                 (iSrcX % nSrcXSize) +
    7424           0 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7425             : 
    7426          30 :                         if (poWK->panUnifiedSrcValid != nullptr &&
    7427           0 :                             !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    7428           0 :                             continue;
    7429             : 
    7430          30 :                         if (GWKGetPixelValue(poWK, iBand, iSrcOffset,
    7431             :                                              &dfBandDensity, &dfValueReal,
    7432          60 :                                              &dfValueImag) &&
    7433          30 :                             dfBandDensity > BAND_DENSITY_THRESHOLD)
    7434             :                         {
    7435          30 :                             const double dfWeight =
    7436          30 :                                 COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7437             : 
    7438             :                             // Check array for existing entry.
    7439          30 :                             int i = 0;
    7440          49 :                             for (i = 0; i < nBins; ++i)
    7441             :                             {
    7442          47 :                                 if (IsSame(padfRealVals[i], dfValueReal) &&
    7443          14 :                                     IsSame(padfImagVals[i], dfValueImag))
    7444             :                                 {
    7445             : 
    7446          14 :                                     pafCounts[i] +=
    7447          14 :                                         static_cast<float>(dfWeight);
    7448          14 :                                     bool bValIsMaxCount =
    7449          14 :                                         (pafCounts[i] > pafCounts[iModeIndex]);
    7450             : 
    7451          14 :                                     if (!bValIsMaxCount &&
    7452           6 :                                         pafCounts[i] == pafCounts[iModeIndex])
    7453             :                                     {
    7454           3 :                                         switch (eTieStrategy)
    7455             :                                         {
    7456           3 :                                             case GWKTS_First:
    7457           3 :                                                 break;
    7458           0 :                                             case GWKTS_Min:
    7459           0 :                                                 bValIsMaxCount =
    7460           0 :                                                     dfValueReal <
    7461           0 :                                                     padfRealVals[iModeIndex];
    7462           0 :                                                 break;
    7463           0 :                                             case GWKTS_Max:
    7464           0 :                                                 bValIsMaxCount =
    7465           0 :                                                     dfValueReal >
    7466           0 :                                                     padfRealVals[iModeIndex];
    7467           0 :                                                 break;
    7468             :                                         }
    7469             :                                     }
    7470             : 
    7471          14 :                                     if (bValIsMaxCount)
    7472             :                                     {
    7473           8 :                                         iModeIndex = i;
    7474             :                                     }
    7475             : 
    7476          14 :                                     break;
    7477             :                                 }
    7478             :                             }
    7479             : 
    7480             :                             // Add to arr if entry not already there.
    7481          30 :                             if (i == nBins)
    7482             :                             {
    7483          16 :                                 padfRealVals[i] = dfValueReal;
    7484          16 :                                 padfImagVals[i] = dfValueImag;
    7485          16 :                                 pafCounts[i] = static_cast<float>(dfWeight);
    7486             : 
    7487          16 :                                 if (iModeIndex < 0)
    7488           8 :                                     iModeIndex = i;
    7489             : 
    7490          16 :                                 ++nBins;
    7491             :                             }
    7492             :                         }
    7493             :                     }
    7494             :                 }
    7495             : 
    7496           8 :                 if (iModeIndex != -1)
    7497             :                 {
    7498           8 :                     dfValueReal = padfRealVals[iModeIndex];
    7499           8 :                     dfValueImag = padfImagVals[iModeIndex];
    7500           8 :                     dfBandDensity = 1;
    7501             : 
    7502           8 :                     if (poWK->bApplyVerticalShift)
    7503             :                     {
    7504           0 :                         if (!std::isfinite(padfZ[iDstX]))
    7505           0 :                             continue;
    7506             :                         // Subtract padfZ[] since the coordinate
    7507             :                         // transformation is from target to source
    7508           0 :                         dfValueReal =
    7509           0 :                             dfValueReal * poWK->dfMultFactorVerticalShift -
    7510           0 :                             padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
    7511             :                     }
    7512             : 
    7513           8 :                     bHasFoundDensity = true;
    7514             :                 }
    7515             : 
    7516             :                 // We have a computed value from the source.  Now apply it
    7517             :                 // to the destination pixel
    7518           8 :                 if (bHasFoundDensity)
    7519             :                 {
    7520           8 :                     GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    7521             :                                      dfValueReal, dfValueImag,
    7522             :                                      bAvoidNoDataSingleBand);
    7523             :                 }
    7524             :             }
    7525             : 
    7526           8 :             if (!bHasFoundDensity)
    7527           0 :                 continue;
    7528             : 
    7529           8 :             if (!bAvoidNoDataSingleBand)
    7530             :             {
    7531           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    7532             :             }
    7533             : 
    7534             :             /* --------------------------------------------------------------------
    7535             :              */
    7536             :             /*      Update destination density/validity masks. */
    7537             :             /* --------------------------------------------------------------------
    7538             :              */
    7539           8 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    7540             : 
    7541           8 :             if (poWK->panDstValid != nullptr)
    7542             :             {
    7543           0 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    7544             :             }
    7545             :         } /* Next iDstX */
    7546             : 
    7547             :         /* --------------------------------------------------------------------
    7548             :          */
    7549             :         /*      Report progress to the user, and optionally cancel out. */
    7550             :         /* --------------------------------------------------------------------
    7551             :          */
    7552           8 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    7553           0 :             break;
    7554             :     }
    7555             : 
    7556             :     /* -------------------------------------------------------------------- */
    7557             :     /*      Cleanup and return.                                             */
    7558             :     /* -------------------------------------------------------------------- */
    7559           8 :     CPLFree(padfX);
    7560           8 :     CPLFree(padfY);
    7561           8 :     CPLFree(padfZ);
    7562           8 :     CPLFree(padfX2);
    7563           8 :     CPLFree(padfY2);
    7564           8 :     CPLFree(padfZ2);
    7565           8 :     CPLFree(pabSuccess);
    7566           8 :     CPLFree(pabSuccess2);
    7567           8 :     VSIFree(padfRealVals);
    7568           8 :     VSIFree(padfImagVals);
    7569           8 :     VSIFree(pafCounts);
    7570             : }
    7571             : 
    7572             : /************************************************************************/
    7573             : /*                       GWKAverageOrModeThread()                       */
    7574             : /************************************************************************/
    7575             : 
    7576             : // Overall logic based on GWKGeneralCaseThread().
    7577         246 : static void GWKAverageOrModeThread(void *pData)
    7578             : {
    7579         246 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    7580         246 :     const GDALWarpKernel *poWK = psJob->poWK;
    7581         246 :     const int iYMin = psJob->iYMin;
    7582         246 :     const int iYMax = psJob->iYMax;
    7583             :     const double dfMultFactorVerticalShiftPipeline =
    7584         246 :         poWK->bApplyVerticalShift
    7585         246 :             ? CPLAtof(CSLFetchNameValueDef(
    7586           0 :                   poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
    7587             :                   "1.0"))
    7588         246 :             : 0.0;
    7589             :     const bool bAvoidNoDataSingleBand =
    7590         342 :         poWK->nBands == 1 ||
    7591          96 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7592         246 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    7593             : 
    7594         246 :     const int nDstXSize = poWK->nDstXSize;
    7595         246 :     const int nSrcXSize = poWK->nSrcXSize;
    7596             : 
    7597             :     /* -------------------------------------------------------------------- */
    7598             :     /*      Find out which algorithm to use (small optim.)                  */
    7599             :     /* -------------------------------------------------------------------- */
    7600             : 
    7601             :     // Only used for GRA_Mode
    7602         246 :     float *pafCounts = nullptr;
    7603         246 :     int nBins = 0;
    7604         246 :     int nBinsOffset = 0;
    7605         246 :     const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
    7606             : 
    7607             :     // Only used with Q1, Med and Q3
    7608         246 :     float quant = 0.0f;
    7609             : 
    7610             :     // To control array allocation only when data type is complex
    7611         246 :     const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
    7612             : 
    7613         246 :     if (poWK->eResample == GRA_Mode)
    7614             :     {
    7615          45 :         if (poWK->bApplyVerticalShift)
    7616             :         {
    7617           0 :             return GWKModeComplexType(psJob);
    7618             :         }
    7619             : 
    7620          45 :         switch (poWK->eWorkingDataType)
    7621             :         {
    7622           7 :             case GDT_UInt8:
    7623           7 :                 nBins = 256;
    7624           7 :                 break;
    7625             : 
    7626           1 :             case GDT_Int8:
    7627           1 :                 nBins = 256;
    7628           1 :                 nBinsOffset = nBins / 2;
    7629           1 :                 break;
    7630             : 
    7631           1 :             case GDT_UInt16:
    7632           1 :                 nBins = 65536;
    7633           1 :                 break;
    7634             : 
    7635           9 :             case GDT_Int16:
    7636           9 :                 nBins = 65536;
    7637           9 :                 nBinsOffset = nBins / 2;
    7638           9 :                 break;
    7639             : 
    7640          10 :             case GDT_Int32:
    7641          10 :                 return GWKModeRealType<int32_t>(psJob);
    7642             : 
    7643           1 :             case GDT_UInt32:
    7644           1 :                 return GWKModeRealType<uint32_t>(psJob);
    7645             : 
    7646           1 :             case GDT_Int64:
    7647           1 :                 return GWKModeRealType<int64_t>(psJob);
    7648             : 
    7649           1 :             case GDT_UInt64:
    7650           1 :                 return GWKModeRealType<uint64_t>(psJob);
    7651             : 
    7652           0 :             case GDT_Float16:
    7653           0 :                 return GWKModeRealType<GFloat16>(psJob);
    7654             : 
    7655           4 :             case GDT_Float32:
    7656           4 :                 return GWKModeRealType<float>(psJob);
    7657             : 
    7658           2 :             case GDT_Float64:
    7659           2 :                 return GWKModeRealType<double>(psJob);
    7660             : 
    7661           8 :             case GDT_CInt16:
    7662             :             case GDT_CInt32:
    7663             :             case GDT_CFloat16:
    7664             :             case GDT_CFloat32:
    7665             :             case GDT_CFloat64:
    7666           8 :                 return GWKModeComplexType(psJob);
    7667             : 
    7668           0 :             case GDT_Unknown:
    7669             :             case GDT_TypeCount:
    7670           0 :                 CPLAssert(false);
    7671             :                 return;
    7672             :         }
    7673             : 
    7674          18 :         if (nBins)
    7675             :         {
    7676             :             pafCounts =
    7677          18 :                 static_cast<float *>(VSI_MALLOC_VERBOSE(nBins * sizeof(float)));
    7678          18 :             if (pafCounts == nullptr)
    7679           0 :                 return;
    7680             :         }
    7681             :     }
    7682         201 :     else if (poWK->eResample == GRA_Med)
    7683             :     {
    7684           6 :         quant = 0.5f;
    7685             :     }
    7686         195 :     else if (poWK->eResample == GRA_Q1)
    7687             :     {
    7688          10 :         quant = 0.25f;
    7689             :     }
    7690         185 :     else if (poWK->eResample == GRA_Q3)
    7691             :     {
    7692           5 :         quant = 0.75f;
    7693             :     }
    7694         180 :     else if (poWK->eResample != GRA_Average && poWK->eResample != GRA_RMS &&
    7695          11 :              poWK->eResample != GRA_Min && poWK->eResample != GRA_Max)
    7696             :     {
    7697             :         // Other resample algorithms not permitted here.
    7698           0 :         CPLError(CE_Fatal, CPLE_AppDefined,
    7699             :                  "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
    7700             :                  "illegal resample");
    7701             :     }
    7702             : 
    7703         219 :     CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread()");
    7704             : 
    7705             :     /* -------------------------------------------------------------------- */
    7706             :     /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    7707             :     /*      scanlines worth of positions.                                   */
    7708             :     /* -------------------------------------------------------------------- */
    7709             : 
    7710             :     double *padfX =
    7711         219 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7712             :     double *padfY =
    7713         219 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7714             :     double *padfZ =
    7715         219 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7716             :     double *padfX2 =
    7717         219 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7718             :     double *padfY2 =
    7719         219 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7720             :     double *padfZ2 =
    7721         219 :         static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    7722         219 :     int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7723         219 :     int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    7724             : 
    7725         219 :     const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
    7726         219 :         poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    7727         219 :     const double dfErrorThreshold = CPLAtof(
    7728         219 :         CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
    7729             : 
    7730             :     const double dfExcludedValuesThreshold =
    7731         219 :         CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7732             :                                      "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
    7733         219 :         100.0;
    7734             :     const double dfNodataValuesThreshold =
    7735         219 :         CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
    7736             :                                      "NODATA_VALUES_PCT_THRESHOLD", "100")) /
    7737         219 :         100.0;
    7738             : 
    7739             :     const int nXMargin =
    7740         219 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    7741             :     const int nYMargin =
    7742         219 :         2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
    7743             : 
    7744             :     /* ==================================================================== */
    7745             :     /*      Loop over output lines.                                         */
    7746             :     /* ==================================================================== */
    7747       28777 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    7748             :     {
    7749       28558 :         GWKAverageOrModeComputeLineCoords(
    7750             :             psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
    7751             :             pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
    7752             : 
    7753             :         /* ====================================================================
    7754             :          */
    7755             :         /*      Loop over pixels in output scanline. */
    7756             :         /* ====================================================================
    7757             :          */
    7758     7357360 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    7759             :         {
    7760     7328800 :             GPtrDiff_t iSrcOffset = 0;
    7761     7328800 :             double dfDensity = 1.0;
    7762     7328800 :             bool bHasFoundDensity = false;
    7763             : 
    7764     7328800 :             bool bWrapOverX = false;
    7765     7328800 :             double dfXMin = 0;
    7766     7328800 :             double dfYMin = 0;
    7767     7328800 :             double dfXMax = 0;
    7768     7328800 :             double dfYMax = 0;
    7769     7328800 :             int iSrcXMin = 0;
    7770     7328800 :             int iSrcYMin = 0;
    7771     7328800 :             int iSrcXMax = 0;
    7772     7328800 :             int iSrcYMax = 0;
    7773     7328800 :             if (!GWKAverageOrModeComputeSourceCoords(
    7774             :                     psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
    7775             :                     nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
    7776             :                     iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
    7777             :             {
    7778     3158560 :                 continue;
    7779             :             }
    7780             : 
    7781     5314750 :             const GPtrDiff_t iDstOffset =
    7782     5314750 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    7783             : 
    7784     5314750 :             bool bDone = false;
    7785             : 
    7786             :             // Special Average mode where we process all bands together,
    7787             :             // to avoid averaging tuples that match an entry of m_aadfExcludedValues
    7788     5314750 :             constexpr double EPSILON = 1e-10;
    7789    14838200 :             if (poWK->eResample == GRA_Average &&
    7790     4208720 :                 (!poWK->m_aadfExcludedValues.empty() ||
    7791      393224 :                  dfNodataValuesThreshold < 1 - EPSILON) &&
    7792     9523480 :                 !poWK->bApplyVerticalShift && !bIsComplex)
    7793             :             {
    7794      393224 :                 double dfTotalWeightInvalid = 0.0;
    7795      393224 :                 double dfTotalWeightExcluded = 0.0;
    7796      393224 :                 double dfTotalWeightRegular = 0.0;
    7797      786448 :                 std::vector<double> adfValueReal(poWK->nBands, 0);
    7798      786448 :                 std::vector<double> adfValueAveraged(poWK->nBands, 0);
    7799             :                 std::vector<int> anCountExcludedValues(
    7800      393224 :                     poWK->m_aadfExcludedValues.size(), 0);
    7801             : 
    7802     1179670 :                 for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7803             :                 {
    7804      786448 :                     const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7805      786448 :                     iSrcOffset =
    7806      786448 :                         iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7807     2359340 :                     for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7808             :                          iSrcX++, iSrcOffset++)
    7809             :                     {
    7810     1572900 :                         if (bWrapOverX)
    7811           0 :                             iSrcOffset =
    7812           0 :                                 (iSrcX % nSrcXSize) +
    7813           0 :                                 static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7814             : 
    7815     1572900 :                         const double dfWeight =
    7816     1572900 :                             COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7817     1572900 :                         if (dfWeight <= 0)
    7818           0 :                             continue;
    7819             : 
    7820     1572910 :                         if (poWK->panUnifiedSrcValid != nullptr &&
    7821          12 :                             !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    7822             :                         {
    7823           3 :                             dfTotalWeightInvalid += dfWeight;
    7824           3 :                             continue;
    7825             :                         }
    7826             : 
    7827     1572890 :                         bool bAllValid = true;
    7828     2359410 :                         for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7829             :                         {
    7830     2097230 :                             double dfBandDensity = 0;
    7831     2097230 :                             double dfValueImagTmp = 0;
    7832     2883740 :                             if (!(GWKGetPixelValue(
    7833             :                                       poWK, iBand, iSrcOffset, &dfBandDensity,
    7834     2097230 :                                       &adfValueReal[iBand], &dfValueImagTmp) &&
    7835      786513 :                                   dfBandDensity > BAND_DENSITY_THRESHOLD))
    7836             :                             {
    7837     1310720 :                                 bAllValid = false;
    7838     1310720 :                                 break;
    7839             :                             }
    7840             :                         }
    7841             : 
    7842     1572890 :                         if (!bAllValid)
    7843             :                         {
    7844     1310720 :                             dfTotalWeightInvalid += dfWeight;
    7845     1310720 :                             continue;
    7846             :                         }
    7847             : 
    7848      262177 :                         bool bExcludedValueFound = false;
    7849      393263 :                         for (size_t i = 0;
    7850      393263 :                              i < poWK->m_aadfExcludedValues.size(); ++i)
    7851             :                         {
    7852      131092 :                             if (poWK->m_aadfExcludedValues[i] == adfValueReal)
    7853             :                             {
    7854           6 :                                 bExcludedValueFound = true;
    7855           6 :                                 ++anCountExcludedValues[i];
    7856           6 :                                 dfTotalWeightExcluded += dfWeight;
    7857           6 :                                 break;
    7858             :                             }
    7859             :                         }
    7860      262177 :                         if (!bExcludedValueFound)
    7861             :                         {
    7862             :                             // Weighted incremental algorithm mean
    7863             :                             // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    7864      262171 :                             dfTotalWeightRegular += dfWeight;
    7865     1048670 :                             for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7866             :                             {
    7867      786495 :                                 adfValueAveraged[iBand] +=
    7868     1572990 :                                     (dfWeight / dfTotalWeightRegular) *
    7869     1572990 :                                     (adfValueReal[iBand] -
    7870      786495 :                                      adfValueAveraged[iBand]);
    7871             :                             }
    7872             :                         }
    7873             :                     }
    7874             :                 }
    7875             : 
    7876      393224 :                 const double dfTotalWeight = dfTotalWeightInvalid +
    7877             :                                              dfTotalWeightExcluded +
    7878             :                                              dfTotalWeightRegular;
    7879      393224 :                 if (dfTotalWeightInvalid > 0 &&
    7880             :                     dfTotalWeightInvalid >=
    7881      327685 :                         dfNodataValuesThreshold * dfTotalWeight)
    7882             :                 {
    7883             :                     // Do nothing. Leave bHasFoundDensity set to false.
    7884             :                 }
    7885       65543 :                 else if (dfTotalWeightExcluded > 0 &&
    7886             :                          dfTotalWeightExcluded >=
    7887           6 :                              dfExcludedValuesThreshold * dfTotalWeight)
    7888             :                 {
    7889             :                     // Find the most represented excluded value tuple
    7890           2 :                     size_t iExcludedValue = 0;
    7891           2 :                     int nExcludedValueCount = 0;
    7892           4 :                     for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
    7893             :                          ++i)
    7894             :                     {
    7895           2 :                         if (anCountExcludedValues[i] > nExcludedValueCount)
    7896             :                         {
    7897           2 :                             iExcludedValue = i;
    7898           2 :                             nExcludedValueCount = anCountExcludedValues[i];
    7899             :                         }
    7900             :                     }
    7901             : 
    7902           2 :                     bHasFoundDensity = true;
    7903             : 
    7904           8 :                     for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7905             :                     {
    7906           6 :                         GWKSetPixelValue(
    7907             :                             poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
    7908           6 :                             poWK->m_aadfExcludedValues[iExcludedValue][iBand],
    7909             :                             0, bAvoidNoDataSingleBand);
    7910             :                     }
    7911             : 
    7912           2 :                     if (!bAvoidNoDataSingleBand)
    7913             :                     {
    7914           0 :                         GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    7915           2 :                     }
    7916             :                 }
    7917       65541 :                 else if (dfTotalWeightRegular > 0)
    7918             :                 {
    7919       65541 :                     bHasFoundDensity = true;
    7920             : 
    7921      262160 :                     for (int iBand = 0; iBand < poWK->nBands; iBand++)
    7922             :                     {
    7923      196619 :                         GWKSetPixelValue(poWK, iBand, iDstOffset,
    7924             :                                          /* dfBandDensity = */ 1.0,
    7925      196619 :                                          adfValueAveraged[iBand], 0,
    7926             :                                          bAvoidNoDataSingleBand);
    7927             :                     }
    7928             : 
    7929       65541 :                     if (!bAvoidNoDataSingleBand)
    7930             :                     {
    7931           0 :                         GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    7932             :                     }
    7933             :                 }
    7934             : 
    7935             :                 // Skip below loop on bands
    7936      393224 :                 bDone = true;
    7937             :             }
    7938             : 
    7939             :             /* ====================================================================
    7940             :              */
    7941             :             /*      Loop processing each band. */
    7942             :             /* ====================================================================
    7943             :              */
    7944             : 
    7945    17670500 :             for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
    7946             :             {
    7947    12355700 :                 double dfBandDensity = 0.0;
    7948    12355700 :                 double dfValueReal = 0.0;
    7949    12355700 :                 double dfValueImag = 0.0;
    7950    12355700 :                 double dfValueRealTmp = 0.0;
    7951    12355700 :                 double dfValueImagTmp = 0.0;
    7952             : 
    7953             :                 /* --------------------------------------------------------------------
    7954             :                  */
    7955             :                 /*      Collect the source value. */
    7956             :                 /* --------------------------------------------------------------------
    7957             :                  */
    7958             : 
    7959             :                 // Loop over source lines and pixels - 3 possible algorithms.
    7960             : 
    7961    12355700 :                 if (poWK->eResample == GRA_Average)
    7962             :                 {
    7963     9833240 :                     double dfTotalWeight = 0.0;
    7964             : 
    7965             :                     // This code adapted from GDALDownsampleChunk32R_AverageT()
    7966             :                     // in gcore/overview.cpp.
    7967    25243600 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    7968             :                     {
    7969    15410300 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    7970    15410300 :                         iSrcOffset = iSrcXMin +
    7971    15410300 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7972    44761400 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    7973             :                              iSrcX++, iSrcOffset++)
    7974             :                         {
    7975    29351100 :                             if (bWrapOverX)
    7976        2571 :                                 iSrcOffset =
    7977        2571 :                                     (iSrcX % nSrcXSize) +
    7978        2571 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    7979             : 
    7980    29351100 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    7981           4 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    7982             :                                             iSrcOffset))
    7983             :                             {
    7984           1 :                                 continue;
    7985             :                             }
    7986             : 
    7987    29351100 :                             if (GWKGetPixelValue(
    7988             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    7989    48239400 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    7990    18888400 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    7991             :                             {
    7992    18888400 :                                 const double dfWeight =
    7993    18888400 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    7994    18888400 :                                 if (dfWeight > 0)
    7995             :                                 {
    7996             :                                     // Weighted incremental algorithm mean
    7997             :                                     // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
    7998    18888400 :                                     dfTotalWeight += dfWeight;
    7999    18888400 :                                     dfValueReal +=
    8000    18888400 :                                         (dfWeight / dfTotalWeight) *
    8001    18888400 :                                         (dfValueRealTmp - dfValueReal);
    8002    18888400 :                                     if (bIsComplex)
    8003             :                                     {
    8004         252 :                                         dfValueImag +=
    8005         252 :                                             (dfWeight / dfTotalWeight) *
    8006         252 :                                             (dfValueImagTmp - dfValueImag);
    8007             :                                     }
    8008             :                                 }
    8009             :                             }
    8010             :                         }
    8011             :                     }
    8012             : 
    8013     9833240 :                     if (dfTotalWeight > 0)
    8014             :                     {
    8015     7530420 :                         if (poWK->bApplyVerticalShift)
    8016             :                         {
    8017           0 :                             if (!std::isfinite(padfZ[iDstX]))
    8018           0 :                                 continue;
    8019             :                             // Subtract padfZ[] since the coordinate
    8020             :                             // transformation is from target to source
    8021           0 :                             dfValueReal =
    8022           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    8023           0 :                                 padfZ[iDstX] *
    8024             :                                     dfMultFactorVerticalShiftPipeline;
    8025             :                         }
    8026             : 
    8027     7530420 :                         dfBandDensity = 1;
    8028     7530420 :                         bHasFoundDensity = true;
    8029             :                     }
    8030             :                 }  // GRA_Average.
    8031             : 
    8032     2522460 :                 else if (poWK->eResample == GRA_RMS)
    8033             :                 {
    8034      300416 :                     double dfTotalReal = 0.0;
    8035      300416 :                     double dfTotalImag = 0.0;
    8036      300416 :                     double dfTotalWeight = 0.0;
    8037             :                     // This code adapted from GDALDownsampleChunk32R_AverageT()
    8038             :                     // in gcore/overview.cpp.
    8039      630578 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    8040             :                     {
    8041      330162 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    8042      330162 :                         iSrcOffset = iSrcXMin +
    8043      330162 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8044      772930 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    8045             :                              iSrcX++, iSrcOffset++)
    8046             :                         {
    8047      442768 :                             if (bWrapOverX)
    8048        1371 :                                 iSrcOffset =
    8049        1371 :                                     (iSrcX % nSrcXSize) +
    8050        1371 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8051             : 
    8052      442768 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8053           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8054             :                                             iSrcOffset))
    8055             :                             {
    8056           0 :                                 continue;
    8057             :                             }
    8058             : 
    8059      442768 :                             if (GWKGetPixelValue(
    8060             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8061      885536 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8062      442768 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8063             :                             {
    8064      442768 :                                 const double dfWeight =
    8065      442768 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    8066      442768 :                                 dfTotalWeight += dfWeight;
    8067      442768 :                                 dfTotalReal +=
    8068      442768 :                                     dfValueRealTmp * dfValueRealTmp * dfWeight;
    8069      442768 :                                 if (bIsComplex)
    8070          48 :                                     dfTotalImag += dfValueImagTmp *
    8071          48 :                                                    dfValueImagTmp * dfWeight;
    8072             :                             }
    8073             :                         }
    8074             :                     }
    8075             : 
    8076      300416 :                     if (dfTotalWeight > 0)
    8077             :                     {
    8078      300416 :                         dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
    8079             : 
    8080      300416 :                         if (poWK->bApplyVerticalShift)
    8081             :                         {
    8082           0 :                             if (!std::isfinite(padfZ[iDstX]))
    8083           0 :                                 continue;
    8084             :                             // Subtract padfZ[] since the coordinate
    8085             :                             // transformation is from target to source
    8086           0 :                             dfValueReal =
    8087           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    8088           0 :                                 padfZ[iDstX] *
    8089             :                                     dfMultFactorVerticalShiftPipeline;
    8090             :                         }
    8091             : 
    8092      300416 :                         if (bIsComplex)
    8093          12 :                             dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
    8094             : 
    8095      300416 :                         dfBandDensity = 1;
    8096      300416 :                         bHasFoundDensity = true;
    8097             :                     }
    8098             :                 }  // GRA_RMS.
    8099             : 
    8100     2222040 :                 else if (poWK->eResample == GRA_Mode)
    8101             :                 {
    8102      496623 :                     float fMaxCount = 0.0f;
    8103      496623 :                     int nMode = -1;
    8104      496623 :                     bool bHasSourceValues = false;
    8105             : 
    8106      496623 :                     memset(pafCounts, 0, nBins * sizeof(float));
    8107             : 
    8108     1167120 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    8109             :                     {
    8110      670495 :                         const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
    8111      670495 :                         iSrcOffset = iSrcXMin +
    8112      670495 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8113     1964680 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    8114             :                              iSrcX++, iSrcOffset++)
    8115             :                         {
    8116     1294190 :                             if (bWrapOverX)
    8117        1371 :                                 iSrcOffset =
    8118        1371 :                                     (iSrcX % nSrcXSize) +
    8119        1371 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8120             : 
    8121     1294190 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8122           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8123             :                                             iSrcOffset))
    8124           0 :                                 continue;
    8125             : 
    8126     1294190 :                             if (GWKGetPixelValue(
    8127             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8128     2588370 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8129     1294190 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8130             :                             {
    8131     1294190 :                                 bHasSourceValues = true;
    8132     1294190 :                                 const int nVal =
    8133     1294190 :                                     static_cast<int>(dfValueRealTmp);
    8134     1294190 :                                 const int iBin = nVal + nBinsOffset;
    8135     1294190 :                                 const double dfWeight =
    8136     1294190 :                                     COMPUTE_WEIGHT(iSrcX, dfWeightY);
    8137             : 
    8138             :                                 // Sum the density.
    8139     1294190 :                                 pafCounts[iBin] += static_cast<float>(dfWeight);
    8140             :                                 // Is it the most common value so far?
    8141     1294190 :                                 bool bUpdateMode = pafCounts[iBin] > fMaxCount;
    8142     1294190 :                                 if (!bUpdateMode &&
    8143      227545 :                                     pafCounts[iBin] == fMaxCount)
    8144             :                                 {
    8145       15866 :                                     switch (eTieStrategy)
    8146             :                                     {
    8147       15858 :                                         case GWKTS_First:
    8148       15858 :                                             break;
    8149           4 :                                         case GWKTS_Min:
    8150           4 :                                             bUpdateMode = nVal < nMode;
    8151           4 :                                             break;
    8152           4 :                                         case GWKTS_Max:
    8153           4 :                                             bUpdateMode = nVal > nMode;
    8154           4 :                                             break;
    8155             :                                     }
    8156             :                                 }
    8157     1294190 :                                 if (bUpdateMode)
    8158             :                                 {
    8159     1066640 :                                     nMode = nVal;
    8160     1066640 :                                     fMaxCount = pafCounts[iBin];
    8161             :                                 }
    8162             :                             }
    8163             :                         }
    8164             :                     }
    8165             : 
    8166      496623 :                     if (bHasSourceValues)
    8167             :                     {
    8168      496623 :                         dfValueReal = nMode;
    8169      496623 :                         dfBandDensity = 1;
    8170      496623 :                         bHasFoundDensity = true;
    8171             :                     }
    8172             :                 }  // GRA_Mode.
    8173             : 
    8174     1725420 :                 else if (poWK->eResample == GRA_Max)
    8175             :                 {
    8176      335037 :                     bool bFoundValid = false;
    8177      335037 :                     double dfTotalReal = cpl::NumericLimits<double>::lowest();
    8178             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    8179      842572 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    8180             :                     {
    8181      507535 :                         iSrcOffset = iSrcXMin +
    8182      507535 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8183     1638060 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    8184             :                              iSrcX++, iSrcOffset++)
    8185             :                         {
    8186     1130520 :                             if (bWrapOverX)
    8187        1371 :                                 iSrcOffset =
    8188        1371 :                                     (iSrcX % nSrcXSize) +
    8189        1371 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8190             : 
    8191     1133330 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8192        2809 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8193             :                                             iSrcOffset))
    8194             :                             {
    8195        2446 :                                 continue;
    8196             :                             }
    8197             : 
    8198             :                             // Returns pixel value if it is not no data.
    8199     1128070 :                             if (GWKGetPixelValue(
    8200             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8201     2256150 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8202     1128070 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8203             :                             {
    8204     1128070 :                                 bFoundValid = true;
    8205     1128070 :                                 if (dfTotalReal < dfValueRealTmp)
    8206             :                                 {
    8207      463372 :                                     dfTotalReal = dfValueRealTmp;
    8208             :                                 }
    8209             :                             }
    8210             :                         }
    8211             :                     }
    8212             : 
    8213      335037 :                     if (bFoundValid)
    8214             :                     {
    8215      335037 :                         dfValueReal = dfTotalReal;
    8216             : 
    8217      335037 :                         if (poWK->bApplyVerticalShift)
    8218             :                         {
    8219           0 :                             if (!std::isfinite(padfZ[iDstX]))
    8220           0 :                                 continue;
    8221             :                             // Subtract padfZ[] since the coordinate
    8222             :                             // transformation is from target to source
    8223           0 :                             dfValueReal =
    8224           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    8225           0 :                                 padfZ[iDstX] *
    8226             :                                     dfMultFactorVerticalShiftPipeline;
    8227             :                         }
    8228             : 
    8229      335037 :                         dfBandDensity = 1;
    8230      335037 :                         bHasFoundDensity = true;
    8231             :                     }
    8232             :                 }
    8233             : 
    8234     1390380 :                 else if (poWK->eResample == GRA_Min)
    8235             :                 {
    8236      335012 :                     bool bFoundValid = false;
    8237      335012 :                     double dfTotalReal = cpl::NumericLimits<double>::max();
    8238             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    8239      842282 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    8240             :                     {
    8241      507270 :                         iSrcOffset = iSrcXMin +
    8242      507270 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8243     1634980 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    8244             :                              iSrcX++, iSrcOffset++)
    8245             :                         {
    8246     1127710 :                             if (bWrapOverX)
    8247        1371 :                                 iSrcOffset =
    8248        1371 :                                     (iSrcX % nSrcXSize) +
    8249        1371 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8250             : 
    8251     1127710 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8252           0 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8253             :                                             iSrcOffset))
    8254             :                             {
    8255           0 :                                 continue;
    8256             :                             }
    8257             : 
    8258             :                             // Returns pixel value if it is not no data.
    8259     1127710 :                             if (GWKGetPixelValue(
    8260             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8261     2255420 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8262     1127710 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8263             :                             {
    8264     1127710 :                                 bFoundValid = true;
    8265     1127710 :                                 if (dfTotalReal > dfValueRealTmp)
    8266             :                                 {
    8267      464157 :                                     dfTotalReal = dfValueRealTmp;
    8268             :                                 }
    8269             :                             }
    8270             :                         }
    8271             :                     }
    8272             : 
    8273      335012 :                     if (bFoundValid)
    8274             :                     {
    8275      335012 :                         dfValueReal = dfTotalReal;
    8276             : 
    8277      335012 :                         if (poWK->bApplyVerticalShift)
    8278             :                         {
    8279           0 :                             if (!std::isfinite(padfZ[iDstX]))
    8280           0 :                                 continue;
    8281             :                             // Subtract padfZ[] since the coordinate
    8282             :                             // transformation is from target to source
    8283           0 :                             dfValueReal =
    8284           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    8285           0 :                                 padfZ[iDstX] *
    8286             :                                     dfMultFactorVerticalShiftPipeline;
    8287             :                         }
    8288             : 
    8289      335012 :                         dfBandDensity = 1;
    8290      335012 :                         bHasFoundDensity = true;
    8291             :                     }
    8292             :                 }  // GRA_Min.
    8293             : 
    8294             :                 else
    8295             :                 // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
    8296             :                 {
    8297     1055370 :                     CPLAssert(quant > 0.0f);
    8298             : 
    8299     1055370 :                     bool bFoundValid = false;
    8300     1055370 :                     std::vector<double> dfRealValuesTmp;
    8301             : 
    8302             :                     // This code adapted from nAlgo 1 method, GRA_Average.
    8303     2677810 :                     for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
    8304             :                     {
    8305     1622440 :                         iSrcOffset = iSrcXMin +
    8306     1622440 :                                      static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8307     5205220 :                         for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
    8308             :                              iSrcX++, iSrcOffset++)
    8309             :                         {
    8310     3582770 :                             if (bWrapOverX)
    8311        4113 :                                 iSrcOffset =
    8312        4113 :                                     (iSrcX % nSrcXSize) +
    8313        4113 :                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
    8314             : 
    8315     3779380 :                             if (poWK->panUnifiedSrcValid != nullptr &&
    8316      196608 :                                 !CPLMaskGet(poWK->panUnifiedSrcValid,
    8317             :                                             iSrcOffset))
    8318             :                             {
    8319      195449 :                                 continue;
    8320             :                             }
    8321             : 
    8322             :                             // Returns pixel value if it is not no data.
    8323     3387320 :                             if (GWKGetPixelValue(
    8324             :                                     poWK, iBand, iSrcOffset, &dfBandDensity,
    8325     6774650 :                                     &dfValueRealTmp, &dfValueImagTmp) &&
    8326     3387320 :                                 dfBandDensity > BAND_DENSITY_THRESHOLD)
    8327             :                             {
    8328     3387320 :                                 bFoundValid = true;
    8329     3387320 :                                 dfRealValuesTmp.push_back(dfValueRealTmp);
    8330             :                             }
    8331             :                         }
    8332             :                     }
    8333             : 
    8334     1055370 :                     if (bFoundValid)
    8335             :                     {
    8336     1006150 :                         std::sort(dfRealValuesTmp.begin(),
    8337             :                                   dfRealValuesTmp.end());
    8338             :                         int quantIdx = static_cast<int>(
    8339     1006150 :                             std::ceil(quant * dfRealValuesTmp.size() - 1));
    8340     1006150 :                         dfValueReal = dfRealValuesTmp[quantIdx];
    8341             : 
    8342     1006150 :                         if (poWK->bApplyVerticalShift)
    8343             :                         {
    8344           0 :                             if (!std::isfinite(padfZ[iDstX]))
    8345           0 :                                 continue;
    8346             :                             // Subtract padfZ[] since the coordinate
    8347             :                             // transformation is from target to source
    8348           0 :                             dfValueReal =
    8349           0 :                                 dfValueReal * poWK->dfMultFactorVerticalShift -
    8350           0 :                                 padfZ[iDstX] *
    8351             :                                     dfMultFactorVerticalShiftPipeline;
    8352             :                         }
    8353             : 
    8354     1006150 :                         dfBandDensity = 1;
    8355     1006150 :                         bHasFoundDensity = true;
    8356     1006150 :                         dfRealValuesTmp.clear();
    8357             :                     }
    8358             :                 }  // Quantile.
    8359             : 
    8360             :                 /* --------------------------------------------------------------------
    8361             :                  */
    8362             :                 /*      We have a computed value from the source.  Now apply it
    8363             :                  * to      */
    8364             :                 /*      the destination pixel. */
    8365             :                 /* --------------------------------------------------------------------
    8366             :                  */
    8367    12355700 :                 if (bHasFoundDensity)
    8368             :                 {
    8369             :                     // TODO: Should we compute dfBandDensity in fct of
    8370             :                     // nCount/nCount2, or use as a threshold to set the dest
    8371             :                     // value?
    8372             :                     // dfBandDensity = (float) nCount / nCount2;
    8373             :                     // if( (float) nCount / nCount2 > 0.1 )
    8374             :                     // or fix gdalwarp crop_to_cutline to crop partially
    8375             :                     // overlapping pixels.
    8376    10003600 :                     GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    8377             :                                      dfValueReal, dfValueImag,
    8378             :                                      bAvoidNoDataSingleBand);
    8379             :                 }
    8380             :             }
    8381             : 
    8382     5314750 :             if (!bHasFoundDensity)
    8383     1144510 :                 continue;
    8384             : 
    8385     4170240 :             if (!bAvoidNoDataSingleBand)
    8386             :             {
    8387           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    8388             :             }
    8389             : 
    8390             :             /* --------------------------------------------------------------------
    8391             :              */
    8392             :             /*      Update destination density/validity masks. */
    8393             :             /* --------------------------------------------------------------------
    8394             :              */
    8395     4170240 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity);
    8396             : 
    8397     4170240 :             if (poWK->panDstValid != nullptr)
    8398             :             {
    8399        1184 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    8400             :             }
    8401             :         } /* Next iDstX */
    8402             : 
    8403             :         /* --------------------------------------------------------------------
    8404             :          */
    8405             :         /*      Report progress to the user, and optionally cancel out. */
    8406             :         /* --------------------------------------------------------------------
    8407             :          */
    8408       28558 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    8409           0 :             break;
    8410             :     }
    8411             : 
    8412             :     /* -------------------------------------------------------------------- */
    8413             :     /*      Cleanup and return.                                             */
    8414             :     /* -------------------------------------------------------------------- */
    8415         219 :     CPLFree(padfX);
    8416         219 :     CPLFree(padfY);
    8417         219 :     CPLFree(padfZ);
    8418         219 :     CPLFree(padfX2);
    8419         219 :     CPLFree(padfY2);
    8420         219 :     CPLFree(padfZ2);
    8421         219 :     CPLFree(pabSuccess);
    8422         219 :     CPLFree(pabSuccess2);
    8423         219 :     VSIFree(pafCounts);
    8424             : }
    8425             : 
    8426             : /************************************************************************/
    8427             : /*                           getOrientation()                           */
    8428             : /************************************************************************/
    8429             : 
    8430             : // Returns 1 whether (p1,p2,p3) is clockwise oriented,
    8431             : // -1 if it is counter-clockwise oriented,
    8432             : // or 0 if it is colinear.
    8433     2355910 : static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
    8434             : {
    8435     2355910 :     const double p1x = p1.first;
    8436     2355910 :     const double p1y = p1.second;
    8437     2355910 :     const double p2x = p2.first;
    8438     2355910 :     const double p2y = p2.second;
    8439     2355910 :     const double p3x = p3.first;
    8440     2355910 :     const double p3y = p3.second;
    8441     2355910 :     const double val = (p2y - p1y) * (p3x - p2x) - (p2x - p1x) * (p3y - p2y);
    8442     2355910 :     if (std::abs(val) < 1e-20)
    8443        2690 :         return 0;
    8444     2353220 :     else if (val > 0)
    8445           0 :         return 1;
    8446             :     else
    8447     2353220 :         return -1;
    8448             : }
    8449             : 
    8450             : /************************************************************************/
    8451             : /*                              isConvex()                              */
    8452             : /************************************************************************/
    8453             : 
    8454             : // poly must be closed
    8455      785302 : static bool isConvex(const XYPoly &poly)
    8456             : {
    8457      785302 :     const size_t n = poly.size();
    8458      785302 :     size_t i = 0;
    8459      785302 :     int last_orientation = getOrientation(poly[i], poly[i + 1], poly[i + 2]);
    8460      785302 :     ++i;
    8461     2355910 :     for (; i < n - 2; ++i)
    8462             :     {
    8463             :         const int orientation =
    8464     1570600 :             getOrientation(poly[i], poly[i + 1], poly[i + 2]);
    8465     1570600 :         if (orientation != 0)
    8466             :         {
    8467     1567910 :             if (last_orientation == 0)
    8468           0 :                 last_orientation = orientation;
    8469     1567910 :             else if (orientation != last_orientation)
    8470           0 :                 return false;
    8471             :         }
    8472             :     }
    8473      785302 :     return true;
    8474             : }
    8475             : 
    8476             : /************************************************************************/
    8477             : /*                     pointIntersectsConvexPoly()                      */
    8478             : /************************************************************************/
    8479             : 
    8480             : // Returns whether xy intersects poly, that must be closed and convex.
    8481     6049100 : static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
    8482             : {
    8483     6049100 :     const size_t n = poly.size();
    8484     6049100 :     double dx1 = xy.first - poly[0].first;
    8485     6049100 :     double dy1 = xy.second - poly[0].second;
    8486     6049100 :     double dx2 = poly[1].first - poly[0].first;
    8487     6049100 :     double dy2 = poly[1].second - poly[0].second;
    8488     6049100 :     double prevCrossProduct = dx1 * dy2 - dx2 * dy1;
    8489             : 
    8490             :     // Check if the point remains on the same side (left/right) of all edges
    8491    14556400 :     for (size_t i = 2; i < n; i++)
    8492             :     {
    8493    12793100 :         dx1 = xy.first - poly[i - 1].first;
    8494    12793100 :         dy1 = xy.second - poly[i - 1].second;
    8495             : 
    8496    12793100 :         dx2 = poly[i].first - poly[i - 1].first;
    8497    12793100 :         dy2 = poly[i].second - poly[i - 1].second;
    8498             : 
    8499    12793100 :         double crossProduct = dx1 * dy2 - dx2 * dy1;
    8500    12793100 :         if (std::abs(prevCrossProduct) < 1e-20)
    8501      725558 :             prevCrossProduct = crossProduct;
    8502    12067500 :         else if (prevCrossProduct * crossProduct < 0)
    8503     4285760 :             return false;
    8504             :     }
    8505             : 
    8506     1763340 :     return true;
    8507             : }
    8508             : 
    8509             : /************************************************************************/
    8510             : /*                          getIntersection()                           */
    8511             : /************************************************************************/
    8512             : 
    8513             : /* Returns intersection of [p1,p2] with [p3,p4], if
    8514             :  * it is a single point, and the 2 segments are not colinear.
    8515             :  */
    8516    11811000 : static bool getIntersection(const XYPair &p1, const XYPair &p2,
    8517             :                             const XYPair &p3, const XYPair &p4, XYPair &xy)
    8518             : {
    8519    11811000 :     const double x1 = p1.first;
    8520    11811000 :     const double y1 = p1.second;
    8521    11811000 :     const double x2 = p2.first;
    8522    11811000 :     const double y2 = p2.second;
    8523    11811000 :     const double x3 = p3.first;
    8524    11811000 :     const double y3 = p3.second;
    8525    11811000 :     const double x4 = p4.first;
    8526    11811000 :     const double y4 = p4.second;
    8527    11811000 :     const double t_num = (x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4);
    8528    11811000 :     const double denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);
    8529    11811000 :     if (t_num * denom < 0 || std::abs(t_num) > std::abs(denom) || denom == 0)
    8530     9260780 :         return false;
    8531             : 
    8532     2550260 :     const double u_num = (x1 - x3) * (y1 - y2) - (y1 - y3) * (x1 - x2);
    8533     2550260 :     if (u_num * denom < 0 || std::abs(u_num) > std::abs(denom))
    8534      973924 :         return false;
    8535             : 
    8536     1576340 :     const double t = t_num / denom;
    8537     1576340 :     xy.first = x1 + t * (x2 - x1);
    8538     1576340 :     xy.second = y1 + t * (y2 - y1);
    8539     1576340 :     return true;
    8540             : }
    8541             : 
    8542             : /************************************************************************/
    8543             : /*                     getConvexPolyIntersection()                      */
    8544             : /************************************************************************/
    8545             : 
    8546             : // poly1 and poly2 must be closed and convex.
    8547             : // The returned intersection will not necessary be closed.
    8548      785302 : static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
    8549             :                                       XYPoly &intersection)
    8550             : {
    8551      785302 :     intersection.clear();
    8552             : 
    8553             :     // Add all points of poly1 inside poly2
    8554     3926510 :     for (size_t i = 0; i < poly1.size() - 1; ++i)
    8555             :     {
    8556     3141210 :         if (pointIntersectsConvexPoly(poly1[i], poly2))
    8557     1187430 :             intersection.push_back(poly1[i]);
    8558             :     }
    8559      785302 :     if (intersection.size() == poly1.size() - 1)
    8560             :     {
    8561             :         // poly1 is inside poly2
    8562      119100 :         return;
    8563             :     }
    8564             : 
    8565             :     // Add all points of poly2 inside poly1
    8566     3634860 :     for (size_t i = 0; i < poly2.size() - 1; ++i)
    8567             :     {
    8568     2907890 :         if (pointIntersectsConvexPoly(poly2[i], poly1))
    8569      575904 :             intersection.push_back(poly2[i]);
    8570             :     }
    8571             : 
    8572             :     // Compute the intersection of all edges of both polygons
    8573      726972 :     XYPair xy;
    8574     3634860 :     for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
    8575             :     {
    8576    14539400 :         for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
    8577             :         {
    8578    11631600 :             if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
    8579    11631600 :                                 poly2[i2 + 1], xy))
    8580             :             {
    8581     1576230 :                 intersection.push_back(xy);
    8582             :             }
    8583             :         }
    8584             :     }
    8585             : 
    8586      726972 :     if (intersection.empty())
    8587       60770 :         return;
    8588             : 
    8589             :     // Find lowest-left point in intersection set
    8590      666202 :     double lowest_x = cpl::NumericLimits<double>::max();
    8591      666202 :     double lowest_y = cpl::NumericLimits<double>::max();
    8592     3772450 :     for (const auto &pair : intersection)
    8593             :     {
    8594     3106240 :         const double x = pair.first;
    8595     3106240 :         const double y = pair.second;
    8596     3106240 :         if (y < lowest_y || (y == lowest_y && x < lowest_x))
    8597             :         {
    8598     1096040 :             lowest_x = x;
    8599     1096040 :             lowest_y = y;
    8600             :         }
    8601             :     }
    8602             : 
    8603     5737980 :     const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)
    8604             :     {
    8605     5737980 :         const double p1x_diff = p1.first - lowest_x;
    8606     5737980 :         const double p1y_diff = p1.second - lowest_y;
    8607     5737980 :         const double p2x_diff = p2.first - lowest_x;
    8608     5737980 :         const double p2y_diff = p2.second - lowest_y;
    8609     5737980 :         if (p2y_diff == 0.0 && p1y_diff == 0.0)
    8610             :         {
    8611     2655420 :             if (p1x_diff >= 0)
    8612             :             {
    8613     2655420 :                 if (p2x_diff >= 0)
    8614     2655420 :                     return p1.first < p2.first;
    8615           0 :                 return true;
    8616             :             }
    8617             :             else
    8618             :             {
    8619           0 :                 if (p2x_diff >= 0)
    8620           0 :                     return false;
    8621           0 :                 return p1.first < p2.first;
    8622             :             }
    8623             :         }
    8624             : 
    8625     3082560 :         if (p2x_diff == 0.0 && p1x_diff == 0.0)
    8626     1046960 :             return p1.second < p2.second;
    8627             : 
    8628             :         double tan_p1;
    8629     2035600 :         if (p1x_diff == 0.0)
    8630      464622 :             tan_p1 = p1y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
    8631             :         else
    8632     1570980 :             tan_p1 = p1y_diff / p1x_diff;
    8633             : 
    8634             :         double tan_p2;
    8635     2035600 :         if (p2x_diff == 0.0)
    8636      839515 :             tan_p2 = p2y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
    8637             :         else
    8638     1196080 :             tan_p2 = p2y_diff / p2x_diff;
    8639             : 
    8640     2035600 :         if (tan_p1 >= 0)
    8641             :         {
    8642     1904790 :             if (tan_p2 >= 0)
    8643     1881590 :                 return tan_p1 < tan_p2;
    8644             :             else
    8645       23199 :                 return true;
    8646             :         }
    8647             :         else
    8648             :         {
    8649      130806 :             if (tan_p2 >= 0)
    8650      103900 :                 return false;
    8651             :             else
    8652       26906 :                 return tan_p1 < tan_p2;
    8653             :         }
    8654      666202 :     };
    8655             : 
    8656             :     // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
    8657             :     // hull
    8658      666202 :     std::sort(intersection.begin(), intersection.end(), sortFunc);
    8659             : 
    8660             :     // Remove duplicated points
    8661      666202 :     size_t j = 1;
    8662     3106240 :     for (size_t i = 1; i < intersection.size(); ++i)
    8663             :     {
    8664     2440040 :         if (intersection[i] != intersection[i - 1])
    8665             :         {
    8666     1452560 :             if (j < i)
    8667      545275 :                 intersection[j] = intersection[i];
    8668     1452560 :             ++j;
    8669             :         }
    8670             :     }
    8671      666202 :     intersection.resize(j);
    8672             : }
    8673             : 
    8674             : /************************************************************************/
    8675             : /*                          GWKSumPreserving()                          */
    8676             : /************************************************************************/
    8677             : 
    8678             : static void GWKSumPreservingThread(void *pData);
    8679             : 
    8680          19 : static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
    8681             : {
    8682          19 :     return GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
    8683             : }
    8684             : 
    8685          19 : static void GWKSumPreservingThread(void *pData)
    8686             : {
    8687          19 :     GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
    8688          19 :     GDALWarpKernel *poWK = psJob->poWK;
    8689          19 :     const int iYMin = psJob->iYMin;
    8690          19 :     const int iYMax = psJob->iYMax;
    8691             :     const bool bIsAffineNoRotation =
    8692          19 :         GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
    8693          28 :                                         poWK->pTransformerArg) &&
    8694             :         // for debug/testing purposes
    8695           9 :         CPLTestBool(
    8696          19 :             CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
    8697             :     const bool bAvoidNoDataSingleBand =
    8698          21 :         poWK->nBands == 1 ||
    8699           2 :         !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
    8700          19 :                                           "UNIFIED_SRC_NODATA", "FALSE"));
    8701             : 
    8702          19 :     const int nDstXSize = poWK->nDstXSize;
    8703          19 :     const int nSrcXSize = poWK->nSrcXSize;
    8704          19 :     const int nSrcYSize = poWK->nSrcYSize;
    8705             : 
    8706          38 :     std::vector<double> adfX0(nSrcXSize + 1);
    8707          38 :     std::vector<double> adfY0(nSrcXSize + 1);
    8708          38 :     std::vector<double> adfZ0(nSrcXSize + 1);
    8709          38 :     std::vector<double> adfX1(nSrcXSize + 1);
    8710          38 :     std::vector<double> adfY1(nSrcXSize + 1);
    8711          38 :     std::vector<double> adfZ1(nSrcXSize + 1);
    8712          38 :     std::vector<int> abSuccess0(nSrcXSize + 1);
    8713          38 :     std::vector<int> abSuccess1(nSrcXSize + 1);
    8714             : 
    8715             :     CPLRectObj sGlobalBounds;
    8716          19 :     sGlobalBounds.minx = -2 * poWK->dfXScale;
    8717          19 :     sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
    8718          19 :     sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
    8719          19 :     sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
    8720          19 :     CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
    8721             : 
    8722             :     struct SourcePixel
    8723             :     {
    8724             :         int iSrcX;
    8725             :         int iSrcY;
    8726             : 
    8727             :         // Coordinates of source pixel in target pixel coordinates
    8728             :         double dfDstX0;
    8729             :         double dfDstY0;
    8730             :         double dfDstX1;
    8731             :         double dfDstY1;
    8732             :         double dfDstX2;
    8733             :         double dfDstY2;
    8734             :         double dfDstX3;
    8735             :         double dfDstY3;
    8736             : 
    8737             :         // Source pixel total area (might be larger than the one described
    8738             :         // by above coordinates, if the pixel was crossing the antimeridian
    8739             :         // and split)
    8740             :         double dfArea;
    8741             :     };
    8742             : 
    8743          38 :     std::vector<SourcePixel> sourcePixels;
    8744             : 
    8745          38 :     XYPoly discontinuityLeft(5);
    8746          38 :     XYPoly discontinuityRight(5);
    8747             : 
    8748             :     /* ==================================================================== */
    8749             :     /*      First pass: transform the 4 corners of each potential           */
    8750             :     /*      contributing source pixel to target pixel coordinates.          */
    8751             :     /* ==================================================================== */
    8752             : 
    8753             :     // Special case for top line
    8754             :     {
    8755          19 :         int iY = 0;
    8756        3364 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8757             :         {
    8758        3345 :             adfX1[iX] = iX + poWK->nSrcXOff;
    8759        3345 :             adfY1[iX] = iY + poWK->nSrcYOff;
    8760        3345 :             adfZ1[iX] = 0;
    8761             :         }
    8762             : 
    8763          19 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
    8764             :                              adfX1.data(), adfY1.data(), adfZ1.data(),
    8765             :                              abSuccess1.data());
    8766             : 
    8767        3364 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8768             :         {
    8769        3345 :             if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
    8770           0 :                 abSuccess1[iX] = FALSE;
    8771             :             else
    8772             :             {
    8773        3345 :                 adfX1[iX] -= poWK->nDstXOff;
    8774        3345 :                 adfY1[iX] -= poWK->nDstYOff;
    8775             :             }
    8776             :         }
    8777             :     }
    8778             : 
    8779        2032 :     const auto getInsideXSign = [poWK, nDstXSize](double dfX)
    8780             :     {
    8781        2032 :         return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
    8782         872 :                        dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
    8783        2032 :                    ? 1
    8784        1160 :                    : -1;
    8785          19 :     };
    8786             : 
    8787             :     const auto FindDiscontinuity =
    8788          80 :         [poWK, psJob, getInsideXSign](
    8789             :             double dfXLeft, double dfXRight, double dfY,
    8790             :             int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
    8791         800 :             double &dfXMidReprojectedRight, double &dfYMidReprojected)
    8792             :     {
    8793         880 :         for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
    8794             :         {
    8795         800 :             double dfXMid = (dfXLeft + dfXRight) / 2;
    8796         800 :             double dfXMidReprojected = dfXMid;
    8797         800 :             dfYMidReprojected = dfY;
    8798         800 :             double dfZ = 0;
    8799         800 :             int nSuccess = 0;
    8800         800 :             poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
    8801             :                                  &dfXMidReprojected, &dfYMidReprojected, &dfZ,
    8802             :                                  &nSuccess);
    8803         800 :             if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
    8804             :             {
    8805         456 :                 dfXRight = dfXMid;
    8806         456 :                 dfXMidReprojectedRight = dfXMidReprojected;
    8807             :             }
    8808             :             else
    8809             :             {
    8810         344 :                 dfXLeft = dfXMid;
    8811         344 :                 dfXMidReprojectedLeft = dfXMidReprojected;
    8812             :             }
    8813             :         }
    8814          80 :     };
    8815             : 
    8816        2685 :     for (int iY = 0; iY < nSrcYSize; ++iY)
    8817             :     {
    8818        2666 :         std::swap(adfX0, adfX1);
    8819        2666 :         std::swap(adfY0, adfY1);
    8820        2666 :         std::swap(adfZ0, adfZ1);
    8821        2666 :         std::swap(abSuccess0, abSuccess1);
    8822             : 
    8823     4836120 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8824             :         {
    8825     4833460 :             adfX1[iX] = iX + poWK->nSrcXOff;
    8826     4833460 :             adfY1[iX] = iY + 1 + poWK->nSrcYOff;
    8827     4833460 :             adfZ1[iX] = 0;
    8828             :         }
    8829             : 
    8830        2666 :         poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
    8831             :                              adfX1.data(), adfY1.data(), adfZ1.data(),
    8832             :                              abSuccess1.data());
    8833             : 
    8834     4836120 :         for (int iX = 0; iX <= nSrcXSize; ++iX)
    8835             :         {
    8836     4833460 :             if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
    8837           0 :                 abSuccess1[iX] = FALSE;
    8838             :             else
    8839             :             {
    8840     4833460 :                 adfX1[iX] -= poWK->nDstXOff;
    8841     4833460 :                 adfY1[iX] -= poWK->nDstYOff;
    8842             :             }
    8843             :         }
    8844             : 
    8845     4833460 :         for (int iX = 0; iX < nSrcXSize; ++iX)
    8846             :         {
    8847     9661580 :             if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
    8848     4830790 :                 abSuccess1[iX + 1])
    8849             :             {
    8850             :                 /* --------------------------------------------------------------------
    8851             :                  */
    8852             :                 /*      Do not try to apply transparent source pixels to the
    8853             :                  * destination.*/
    8854             :                 /* --------------------------------------------------------------------
    8855             :                  */
    8856     4830790 :                 const auto iSrcOffset =
    8857     4830790 :                     iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
    8858     9560570 :                 if (poWK->panUnifiedSrcValid != nullptr &&
    8859     4729780 :                     !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
    8860             :                 {
    8861     4738340 :                     continue;
    8862             :                 }
    8863             : 
    8864      103415 :                 if (poWK->pafUnifiedSrcDensity != nullptr)
    8865             :                 {
    8866           0 :                     if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
    8867             :                         SRC_DENSITY_THRESHOLD_FLOAT)
    8868           0 :                         continue;
    8869             :                 }
    8870             : 
    8871             :                 SourcePixel sp;
    8872      103415 :                 sp.dfArea = 0;
    8873      103415 :                 sp.dfDstX0 = adfX0[iX];
    8874      103415 :                 sp.dfDstY0 = adfY0[iX];
    8875      103415 :                 sp.dfDstX1 = adfX0[iX + 1];
    8876      103415 :                 sp.dfDstY1 = adfY0[iX + 1];
    8877      103415 :                 sp.dfDstX2 = adfX1[iX + 1];
    8878      103415 :                 sp.dfDstY2 = adfY1[iX + 1];
    8879      103415 :                 sp.dfDstX3 = adfX1[iX];
    8880      103415 :                 sp.dfDstY3 = adfY1[iX];
    8881             : 
    8882             :                 // Detect pixels that likely cross the anti-meridian and
    8883             :                 // introduce a discontinuity when reprojected.
    8884             : 
    8885      103415 :                 if (std::fabs(adfX0[iX] - adfX0[iX + 1]) > 2 * poWK->dfXScale &&
    8886          80 :                     std::fabs(adfX1[iX] - adfX1[iX + 1]) > 2 * poWK->dfXScale &&
    8887          40 :                     getInsideXSign(adfX0[iX]) !=
    8888          80 :                         getInsideXSign(adfX0[iX + 1]) &&
    8889          80 :                     getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
    8890          40 :                     getInsideXSign(adfX0[iX + 1]) ==
    8891      103495 :                         getInsideXSign(adfX1[iX + 1]) &&
    8892          40 :                     (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
    8893             :                         0)
    8894             :                 {
    8895             : #ifdef DEBUG_VERBOSE
    8896             :                     CPLDebug(
    8897             :                         "WARP",
    8898             :                         "Discontinuity for iSrcX=%d, iSrcY=%d, dest corners:"
    8899             :                         "X0[iX]=%f X0[iX+1]=%f X1[iX]=%f X1[iX+1]=%f,"
    8900             :                         "Y0[iX]=%f Y0[iX+1]=%f Y1[iX]=%f Y1[iX+1]=%f",
    8901             :                         iX + poWK->nSrcXOff, iY + poWK->nSrcYOff, adfX0[iX],
    8902             :                         adfX0[iX + 1], adfX1[iX], adfX1[iX + 1], adfY0[iX],
    8903             :                         adfY0[iX + 1], adfY1[iX], adfY1[iX + 1]);
    8904             : #endif
    8905          40 :                     double dfXMidReprojectedLeftTop = 0;
    8906          40 :                     double dfXMidReprojectedRightTop = 0;
    8907          40 :                     double dfYMidReprojectedTop = 0;
    8908          40 :                     FindDiscontinuity(
    8909          40 :                         iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
    8910          80 :                         iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
    8911             :                         dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
    8912             :                         dfYMidReprojectedTop);
    8913          40 :                     double dfXMidReprojectedLeftBottom = 0;
    8914          40 :                     double dfXMidReprojectedRightBottom = 0;
    8915          40 :                     double dfYMidReprojectedBottom = 0;
    8916          40 :                     FindDiscontinuity(
    8917          40 :                         iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
    8918          80 :                         iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
    8919             :                         dfXMidReprojectedLeftBottom,
    8920             :                         dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
    8921             : 
    8922          40 :                     discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
    8923          40 :                     discontinuityLeft[1] =
    8924          80 :                         XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
    8925          40 :                     discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
    8926          40 :                                                   dfYMidReprojectedBottom);
    8927          40 :                     discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
    8928          40 :                     discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
    8929             : 
    8930          40 :                     discontinuityRight[0] =
    8931          80 :                         XYPair(adfX0[iX + 1], adfY0[iX + 1]);
    8932          40 :                     discontinuityRight[1] =
    8933          80 :                         XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
    8934          40 :                     discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
    8935          40 :                                                    dfYMidReprojectedBottom);
    8936          40 :                     discontinuityRight[3] =
    8937          80 :                         XYPair(adfX1[iX + 1], adfY1[iX + 1]);
    8938          40 :                     discontinuityRight[4] =
    8939          80 :                         XYPair(adfX0[iX + 1], adfY0[iX + 1]);
    8940             : 
    8941          40 :                     sp.dfArea = getArea(discontinuityLeft) +
    8942          40 :                                 getArea(discontinuityRight);
    8943          40 :                     if (getInsideXSign(adfX0[iX]) >= 1)
    8944             :                     {
    8945          20 :                         sp.dfDstX1 = dfXMidReprojectedLeftTop;
    8946          20 :                         sp.dfDstY1 = dfYMidReprojectedTop;
    8947          20 :                         sp.dfDstX2 = dfXMidReprojectedLeftBottom;
    8948          20 :                         sp.dfDstY2 = dfYMidReprojectedBottom;
    8949             :                     }
    8950             :                     else
    8951             :                     {
    8952          20 :                         sp.dfDstX0 = dfXMidReprojectedRightTop;
    8953          20 :                         sp.dfDstY0 = dfYMidReprojectedTop;
    8954          20 :                         sp.dfDstX3 = dfXMidReprojectedRightBottom;
    8955          20 :                         sp.dfDstY3 = dfYMidReprojectedBottom;
    8956             :                     }
    8957             :                 }
    8958             : 
    8959             :                 // Bounding box of source pixel (expressed in target pixel
    8960             :                 // coordinates)
    8961             :                 CPLRectObj sRect;
    8962      103415 :                 sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
    8963      103415 :                                       std::min(sp.dfDstX2, sp.dfDstX3));
    8964      103415 :                 sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
    8965      103415 :                                       std::min(sp.dfDstY2, sp.dfDstY3));
    8966      103415 :                 sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
    8967      103415 :                                       std::max(sp.dfDstX2, sp.dfDstX3));
    8968      103415 :                 sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
    8969      103415 :                                       std::max(sp.dfDstY2, sp.dfDstY3));
    8970      103415 :                 if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
    8971      101355 :                       sRect.miny < iYMax && sRect.maxy > iYMin))
    8972             :                 {
    8973       10852 :                     continue;
    8974             :                 }
    8975             : 
    8976       92563 :                 sp.iSrcX = iX;
    8977       92563 :                 sp.iSrcY = iY;
    8978             : 
    8979       92563 :                 if (!bIsAffineNoRotation)
    8980             :                 {
    8981             :                     // Check polygon validity (no self-crossing)
    8982       89745 :                     XYPair xy;
    8983       89745 :                     if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
    8984       89745 :                                         XYPair(sp.dfDstX1, sp.dfDstY1),
    8985       89745 :                                         XYPair(sp.dfDstX2, sp.dfDstY2),
    8986      269235 :                                         XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
    8987       89745 :                         getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
    8988       89745 :                                         XYPair(sp.dfDstX2, sp.dfDstY2),
    8989       89745 :                                         XYPair(sp.dfDstX0, sp.dfDstY0),
    8990      179490 :                                         XYPair(sp.dfDstX3, sp.dfDstY3), xy))
    8991             :                     {
    8992         113 :                         continue;
    8993             :                     }
    8994             :                 }
    8995             : 
    8996       92450 :                 CPLQuadTreeInsertWithBounds(
    8997             :                     hQuadTree,
    8998             :                     reinterpret_cast<void *>(
    8999       92450 :                         static_cast<uintptr_t>(sourcePixels.size())),
    9000             :                     &sRect);
    9001             : 
    9002       92450 :                 sourcePixels.push_back(sp);
    9003             :             }
    9004             :         }
    9005             :     }
    9006             : 
    9007          38 :     std::vector<double> adfRealValue(poWK->nBands);
    9008          38 :     std::vector<double> adfImagValue(poWK->nBands);
    9009          38 :     std::vector<double> adfBandDensity(poWK->nBands);
    9010          38 :     std::vector<double> adfWeight(poWK->nBands);
    9011             : 
    9012             : #ifdef CHECK_SUM_WITH_GEOS
    9013             :     auto hGEOSContext = OGRGeometry::createGEOSContext();
    9014             :     auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
    9015             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
    9016             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
    9017             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
    9018             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
    9019             :     GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
    9020             :     auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
    9021             :     auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
    9022             : 
    9023             :     auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
    9024             :     auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
    9025             :     auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
    9026             : #endif
    9027             : 
    9028             :     const XYPoly xy1{
    9029          38 :         {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
    9030          38 :     XYPoly xy2(5);
    9031          38 :     XYPoly xy2_triangle(4);
    9032          38 :     XYPoly intersection;
    9033             : 
    9034             :     /* ==================================================================== */
    9035             :     /*      Loop over output lines.                                         */
    9036             :     /* ==================================================================== */
    9037        1951 :     for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    9038             :     {
    9039             :         CPLRectObj sRect;
    9040        1932 :         sRect.miny = iDstY;
    9041        1932 :         sRect.maxy = iDstY + 1;
    9042             : 
    9043             :         /* ====================================================================
    9044             :          */
    9045             :         /*      Loop over pixels in output scanline. */
    9046             :         /* ====================================================================
    9047             :          */
    9048     1403940 :         for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
    9049             :         {
    9050     1402010 :             sRect.minx = iDstX;
    9051     1402010 :             sRect.maxx = iDstX + 1;
    9052     1402010 :             int nSourcePixels = 0;
    9053             :             void **pahSourcePixel =
    9054     1402010 :                 CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
    9055     1402010 :             if (nSourcePixels == 0)
    9056             :             {
    9057     1183090 :                 CPLFree(pahSourcePixel);
    9058     1183100 :                 continue;
    9059             :             }
    9060             : 
    9061      218919 :             std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
    9062      218919 :             std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
    9063      218919 :             std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
    9064      218919 :             std::fill(adfWeight.begin(), adfWeight.end(), 0);
    9065      218919 :             double dfDensity = 0;
    9066             :             // Just above zero to please Coverity Scan
    9067      218919 :             double dfTotalWeight = std::numeric_limits<double>::min();
    9068             : 
    9069             :             /* ====================================================================
    9070             :              */
    9071             :             /*          Iterate over each contributing source pixel to add its
    9072             :              */
    9073             :             /*          value weighted by the ratio of the area of its
    9074             :              * intersection  */
    9075             :             /*          with the target pixel divided by the area of the source
    9076             :              */
    9077             :             /*          pixel. */
    9078             :             /* ====================================================================
    9079             :              */
    9080     1020550 :             for (int i = 0; i < nSourcePixels; ++i)
    9081             :             {
    9082      801628 :                 const int iSourcePixel = static_cast<int>(
    9083      801628 :                     reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
    9084      801628 :                 auto &sp = sourcePixels[iSourcePixel];
    9085             : 
    9086      801628 :                 double dfWeight = 0.0;
    9087      801628 :                 if (bIsAffineNoRotation)
    9088             :                 {
    9089             :                     // Optimization since the source pixel is a rectangle in
    9090             :                     // target pixel coordinates
    9091       16326 :                     double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
    9092       16326 :                     double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
    9093       16326 :                     double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
    9094       16326 :                     double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
    9095       16326 :                     double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
    9096       16326 :                     double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
    9097       16326 :                     double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
    9098       16326 :                     double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
    9099       16326 :                     dfWeight =
    9100       16326 :                         ((dfIntersMaxX - dfIntersMinX) *
    9101       16326 :                          (dfIntersMaxY - dfIntersMinY)) /
    9102       16326 :                         ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
    9103             :                 }
    9104             :                 else
    9105             :                 {
    9106             :                     // Compute the polygon of the source pixel in target pixel
    9107             :                     // coordinates, and shifted to the target pixel (unit square
    9108             :                     // coordinates)
    9109             : 
    9110      785302 :                     xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
    9111      785302 :                     xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
    9112      785302 :                     xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
    9113      785302 :                     xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
    9114      785302 :                     xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
    9115             : 
    9116      785302 :                     if (isConvex(xy2))
    9117             :                     {
    9118      785302 :                         getConvexPolyIntersection(xy1, xy2, intersection);
    9119      785302 :                         if (intersection.size() >= 3)
    9120             :                         {
    9121      468849 :                             dfWeight = getArea(intersection);
    9122             :                         }
    9123             :                     }
    9124             :                     else
    9125             :                     {
    9126             :                         // Split xy2 into 2 triangles.
    9127           0 :                         xy2_triangle[0] = xy2[0];
    9128           0 :                         xy2_triangle[1] = xy2[1];
    9129           0 :                         xy2_triangle[2] = xy2[2];
    9130           0 :                         xy2_triangle[3] = xy2[0];
    9131           0 :                         getConvexPolyIntersection(xy1, xy2_triangle,
    9132             :                                                   intersection);
    9133           0 :                         if (intersection.size() >= 3)
    9134             :                         {
    9135           0 :                             dfWeight = getArea(intersection);
    9136             :                         }
    9137             : 
    9138           0 :                         xy2_triangle[1] = xy2[2];
    9139           0 :                         xy2_triangle[2] = xy2[3];
    9140           0 :                         getConvexPolyIntersection(xy1, xy2_triangle,
    9141             :                                                   intersection);
    9142           0 :                         if (intersection.size() >= 3)
    9143             :                         {
    9144           0 :                             dfWeight += getArea(intersection);
    9145             :                         }
    9146             :                     }
    9147      785302 :                     if (dfWeight > 0.0)
    9148             :                     {
    9149      468828 :                         if (sp.dfArea == 0)
    9150       89592 :                             sp.dfArea = getArea(xy2);
    9151      468828 :                         dfWeight /= sp.dfArea;
    9152             :                     }
    9153             : 
    9154             : #ifdef CHECK_SUM_WITH_GEOS
    9155             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
    9156             :                                          sp.dfDstX0 - iDstX,
    9157             :                                          sp.dfDstY0 - iDstY);
    9158             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
    9159             :                                          sp.dfDstX1 - iDstX,
    9160             :                                          sp.dfDstY1 - iDstY);
    9161             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
    9162             :                                          sp.dfDstX2 - iDstX,
    9163             :                                          sp.dfDstY2 - iDstY);
    9164             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
    9165             :                                          sp.dfDstX3 - iDstX,
    9166             :                                          sp.dfDstY3 - iDstY);
    9167             :                     GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
    9168             :                                          sp.dfDstX0 - iDstX,
    9169             :                                          sp.dfDstY0 - iDstY);
    9170             : 
    9171             :                     double dfWeightGEOS = 0.0;
    9172             :                     auto hIntersection =
    9173             :                         GEOSIntersection_r(hGEOSContext, hP1, hP2);
    9174             :                     if (hIntersection)
    9175             :                     {
    9176             :                         double dfIntersArea = 0.0;
    9177             :                         if (GEOSArea_r(hGEOSContext, hIntersection,
    9178             :                                        &dfIntersArea) &&
    9179             :                             dfIntersArea > 0)
    9180             :                         {
    9181             :                             double dfSourceArea = 0.0;
    9182             :                             if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
    9183             :                             {
    9184             :                                 dfWeightGEOS = dfIntersArea / dfSourceArea;
    9185             :                             }
    9186             :                         }
    9187             :                         GEOSGeom_destroy_r(hGEOSContext, hIntersection);
    9188             :                     }
    9189             :                     if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
    9190             :                     {
    9191             :                         /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
    9192             :                                         dfWeight, dfWeightGEOS);
    9193             :                         printf("xy2: ");  // ok
    9194             :                         for (const auto &xy : xy2)
    9195             :                             printf("[%f, %f], ", xy.first, xy.second);  // ok
    9196             :                         printf("\n");                                   // ok
    9197             :                         printf("intersection: ");                       // ok
    9198             :                         for (const auto &xy : intersection)
    9199             :                             printf("[%f, %f], ", xy.first, xy.second);  // ok
    9200             :                         printf("\n");                                   // ok
    9201             :                     }
    9202             : #endif
    9203             :                 }
    9204      801628 :                 if (dfWeight > 0.0)
    9205             :                 {
    9206             : #ifdef DEBUG_VERBOSE
    9207             : #if defined(DST_X) && defined(DST_Y)
    9208             :                     if (iDstX + poWK->nDstXOff == DST_X &&
    9209             :                         iDstY + poWK->nDstYOff == DST_Y)
    9210             :                     {
    9211             :                         CPLDebug("WARP",
    9212             :                                  "iSrcX = %d, iSrcY = %d, weight =%.17g",
    9213             :                                  sp.iSrcX + poWK->nSrcXOff,
    9214             :                                  sp.iSrcY + poWK->nSrcYOff, dfWeight);
    9215             :                     }
    9216             : #endif
    9217             : #endif
    9218             : 
    9219      474104 :                     const GPtrDiff_t iSrcOffset =
    9220      474104 :                         sp.iSrcX +
    9221      474104 :                         static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
    9222      474104 :                     dfTotalWeight += dfWeight;
    9223             : 
    9224      474104 :                     if (poWK->pafUnifiedSrcDensity != nullptr)
    9225             :                     {
    9226           0 :                         dfDensity +=
    9227           0 :                             dfWeight *
    9228           0 :                             double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
    9229             :                     }
    9230             :                     else
    9231             :                     {
    9232      474104 :                         dfDensity += dfWeight;
    9233             :                     }
    9234             : 
    9235     1818730 :                     for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    9236             :                     {
    9237             :                         // Returns pixel value if it is not no data.
    9238             :                         double dfBandDensity;
    9239             :                         double dfRealValue;
    9240             :                         double dfImagValue;
    9241     2689250 :                         if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
    9242             :                                                &dfBandDensity, &dfRealValue,
    9243             :                                                &dfImagValue) &&
    9244     1344620 :                               dfBandDensity > BAND_DENSITY_THRESHOLD))
    9245             :                         {
    9246           0 :                             continue;
    9247             :                         }
    9248             : #ifdef DEBUG_VERBOSE
    9249             : #if defined(DST_X) && defined(DST_Y)
    9250             :                         if (iDstX + poWK->nDstXOff == DST_X &&
    9251             :                             iDstY + poWK->nDstYOff == DST_Y)
    9252             :                         {
    9253             :                             CPLDebug("WARP", "value * weight = %.17g",
    9254             :                                      dfRealValue * dfWeight);
    9255             :                         }
    9256             : #endif
    9257             : #endif
    9258             : 
    9259     1344620 :                         adfRealValue[iBand] += dfRealValue * dfWeight;
    9260     1344620 :                         adfImagValue[iBand] += dfImagValue * dfWeight;
    9261     1344620 :                         adfBandDensity[iBand] += dfBandDensity * dfWeight;
    9262     1344620 :                         adfWeight[iBand] += dfWeight;
    9263             :                     }
    9264             :                 }
    9265             :             }
    9266             : 
    9267      218919 :             CPLFree(pahSourcePixel);
    9268             : 
    9269             :             /* --------------------------------------------------------------------
    9270             :              */
    9271             :             /*          Update destination pixel value. */
    9272             :             /* --------------------------------------------------------------------
    9273             :              */
    9274      218919 :             bool bHasFoundDensity = false;
    9275      218919 :             const GPtrDiff_t iDstOffset =
    9276      218919 :                 iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
    9277      827838 :             for (int iBand = 0; iBand < poWK->nBands; ++iBand)
    9278             :             {
    9279      608919 :                 if (adfWeight[iBand] > 0)
    9280             :                 {
    9281             :                     const double dfBandDensity =
    9282      608909 :                         adfBandDensity[iBand] / adfWeight[iBand];
    9283      608909 :                     if (dfBandDensity > BAND_DENSITY_THRESHOLD)
    9284             :                     {
    9285      608909 :                         bHasFoundDensity = true;
    9286      608909 :                         GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
    9287      608909 :                                          adfRealValue[iBand],
    9288      608909 :                                          adfImagValue[iBand],
    9289             :                                          bAvoidNoDataSingleBand);
    9290             :                     }
    9291             :                 }
    9292             :             }
    9293             : 
    9294      218919 :             if (!bHasFoundDensity)
    9295          10 :                 continue;
    9296             : 
    9297      218909 :             if (!bAvoidNoDataSingleBand)
    9298             :             {
    9299           0 :                 GWKAvoidNoDataMultiBand(poWK, iDstOffset);
    9300             :             }
    9301             : 
    9302             :             /* --------------------------------------------------------------------
    9303             :              */
    9304             :             /*          Update destination density/validity masks. */
    9305             :             /* --------------------------------------------------------------------
    9306             :              */
    9307      218909 :             GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
    9308             : 
    9309      218909 :             if (poWK->panDstValid != nullptr)
    9310             :             {
    9311       11752 :                 CPLMaskSet(poWK->panDstValid, iDstOffset);
    9312             :             }
    9313             :         }
    9314             : 
    9315             :         /* --------------------------------------------------------------------
    9316             :          */
    9317             :         /*      Report progress to the user, and optionally cancel out. */
    9318             :         /* --------------------------------------------------------------------
    9319             :          */
    9320        1932 :         if (psJob->pfnProgress && psJob->pfnProgress(psJob))
    9321           0 :             break;
    9322             :     }
    9323             : 
    9324             : #ifdef CHECK_SUM_WITH_GEOS
    9325             :     GEOSGeom_destroy_r(hGEOSContext, hP1);
    9326             :     GEOSGeom_destroy_r(hGEOSContext, hP2);
    9327             :     OGRGeometry::freeGEOSContext(hGEOSContext);
    9328             : #endif
    9329          19 :     CPLQuadTreeDestroy(hQuadTree);
    9330          19 : }

Generated by: LCOV version 1.14